示例#1
0
 def __init__(self, graph=None, debug=0):
     if graph is None:
         graph = Graph()
     self.graphident = self
     self.graph = graph
     self.debug = debug
     self.indent = 0
     graph.add_node(self, None)
示例#2
0
 def __init__(self, graph=None, debug=0):
     if graph is None:
         graph = Graph()
     self.graphident = self
     self.graph = graph
     self.debug = debug
     self.indent = 0
     graph.add_node(self, None)
示例#3
0
    def test_nodes(self):
        graph = Graph()

        self.assertEqual(graph.node_list(), [])

        o1 = object()
        o1b = object()
        o2 = object()
        graph.add_node(1, o1)
        graph.add_node(1, o1b)
        graph.add_node(2, o2)
        graph.add_node(3)

        self.assertRaises(TypeError, graph.add_node, [])

        self.assertTrue(graph.node_data(1) is o1)
        self.assertTrue(graph.node_data(2) is o2)
        self.assertTrue(graph.node_data(3) is None)

        self.assertTrue(1 in graph)
        self.assertTrue(2 in graph)
        self.assertTrue(3 in graph)

        self.assertEqual(graph.number_of_nodes(), 3)
        self.assertEqual(graph.number_of_hidden_nodes(), 0)
        self.assertEqual(graph.hidden_node_list(), [])
        self.assertEqual(list(sorted(graph)), [1, 2, 3])

        graph.hide_node(1)
        graph.hide_node(2)
        graph.hide_node(3)


        self.assertEqual(graph.number_of_nodes(), 0)
        self.assertEqual(graph.number_of_hidden_nodes(), 3)
        self.assertEqual(list(sorted(graph.hidden_node_list())), [1, 2, 3])

        self.assertFalse(1 in graph)
        self.assertFalse(2 in graph)
        self.assertFalse(3 in graph)

        graph.add_node(1)
        self.assertFalse(1 in graph)

        graph.restore_node(1)
        self.assertTrue(1 in graph)
        self.assertFalse(2 in graph)
        self.assertFalse(3 in graph)

        graph.restore_all_nodes()
        self.assertTrue(1 in graph)
        self.assertTrue(2 in graph)
        self.assertTrue(3 in graph)

        self.assertEqual(list(sorted(graph.node_list())), [1, 2, 3])

        v = graph.describe_node(1)
        self.assertEqual(v, (1, o1, [], []))
示例#4
0
    def test_connected(self):
        graph = Graph()
        graph.add_node(1)
        graph.add_node(2)
        graph.add_node(3)
        graph.add_node(4)

        self.assertFalse(graph.connected())

        graph.add_edge(1, 2)
        graph.add_edge(3, 4)
        self.assertFalse(graph.connected())

        graph.add_edge(2, 3)
        graph.add_edge(4, 1)
        self.assertTrue(graph.connected())
示例#5
0
    def test_bfs_subgraph_does_not_reverse_egde_direction(self):
        graph = Graph()
        graph.add_node('A')
        graph.add_node('B')
        graph.add_node('C')
        graph.add_edge('A', 'B')
        graph.add_edge('B', 'C')

        whole_graph = graph.forw_topo_sort()
        subgraph_backward = graph.back_bfs_subgraph('C')
        subgraph_backward = subgraph_backward.forw_topo_sort()
        self.assertEquals(whole_graph, subgraph_backward)

        subgraph_forward = graph.forw_bfs_subgraph('A')
        subgraph_forward = subgraph_forward.forw_topo_sort()
        self.assertEquals(whole_graph, subgraph_forward)
示例#6
0
def copy_to_graph(session):
    graph = Graph()
    count = 0
    for artist in session.query(LastFmArtist).all():
        if artist.graph_out:
            graph.add_node(artist.id, artist)
            edges = list(artist.graph_out)
            edges.sort(key=lambda e: e.weight, reverse=True)
            for edge in edges[0:2]:
                target = edge.to_
                graph.add_node(target.id, target)
                graph.add_edge(artist.id, target.id, edge, create_nodes=False)
            if not count % 100:
                LOG.info('Added {0} nodes.'.format(count))
            count += 1
        else:
            LOG.warn('Discarding unconnected {0}.'.format(artist.name))
    return graph
示例#7
0
文件: workflows.py 项目: stain/rabix
class Workflow(Process):
    def __init__(self,
                 process_id,
                 inputs,
                 outputs,
                 requirements,
                 hints,
                 label,
                 description,
                 steps,
                 context,
                 data_links=None):
        super(Workflow, self).__init__(process_id, inputs, outputs,
                                       requirements, hints, label, description)
        self.graph = Graph()
        self.executor = context.executor
        self.steps = steps
        self.data_links = data_links or []
        self.context = context
        self.port_step_index = {}

        for step in steps:
            node = AppNode(step.app, {})
            self.add_node(step.id, node)
            for inp in step.inputs:
                self.port_step_index[inp.id] = step.id
                self.move_connect_to_datalink(inp)
                if inp.value:
                    node.inputs[inp.id] = inp.value

            for out in step.outputs:
                self.port_step_index[out.id] = step.id

        for inp in self.inputs:
            self.add_node(inp.id, inp)

        for out in self.outputs:
            self.move_connect_to_datalink(out)
            self.add_node(out.id, out)

        # dedupe links
        s = {tuple(dl.items()) for dl in self.data_links}
        self.data_links = [dict(dl) for dl in s]

        for dl in self.data_links:
            dst = dl['destination'].lstrip('#')
            src = dl['source'].lstrip('#')

            if src in self.port_step_index and dst in self.port_step_index:
                rel = Relation(src, dst, dl.get('position', 0))
                src = self.port_step_index[src]
                dst = self.port_step_index[dst]
            elif src in self._inputs:
                rel = InputRelation(dst, dl.get('position', 0))
                dst = self.port_step_index[dst]
            elif dst in self._outputs:
                rel = OutputRelation(src, dl.get('position', 0))
                src = self.port_step_index[src]
            else:
                raise RabixError("invalid data link %s" % dl)

            self.graph.add_edge(src, dst, rel)

        if not self.graph.connected():
            pass
            # raise ValidationError('Graph is not connected')

    def move_connect_to_datalink(self, port):
        for src in port.source:
            self.data_links.append({
                'source': src,
                'destination': '#' + port.id
            })
        del port.source[:]

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def run(self, job):
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next = eg.next_job()
            self.executor.execute(next, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        d = super(Workflow, self).to_dict(context)
        d.update({
            "class": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        converted = {}
        for k, v in six.iteritems(d):
            if k == 'steps':
                converted[k] = [Step.from_dict(context, s) for s in v]
            else:
                converted[k] = context.from_dict(v)

        kwargs = Process.kwarg_dict(converted)
        kwargs.update({
            'steps':
            converted['steps'],
            'data_links':
            converted.get('dataLinks'),
            'context':
            context,
            'inputs': [
                InputParameter.from_dict(context, i)
                for i in converted['inputs']
            ],
            'outputs': [
                WorkflowOutput.from_dict(context, o)
                for o in converted['outputs']
            ]
        })
        return cls(**kwargs)
示例#8
0
文件: workflows.py 项目: lowks/rabix
class Workflow(Process):

    def __init__(self, process_id, inputs, outputs, requirements, hints, label,
                 description, steps, context, data_links=None):
        super(Workflow, self).__init__(
            process_id, inputs, outputs, requirements,
            hints, label, description
        )
        self.graph = Graph()
        self.executor = context.executor
        self.steps = steps
        self.data_links = data_links or []
        self.context = context
        self.port_step_index = {}

        for step in steps:
            node = AppNode(step.app, {})
            self.add_node(step.id, node)
            for inp in step.inputs:
                self.port_step_index[inp.id] = step.id
                self.move_connect_to_datalink(inp)
                if inp.value:
                    node.inputs[inp.id] = inp.value

            for out in step.outputs:
                self.port_step_index[out.id] = step.id

        for inp in self.inputs:
            self.add_node(inp.id, inp)

        for out in self.outputs:
            self.move_connect_to_datalink(out)
            self.add_node(out.id, out)

        for dl in self.data_links:
            dst = dl['destination'].lstrip('#')
            src = dl['source'].lstrip('#')

            if src in self.port_step_index and dst in self.port_step_index:
                rel = Relation(src, dst, dl.get('position', 0))
                src = self.port_step_index[src]
                dst = self.port_step_index[dst]
            elif src in self._inputs:
                rel = InputRelation(dst, dl.get('position', 0))
                dst = self.port_step_index[dst]
            elif dst in self._outputs:
                rel = OutputRelation(src, dl.get('position', 0))
                src = self.port_step_index[src]
            else:
                raise RabixError("invalid data link %s" % dl)

            self.graph.add_edge(src, dst, rel)

        if not self.graph.connected():
            pass
            # raise ValidationError('Graph is not connected')

    def move_connect_to_datalink(self, port):
        for dl in port.connect:
            dl['destination'] = '#'+port.id
            self.data_links.append(dl)
        del port.connect[:]

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def hide_nodes(self, type):
        for node_id in self.graph.node_list():
            node = self.graph.node_data(node_id)
            if isinstance(node, type):
                self.graph.hide_node(node_id)

    def run(self, job):
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next = eg.next_job()
            self.executor.execute(next, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        d = super(Workflow, self).to_dict(context)
        d.update({
            "class": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        converted = {}
        for k, v in six.iteritems(d):
            if k == 'steps':
                converted[k] = [Step.from_dict(context, s) for s in v]
            else:
                converted[k] = context.from_dict(v)

        kwargs = Process.kwarg_dict(converted)
        kwargs.update({
            'steps': converted['steps'],
            'data_links': converted.get('dataLinks'),
            'context': context,
            'inputs': [InputParameter.from_dict(context, i)
                       for i in converted['inputs']],
            'outputs': [WorkflowOutput.from_dict(context, o)
                        for o in converted['outputs']]
        })
        return cls(**kwargs)
示例#9
0
    def test_edges(self):
        graph = Graph()
        graph.add_node(1)
        graph.add_node(2)
        graph.add_node(3)
        graph.add_node(4)
        graph.add_node(5)

        self.assertTrue(isinstance(graph.edge_list(), list))

        graph.add_edge(1, 2)
        graph.add_edge(4, 5, 'a')

        self.assertRaises(GraphError, graph.add_edge, 'a', 'b', create_nodes=False)

        self.assertEqual(graph.number_of_hidden_edges(), 0)
        self.assertEqual(graph.number_of_edges(), 2)
        e = graph.edge_by_node(1, 2)
        self.assertTrue(isinstance(e, int))
        graph.hide_edge(e)
        self.assertEqual(graph.number_of_hidden_edges(), 1)
        self.assertEqual(graph.number_of_edges(), 1)
        e2 = graph.edge_by_node(1, 2)
        self.assertTrue(e2 is None)

        graph.restore_edge(e)
        e2 = graph.edge_by_node(1, 2)
        self.assertEqual(e, e2)
        self.assertEqual(graph.number_of_hidden_edges(), 0)

        self.assertEqual(graph.number_of_edges(), 2)

        e1 = graph.edge_by_node(1, 2)
        e2 = graph.edge_by_node(4, 5)
        graph.hide_edge(e1)
        graph.hide_edge(e2)

        self.assertEqual(graph.number_of_edges(), 0)
        graph.restore_all_edges()
        self.assertEqual(graph.number_of_edges(), 2)

        self.assertEqual(graph.edge_by_id(e1), (1,2))
        self.assertRaises(GraphError, graph.edge_by_id, (e1+1)*(e2+1)+1)

        self.assertEqual(list(sorted(graph.edge_list())), [e1, e2])

        self.assertEqual(graph.describe_edge(e1), (e1, 1, 1, 2))
        self.assertEqual(graph.describe_edge(e2), (e2, 'a', 4, 5))

        self.assertEqual(graph.edge_data(e1), 1)
        self.assertEqual(graph.edge_data(e2), 'a')

        self.assertEqual(graph.head(e2), 4)
        self.assertEqual(graph.tail(e2), 5)

        graph.add_edge(1, 3)
        graph.add_edge(1, 5)
        graph.add_edge(4, 1)

        self.assertEqual(list(sorted(graph.out_nbrs(1))), [2, 3, 5])
        self.assertEqual(list(sorted(graph.inc_nbrs(1))), [4])
        self.assertEqual(list(sorted(graph.inc_nbrs(5))), [1, 4])
        self.assertEqual(list(sorted(graph.all_nbrs(1))), [2, 3, 4, 5])

        graph.add_edge(5, 1)
        self.assertEqual(list(sorted(graph.all_nbrs(5))), [1, 4])

        self.assertEqual(graph.out_degree(1), 3)
        self.assertEqual(graph.inc_degree(2), 1)
        self.assertEqual(graph.inc_degree(5), 2)
        self.assertEqual(graph.all_degree(5), 3)

        v = graph.out_edges(4)
        self.assertTrue(isinstance(v, list))
        self.assertEqual(graph.edge_by_id(v[0]), (4, 5))

        v = graph.out_edges(1)
        for e in v:
            self.assertEqual(graph.edge_by_id(e)[0], 1)

        v = graph.inc_edges(1)
        self.assertTrue(isinstance(v, list))
        self.assertEqual(graph.edge_by_id(v[0]), (4, 1))

        v = graph.inc_edges(5)
        for e in v:
            self.assertEqual(graph.edge_by_id(e)[1], 5)

        v = graph.all_edges(5)
        for e in v:
            self.assertTrue(graph.edge_by_id(e)[1] == 5 or graph.edge_by_id(e)[0] == 5)

        e1 = graph.edge_by_node(1, 2)
        self.assertTrue(isinstance(e1, int))
        graph.hide_node(1)
        self.assertRaises(GraphError, graph.edge_by_node, 1, 2)
        graph.restore_node(1)
        e2 = graph.edge_by_node(1, 2)
        self.assertEqual(e1, e2)
示例#10
0
    def test_iterdata(self):
        graph = Graph()
        graph.add_node("1", "I")
        graph.add_node("1.1", "I.I")
        graph.add_node("1.2", "I.II")
        graph.add_node("1.3", "I.III")
        graph.add_node("1.1.1", "I.I.I")
        graph.add_node("1.1.2", "I.I.II")
        graph.add_node("1.2.1", "I.II.I")
        graph.add_node("1.2.2", "I.II.II")
        graph.add_node("1.2.2.1", "I.II.II.I")
        graph.add_node("1.2.2.2", "I.II.II.II")
        graph.add_node("1.2.2.3", "I.II.II.III")

        graph.add_edge("1", "1.1")
        graph.add_edge("1", "1.2")
        graph.add_edge("1", "1.3")
        graph.add_edge("1.1", "1.1.1")
        graph.add_edge("1.1", "1.1.2")
        graph.add_edge("1.2", "1.2.1")
        graph.add_edge("1.2", "1.2.2")
        graph.add_edge("1.2.2", "1.2.2.1")
        graph.add_edge("1.2.2", "1.2.2.2")
        graph.add_edge("1.2.2", "1.2.2.3")

        result = list(graph.iterdata("1", forward=True))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II', 'I.II.II', 'I.II.II.III', 'I.II.II.II',
            'I.II.II.I', 'I.II.I', 'I.I', 'I.I.II', 'I.I.I'
        ])

        result = list(graph.iterdata("1", end="1.2.1", forward=True))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II', 'I.II.II', 'I.II.II.III', 'I.II.II.II',
            'I.II.II.I', 'I.II.I'
        ])

        result = list(graph.iterdata("1", condition=lambda n: len(n) < 6, forward=True))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II',
            'I.I', 'I.I.I'
        ])


        # And the revese option:
        graph = Graph()
        graph.add_node("1", "I")
        graph.add_node("1.1", "I.I")
        graph.add_node("1.2", "I.II")
        graph.add_node("1.3", "I.III")
        graph.add_node("1.1.1", "I.I.I")
        graph.add_node("1.1.2", "I.I.II")
        graph.add_node("1.2.1", "I.II.I")
        graph.add_node("1.2.2", "I.II.II")
        graph.add_node("1.2.2.1", "I.II.II.I")
        graph.add_node("1.2.2.2", "I.II.II.II")
        graph.add_node("1.2.2.3", "I.II.II.III")

        graph.add_edge("1.1", "1")
        graph.add_edge("1.2", "1")
        graph.add_edge("1.3", "1")
        graph.add_edge("1.1.1", "1.1")
        graph.add_edge("1.1.2", "1.1")
        graph.add_edge("1.2.1", "1.2")
        graph.add_edge("1.2.2", "1.2")
        graph.add_edge("1.2.2.1", "1.2.2")
        graph.add_edge("1.2.2.2", "1.2.2")
        graph.add_edge("1.2.2.3", "1.2.2")

        result = list(graph.iterdata("1", forward=False))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II', 'I.II.II', 'I.II.II.III', 'I.II.II.II',
            'I.II.II.I', 'I.II.I', 'I.I', 'I.I.II', 'I.I.I'
        ])

        result = list(graph.iterdata("1", end="1.2.1", forward=False))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II', 'I.II.II', 'I.II.II.III', 'I.II.II.II',
            'I.II.II.I', 'I.II.I'
        ])

        result = list(graph.iterdata("1", condition=lambda n: len(n) < 6, forward=False))
        self.assertEqual(result, [
            'I', 'I.III', 'I.II',
            'I.I', 'I.I.I'
        ])
示例#11
0
    def test_toposort(self):
        graph = Graph()
        graph.add_node(1)
        graph.add_node(2)
        graph.add_node(3)
        graph.add_node(4)
        graph.add_node(5)

        graph.add_edge(1, 2)
        graph.add_edge(1, 3)
        graph.add_edge(2, 4)
        graph.add_edge(3, 5)

        ok, result = graph.forw_topo_sort()
        self.assertTrue(ok)
        for idx in range(1, 6):
            self.assertTrue(idx in result)

        self.assertTrue(result.index(1) < result.index(2))
        self.assertTrue(result.index(1) < result.index(3))
        self.assertTrue(result.index(2) < result.index(4))
        self.assertTrue(result.index(3) < result.index(5))

        ok, result = graph.back_topo_sort()
        self.assertTrue(ok)
        for idx in range(1, 6):
            self.assertTrue(idx in result)
        self.assertTrue(result.index(2) < result.index(1))
        self.assertTrue(result.index(3) < result.index(1))
        self.assertTrue(result.index(4) < result.index(2))
        self.assertTrue(result.index(5) < result.index(3))


        # Same graph as before, but with edges
        # reversed, which means we should get
        # the same results as before if using
        # back_topo_sort rather than forw_topo_sort
        # (and v.v.)

        graph = Graph()
        graph.add_node(1)
        graph.add_node(2)
        graph.add_node(3)
        graph.add_node(4)
        graph.add_node(5)

        graph.add_edge(2, 1)
        graph.add_edge(3, 1)
        graph.add_edge(4, 2)
        graph.add_edge(5, 3)

        ok, result = graph.back_topo_sort()
        self.assertTrue(ok)
        for idx in range(1, 6):
            self.assertTrue(idx in result)

        self.assertTrue(result.index(1) < result.index(2))
        self.assertTrue(result.index(1) < result.index(3))
        self.assertTrue(result.index(2) < result.index(4))
        self.assertTrue(result.index(3) < result.index(5))

        ok, result = graph.forw_topo_sort()
        self.assertTrue(ok)
        for idx in range(1, 6):
            self.assertTrue(idx in result)
        self.assertTrue(result.index(2) < result.index(1))
        self.assertTrue(result.index(3) < result.index(1))
        self.assertTrue(result.index(4) < result.index(2))
        self.assertTrue(result.index(5) < result.index(3))


        # Create a cycle
        graph.add_edge(1, 5)
        ok, result = graph.forw_topo_sort()
        self.assertFalse(ok)
        ok, result = graph.back_topo_sort()
        self.assertFalse(ok)
示例#12
0
class WorkflowApp(App):

    def __init__(self, app_id, steps, context,
                 inputs=None, outputs=None, to=None,
                 app_description=None,
                 annotations=None,
                 platform_features=None):
        self.graph = Graph()
        self.inputs = inputs or []
        self.outputs = outputs or []
        self.executor = context.executor
        self.steps = steps
        self.to = to or {}
        self.context = context

        for step in steps:
            self.add_node(step.id,  AppNode(step.app, {}))

        for step in steps:
            # inputs
            for input_port, input_val in six.iteritems(step.inputs):
                inp = wrap_in_list(input_val)
                for item in inp:
                    self.add_edge_or_input(step, input_port, item)

            # outputs
            if step.outputs:
                for output_port, output_val in six.iteritems(step.outputs):
                    self.to[output_val['$to']] = output_port
                    if isinstance(step.app, WorkflowApp):
                        output_node = step.app.get_output(step.app.to.get(output_port))
                    else:
                        output_node = step.app.get_output(output_port)
                    output_id = output_val['$to']
                    self.add_node(output_id, output_node)
                    self.graph.add_edge(
                        step.id, output_id, OutputRelation(output_port)
                    )
                    # output_node.id = output_val['$to']
                    self.outputs.append(output_node)

        if not self.graph.connected():
            pass
            # raise ValidationError('Graph is not connected')

        schema = {
            "@type": "JsonSchema",
            "type": "object",
            "properties": {},
            "required": []
        }

        for inp in self.inputs:
            schema['properties'][inp.id] = inp.validator.schema
            if inp.required:
                schema['required'].append(inp.id)

        super(WorkflowApp, self).__init__(
            app_id, JsonSchema(context, schema), self.outputs,
            app_description=app_description,
            annotations=annotations,
            platform_features=platform_features
        )

    def add_edge_or_input(self, step, input_name, input_val):
        node_id = step.id
        if isinstance(input_val, dict) and '$from' in input_val:
            frm = wrap_in_list(input_val['$from'])
            for inp in frm:
                if '.' in inp:
                    node, outp = inp.split('.')
                    self.graph.add_edge(node, node_id, Relation(outp, input_name))
                else:
                    # TODO: merge input schemas if one input goes to different apps

                    input = step.app.get_input(input_name)
                    if inp not in self.graph.nodes:
                        self.add_node(inp, input)
                    self.graph.add_edge(
                        inp, node_id, InputRelation(input_name)
                    )
                    wf_input = copy.deepcopy(input)
                    wf_input.id = inp
                    self.inputs.append(wf_input)

        else:
            self.graph.node_data(node_id).inputs[input_name] = input_val

    # Graph.add_node silently fails if node already exists
    def add_node(self, node_id, node):
        if node_id in self.graph.nodes:
            raise ValidationError('Duplicate node ID: %s' % node_id)
        self.graph.add_node(node_id, node)

    def hide_nodes(self, type):
        for node_id in self.graph.node_list():
            node = self.graph.node_data(node_id)
            if isinstance(node, type):
                self.graph.hide_node(node_id)

    def run(self, job):
        eg = ExecutionGraph(self, job)
        while eg.has_next():
            next_id, next = eg.next_job()
            self.executor.execute(next, eg.job_done, next_id)
        return eg.outputs

    def to_dict(self, context):
        d = super(WorkflowApp, self).to_dict(context)
        d.update({
            "@type": "Workflow",
            'steps': [step.to_dict(context) for step in self.steps]
        })
        return d

    @classmethod
    def from_dict(cls, context, d):
        steps = [Step(
            step['id'], context.from_dict(step['app']),
            step['inputs'], step.get('outputs')
        )
            for step in d['steps']]

        return cls(
            d.get('@id', six.text_type(uuid4())),
            steps,
            context
        )