Example #1
def traverse_graph(starting_pks, max_iterations=None, get_links=False, links_forward=(), links_backward=()):
    """
    This function will return the set of all nodes that can be connected
    to a list of initial nodes through any sequence of specified links.
    Optionally, it may also return the links that connect these nodes.

    :type starting_pks: list or tuple or set
    :param starting_pks: Contains the (valid) pks of the starting nodes.

    :type max_iterations: int or None
    :param max_iterations:
        The number of iterations to apply the set of rules (a value of 'None' will
        iterate until no new nodes are added).

    :param bool get_links:
        Pass True to also return the links between all nodes (found + initial).

    :type links_forward: aiida.common.links.LinkType
    :param links_forward:
        List with all the links that should be traversed in the forward direction.

    :type links_backward: aiida.common.links.LinkType
    :param links_backward:
        List with all the links that should be traversed in the backward direction.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches
    from math import inf

    from aiida import orm
    from aiida.common import exceptions
    from aiida.common.links import LinkType
    from aiida.tools.graph.age_entities import Basket
    from aiida.tools.graph.age_rules import UpdateRule, RuleSequence, RuleSaveWalkers, RuleSetWalkers

    if max_iterations is None:
        max_iterations = inf
    elif not (isinstance(max_iterations, int) or max_iterations is inf):
        raise TypeError('Max_iterations has to be an integer or infinity')

    linktype_list = []
    for linktype in links_forward:
        if not isinstance(linktype, LinkType):
            raise TypeError('links_forward should contain links, but one of them is: {}'.format(type(linktype)))
        linktype_list.append(linktype.value)
    filters_forwards = {'type': {'in': linktype_list}}

    linktype_list = []
    for linktype in links_backward:
        if not isinstance(linktype, LinkType):
            raise TypeError('links_backward should contain links, but one of them is: {}'.format(type(linktype)))
        linktype_list.append(linktype.value)
    filters_backwards = {'type': {'in': linktype_list}}

    if not isinstance(starting_pks, (list, set, tuple)):
        raise TypeError('starting_pks must be of type list, set or tuple\ninstead, it is {}'.format(type(starting_pks)))

    if not starting_pks:
        if get_links:
            output = {'nodes': set(), 'links': set()}
        else:
            output = {'nodes': set(), 'links': None}
        return output

    if any([not isinstance(pk, int) for pk in starting_pks]):
        raise TypeError('one of the starting_pks is not of type int:\n {}'.format(starting_pks))
    operational_set = set(starting_pks)

    query_nodes = orm.QueryBuilder()
    query_nodes.append(orm.Node, project=['id'], filters={'id': {'in': operational_set}})
    existing_pks = set(query_nodes.all(flat=True))
    missing_pks = operational_set.difference(existing_pks)
    if missing_pks:
        raise exceptions.NotExistent(
            'The following pks are not in the database and must be pruned before this call: {}'.format(missing_pks)
        )

    rules = []
    basket = Basket(nodes=operational_set)

    # When max_iterations is finite, the order of traversal may affect the result
    # (going backwards first and then forwards is not the same as the reverse).
    # To make the result order-independent, the outcome of the first operation needs
    # to be stashed and the second operation must be performed only on the nodes
    # that were already in the set at the beginning of the iteration: this way, both
    # rules are applied on the same set of nodes and the order doesn't matter.
    # This is done by saving and setting the walkers at the right moments, and only
    # when both forwards and backwards rules are present.
    if links_forward and links_backward:
        stash = basket.get_template()
        rules += [RuleSaveWalkers(stash)]

    if links_forward:
        query_outgoing = orm.QueryBuilder()
        query_outgoing.append(orm.Node, tag='sources')
        query_outgoing.append(orm.Node, edge_filters=filters_forwards, with_incoming='sources')
        rule_outgoing = UpdateRule(query_outgoing, max_iterations=1, track_edges=get_links)
        rules += [rule_outgoing]

    if links_forward and links_backward:
        rules += [RuleSetWalkers(stash)]

    if links_backward:
        query_incoming = orm.QueryBuilder()
        query_incoming.append(orm.Node, tag='sources')
        query_incoming.append(orm.Node, edge_filters=filters_backwards, with_outgoing='sources')
        rule_incoming = UpdateRule(query_incoming, max_iterations=1, track_edges=get_links)
        rules += [rule_incoming]

    rulesequence = RuleSequence(rules, max_iterations=max_iterations)

    results = rulesequence.run(basket)

    output = {}
    output['nodes'] = results.nodes.keyset
    output['links'] = None
    if get_links:
        output['links'] = results['nodes_nodes'].keyset

    return output
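
A minimal usage sketch of the function above (the pk values 1234 and 5678 are hypothetical): collect everything reachable from two nodes by following CREATE and RETURN links forward, together with the links that connect the traversed nodes.

from aiida.common.links import LinkType

descendants = traverse_graph(
    starting_pks=[1234, 5678],  # hypothetical pks of stored nodes
    links_forward=[LinkType.CREATE, LinkType.RETURN],
    get_links=True,
)
print(descendants['nodes'])  # pks of the starting nodes plus all nodes found
print(descendants['links'])  # the link entries connecting those nodes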
Example #2
    def test_stash(self):
        """Testing sequencies and 'stashing'

        Testing the dependency on the order of the operations in RuleSequence and the
        'stash' functionality. This will be performed in a graph that has a calculation
        (calc_ca) with two input data nodes (data_i1 and data_i2) and two output data
        nodes (data_o1 and data_o2), and another calculation (calc_cb) which takes one
        of the inputs and one of the outputs of the first one (data_i2 and data_o2) as
        its own inputs to produce a final output (data_o3).
        """
        nodes = self._create_branchy_graph()
        basket = Basket(nodes=[nodes['data_1'].id])

        queryb_inp = orm.QueryBuilder()
        queryb_inp.append(orm.Node, tag='nodes_in_set')
        queryb_inp.append(orm.Node, with_outgoing='nodes_in_set')
        uprule_inp = UpdateRule(queryb_inp)
        queryb_out = orm.QueryBuilder()
        queryb_out.append(orm.Node, tag='nodes_in_set')
        queryb_out.append(orm.Node, with_incoming='nodes_in_set')
        uprule_out = UpdateRule(queryb_out)

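        # These three nodes are reached regardless of the order in which the input and
        # output rules are applied; the assertions below check which extra neighbour
        # each ordering picks up.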
        expect_base = set([nodes['calc_1'].id, nodes['data_1'].id, nodes['calc_2'].id])

        # First get outputs, then inputs.
        rule_seq = RuleSequence((uprule_out, uprule_inp))
        obtained = rule_seq.run(basket.copy())['nodes'].keyset
        expected = expect_base.union(set([nodes['data_i'].id]))
        self.assertEqual(obtained, expected)

        # First get inputs, then outputs.
        rule_seq = RuleSequence((uprule_inp, uprule_out))
        obtained = rule_seq.run(basket.copy())['nodes'].keyset
        expected = expect_base.union(set([nodes['data_o'].id]))
        self.assertEqual(obtained, expected)

        # Now using the stash option in either order.
        stash = basket.get_template()
        rule_save = RuleSaveWalkers(stash)
        rule_load = RuleSetWalkers(stash)

        # Check that RuleSaveWalkers does the right thing
        # (i.e. after stashing, the operational set should remain the original set,
        # whereas the stash contains the same data as the starting point).
        obtained = rule_save.run(basket.copy())
        expected = basket.copy()
        self.assertEqual(obtained, expected)
        self.assertEqual(stash, basket)

        stash = basket.get_template()
        rule_save = RuleSaveWalkers(stash)
        rule_load = RuleSetWalkers(stash)
        serule_io = RuleSequence((rule_save, uprule_inp, rule_load, uprule_out))
        result_io = serule_io.run(basket.copy())['nodes'].keyset
        self.assertEqual(result_io, expect_base)

        stash = basket.get_template()
        rule_save = RuleSaveWalkers(stash)
        rule_load = RuleSetWalkers(stash)
        serule_oi = RuleSequence((rule_save, uprule_out, rule_load, uprule_inp))
        result_oi = serule_oi.run(basket.copy())['nodes'].keyset
        self.assertEqual(result_oi, expect_base)
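
A minimal sketch of the same stash pattern outside the test suite (the starting pk 1234 is hypothetical): saving the walkers before the first rule and restoring them before the second makes both rules expand from the same starting set, so a single pass gives the same result whichever rule comes first.

from aiida import orm
from aiida.tools.graph.age_entities import Basket
from aiida.tools.graph.age_rules import UpdateRule, RuleSequence, RuleSaveWalkers, RuleSetWalkers

basket = Basket(nodes=[1234])  # hypothetical starting pk

query_in = orm.QueryBuilder()
query_in.append(orm.Node, tag='nodes_in_set')
query_in.append(orm.Node, with_outgoing='nodes_in_set')

query_out = orm.QueryBuilder()
query_out.append(orm.Node, tag='nodes_in_set')
query_out.append(orm.Node, with_incoming='nodes_in_set')

stash = basket.get_template()
sequence = RuleSequence((
    RuleSaveWalkers(stash),  # remember the current operational set
    UpdateRule(query_in),    # expand towards inputs
    RuleSetWalkers(stash),   # restore the saved set before expanding again
    UpdateRule(query_out),   # expand towards outputs from the same starting set
))
neighbour_pks = sequence.run(basket)['nodes'].keyset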
Example #3
from math import inf
from typing import Callable, Iterable, Optional, cast

from aiida import orm
from aiida.common import exceptions
from aiida.common.links import LinkType
from aiida.tools.graph.age_entities import Basket
from aiida.tools.graph.age_rules import RuleSaveWalkers, RuleSequence, RuleSetWalkers, UpdateRule

# TraverseGraphOutput is assumed to be defined alongside this function in the same
# module (aiida.tools.graph.graph_traversers).


def traverse_graph(
    starting_pks: Iterable[int],
    max_iterations: Optional[int] = None,
    get_links: bool = False,
    links_forward: Iterable[LinkType] = (),
    links_backward: Iterable[LinkType] = (),
    missing_callback: Optional[Callable[[Iterable[int]], None]] = None
) -> TraverseGraphOutput:
    """
    This function will return the set of all nodes that can be connected
    to a list of initial nodes through any sequence of specified links.
    Optionally, it may also return the links that connect these nodes.

    :param starting_pks: Contains the (valid) pks of the starting nodes.

    :param max_iterations:
        The number of iterations to apply the set of rules (a value of 'None' will
        iterate until no new nodes are added).

    :param get_links: Pass True to also return the links between all nodes (found + initial).

    :param links_forward: List with all the links that should be traversed in the forward direction.
    :param links_backward: List with all the links that should be traversed in the backward direction.

    :param missing_callback: A callback invoked with the set of missing starting_pks; if None, a NotExistent exception is raised instead.
    """
    # pylint: disable=too-many-locals,too-many-statements,too-many-branches

    if max_iterations is None:
        max_iterations = cast(int, inf)
    elif not (isinstance(max_iterations, int) or max_iterations is inf):
        raise TypeError('Max_iterations has to be an integer or infinity')

    linktype_list = []
    for linktype in links_forward:
        if not isinstance(linktype, LinkType):
            raise TypeError(
                f'links_forward should contain links, but one of them is: {type(linktype)}'
            )
        linktype_list.append(linktype.value)
    filters_forwards = {'type': {'in': linktype_list}}

    linktype_list = []
    for linktype in links_backward:
        if not isinstance(linktype, LinkType):
            raise TypeError(
                f'links_backward should contain links, but one of them is: {type(linktype)}'
            )
        linktype_list.append(linktype.value)
    filters_backwards = {'type': {'in': linktype_list}}

    if not isinstance(starting_pks, Iterable):  # pylint: disable=isinstance-second-argument-not-valid-type
        raise TypeError(
            f'starting_pks must be an iterable\ninstead, it is {type(starting_pks)}'
        )

    if any([not isinstance(pk, int) for pk in starting_pks]):
        raise TypeError(
            f'one of the starting_pks is not of type int:\n {starting_pks}')
    operational_set = set(starting_pks)

    if not operational_set:
        if get_links:
            return {'nodes': set(), 'links': set()}
        return {'nodes': set(), 'links': None}

    query_nodes = orm.QueryBuilder()
    query_nodes.append(orm.Node,
                       project=['id'],
                       filters={'id': {
                           'in': operational_set
                       }})
    existing_pks = set(query_nodes.all(flat=True))
    missing_pks = operational_set.difference(existing_pks)
    if missing_pks:
        if missing_callback is None:
            raise exceptions.NotExistent(
                f'The following pks are not in the database and must be pruned before this call: {missing_pks}'
            )
        missing_callback(missing_pks)

    rules = []
    basket = Basket(nodes=existing_pks)

    # When max_iterations is finite, the order of traversal may affect the result
    # (going backwards first and then forwards is not the same as the reverse).
    # To make the result order-independent, the outcome of the first operation needs
    # to be stashed and the second operation must be performed only on the nodes
    # that were already in the set at the beginning of the iteration: this way, both
    # rules are applied on the same set of nodes and the order doesn't matter.
    # This is done by saving and setting the walkers at the right moments, and only
    # when both forwards and backwards rules are present.
    if links_forward and links_backward:
        stash = basket.get_template()
        rules += [RuleSaveWalkers(stash)]

    if links_forward:
        query_outgoing = orm.QueryBuilder()
        query_outgoing.append(orm.Node, tag='sources')
        query_outgoing.append(orm.Node,
                              edge_filters=filters_forwards,
                              with_incoming='sources')
        rule_outgoing = UpdateRule(query_outgoing,
                                   max_iterations=1,
                                   track_edges=get_links)
        rules += [rule_outgoing]

    if links_forward and links_backward:
        rules += [RuleSetWalkers(stash)]

    if links_backward:
        query_incoming = orm.QueryBuilder()
        query_incoming.append(orm.Node, tag='sources')
        query_incoming.append(orm.Node,
                              edge_filters=filters_backwards,
                              with_outgoing='sources')
        rule_incoming = UpdateRule(query_incoming,
                                   max_iterations=1,
                                   track_edges=get_links)
        rules += [rule_incoming]

    rulesequence = RuleSequence(rules, max_iterations=max_iterations)

    results = rulesequence.run(basket)

    output = {}
    output['nodes'] = results.nodes.keyset
    output['links'] = None
    if get_links:
        output['links'] = results['nodes_nodes'].keyset

    return cast(TraverseGraphOutput, output)
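
A minimal sketch of the missing_callback hook (the pks and the warn_missing function are hypothetical): instead of raising NotExistent, starting pks that are no longer in the database are reported and the traversal continues from the ones that still exist.

from aiida.common.links import LinkType

def warn_missing(missing_pks):
    """Hypothetical handler that merely reports pks absent from the database."""
    print(f'ignoring missing starting pks: {sorted(missing_pks)}')

result = traverse_graph(
    starting_pks=[1234, 5678],  # hypothetical pks; some may have been deleted
    links_forward=[LinkType.CREATE, LinkType.RETURN],
    links_backward=[LinkType.INPUT_CALC, LinkType.INPUT_WORK],
    missing_callback=warn_missing,
)
print(result['nodes'])  # pks of all connected nodes that were found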