Exemplo n.º 1
0
    def _create_new_op(self, *args, **kwargs):
        """
        Builds a child node for the operation requested on the proxied node
        and hands back the object that represents the result of that call.

        The operation name is read from the proxied node's `_new_op_name`;
        positional and keyword arguments are forwarded to `Operation`.

        Returns:
            An `ActionProxy` when the operation is an action, the new node's
            `value` (after executing the graph through the head node's
            backend) for "AsNumpy" and "Snapshot", and a
            `TransformationProxy` in every other case.
        """
        parent = self.proxied_node

        # Wrap the incoming call in an `Operation` and give it its own node
        # that shares the parent's way of reaching the head of the graph.
        operation = Operation(parent._new_op_name, *args, **kwargs)
        child = Node(operation=operation, get_head=parent.get_head)

        # Logger debug statements
        logger.debug("Created new {} node".format(operation.name))

        # Attach the freshly created node below the current one
        parent.children.append(child)

        # Actions are handed out wrapped in their dedicated proxy
        if operation.is_action():
            return ActionProxy(child)

        # These two operations trigger execution of the graph right away and
        # return the value stored in the new node.
        if operation.name in ("AsNumpy", "Snapshot"):
            head = parent.get_head()
            graph_generator = ComputationGraphGenerator(head)
            head.backend.execute(graph_generator)
            return child.value

        # Anything else is treated as a transformation
        return TransformationProxy(child)
Exemplo n.º 2
0
    def __setstate__(self, state):
        """
        Restores the instance variables of the node from a state dictionary.

        Args:
            state (dict): The state dictionary to rebuild the node from. It
                must contain 'children'; when 'operation_name' is present
                and truthy, 'operation_args' and 'operation_kwargs' are
                used to rebuild the operation as well.

        """
        self.children = state['children']

        op_name = state.get('operation_name')
        if not op_name:
            # No operation was stored for this node
            self.operation = None
            return

        # Rebuild the operation from its name and call arguments
        op_args = state['operation_args']
        op_kwargs = state["operation_kwargs"]
        self.operation = Operation(op_name, *op_args, **op_kwargs)
Exemplo n.º 3
0
class Node(object):
    """
    A Class that represents a node in RDataFrame operations graph. A Node
    houses an operation and has references to children nodes.
    For details on the types of operations supported, try :

    Example::

        import DistRDF
        DistRDF.use(...) # Choose your backend
        print(DistRDF.current_backend.supported_operations)

    Attributes:
        get_head (function): A lambda function that returns the head node of
            the current graph.

        operation: The operation that this Node represents.
            This could be :obj:`None`.

        children (list): A list of :obj:`DistRDF.Node` objects which represent
            the children nodes connected to the current node.

        _new_op_name (str): The name of the new incoming operation of the next
            child, which is the last child node among the current node's
            children.

        value: The computed value after executing the operation in the current
            node for a particular DistRDF graph. This is permanently :obj:`None`
            for transformation nodes and the action nodes get a
            :obj:`ROOT.RResultPtr` after event-loop execution.

        pyroot_node: Reference to the PyROOT object that implements the
            functionality of this node on the cpp side.

        has_user_references (bool): A flag to check whether the node has
            direct user references, that is if it is assigned to a variable.
            Default value is :obj:`True`, turns to :obj:`False` if the proxy
            that wraps the node gets garbage collected by Python.
    """

    def __init__(self, get_head, operation, *args):
        """
        Creates a new node based on the operation passed as argument.

        Args:
            get_head (function): A lambda function that returns the head node
                of the current graph. This value could be `None`, in which
                case the node considers itself the head of the graph.

            operation (DistRDF.Operation.Operation): The operation that this Node
                represents. This could be :obj:`None`.

            *args: Accepted for compatibility with callers; not used by this
                constructor.
        """
        if get_head is None:
            # Function to get 'head' Node
            self.get_head = lambda: self
        else:
            self.get_head = get_head

        self.operation = operation
        self.children = []
        self._new_op_name = ""
        self.value = None
        self.pyroot_node = None
        self.has_user_references = True

    def __getstate__(self):
        """
        Converts the state of the current node
        to a Python dictionary.

        Only the children and, when an operation is set, its name and call
        arguments are stored; the remaining attributes are not serialized.

        Returns:
            dictionary: A dictionary that stores the children and the
            operation description of the current DistRDF node.

        """
        state_dict = {'children': self.children}
        if self.operation:
            state_dict['operation_name'] = self.operation.name
            state_dict['operation_args'] = self.operation.args
            state_dict['operation_kwargs'] = self.operation.kwargs

        return state_dict

    def __setstate__(self, state):
        """
        Retrieves the state dictionary of the current
        node and sets the instance variables.

        Args:
            state (dict): This is the state dictionary that needs to be
                converted to a `Node` object.

        """
        self.children = state['children']
        if state.get('operation_name'):
            self.operation = Operation(state['operation_name'],
                                       *state['operation_args'],
                                       **state["operation_kwargs"])
        else:
            self.operation = None

    def is_prunable(self):
        """
        Checks whether the current node can be pruned from the computational
        graph.

        Returns:
            bool: True if the node has no children and either has no user
            references or is an action node whose value has already been
            computed, False otherwise.
        """
        if not self.children:
            # Every pruning condition is written on a separate line
            if not self.has_user_references or \
               (self.operation and self.operation.is_action() and self.value):

                # ***** Condition 1 *****
                # If the node is wrapped by a proxy which is not directly
                # assigned to a variable, then it will be flagged for pruning

                # ***** Condition 2 *****
                # If the current node's value was already
                # computed, it should get pruned only if it's
                # an Action node.

                # Logger debug statements.
                # Condition 1 does not require an operation, so the name can
                # only be read when the node actually holds one.
                if self.operation:
                    logger.debug("{} node can be pruned".format(
                        self.operation.name
                    ))
                else:
                    logger.debug("Node without operation can be pruned")

                return True

        # Logger debug statements
        if self.operation:  # Node has an operation
            logger.debug("{} node shouldn't be pruned".format(
                self.operation.name
            ))
        else:  # Node is the RDataFrame
            logger.debug("Graph pruning completed")
        return False

    def graph_prune(self):
        """
        Prunes nodes from the current DistRDF graph under certain conditions.
        The current node will be pruned if it has no children and the user
        application does not hold any reference to it. The children of the
        current node will get recursively pruned.

        Returns:
            bool: True if the current node has to be pruned, False otherwise.
        """
        # Logger debug statements
        if self.operation:
            logger.debug("Checking {} node for pruning".format(
                self.operation.name
            ))
        else:
            logger.debug("Starting computational graph pruning")

        # Recurse first: keep only the children that report they must stay,
        # so that this node's own check sees the already pruned child list.
        self.children = [
            child for child in self.children if not child.graph_prune()
        ]
        return self.is_prunable()
Exemplo n.º 4
0
 def test_action(self):
     """A "Count" operation must be typed as an action."""
     expected_type = Operation.Types.ACTION
     count_op = Operation("Count")
     self.assertEqual(count_op.op_type, expected_type)
Exemplo n.º 5
0
 def test_without_args_and_kwargs(self):
     """An Operation built from a name alone has empty args and kwargs."""
     bare_define = Operation("Define")
     positional, named = bare_define.args, bare_define.kwargs
     self.assertEqual(positional, [])
     self.assertEqual(named, {})
Exemplo n.º 6
0
 def test_with_args_and_kwargs(self):
     """Positional and keyword arguments are both stored on the Operation."""
     define_op = Operation("Define", 2, "p", a=1, b="b")
     positional, named = define_op.args, define_op.kwargs
     self.assertEqual(positional, [2, "p"])
     self.assertEqual(named, {"a": 1, "b": "b"})
Exemplo n.º 7
0
 def test_without_kwargs(self):
     """Positional arguments are stored and kwargs stay empty."""
     define_op = Operation("Define", 1, "b")
     positional, named = define_op.args, define_op.kwargs
     self.assertEqual(positional, [1, "b"])
     self.assertEqual(named, {})
Exemplo n.º 8
0
 def test_none(self):
     """Building an Operation with the name "random" must raise."""
     # Callable form of assertRaises: invokes Operation("random") itself
     self.assertRaises(Exception, Operation, "random")
Exemplo n.º 9
0
 def test_transformation(self):
     """A "Define" operation must be typed as a transformation."""
     expected_type = Operation.Types.TRANSFORMATION
     define_op = Operation("Define", "c1")
     self.assertEqual(define_op.op_type, expected_type)
Exemplo n.º 10
0
 def test_instant_action(self):
     """A "Snapshot" operation must be typed as an instant action."""
     expected_type = Operation.Types.INSTANT_ACTION
     snapshot_op = Operation("Snapshot")
     self.assertEqual(snapshot_op.op_type, expected_type)