Example #1
def broadcast(tensor, root_rank, scope='', name=None, is_variable=True):
    """An op which broadcasts the input tensor on root rank to the same input tensor
    on all other BytePS processes.

    The broadcast operation is keyed by the name of the op. The tensor type and
    shape must be the same on all BytePS processes for a given name. The broadcast
    will not start until all processes are ready to send and receive the tensor.

    Returns:
      A tensor of the same shape and type as `tensor`, with the value broadcasted
      from root rank.
    """
    # Broadcast is implemented as push + pull after zero-ing non-root tensors
    if name is None and not _executing_eagerly():
        name = 'BytePSBroadcast_%s' % _normalize_name(tensor.name)
    if scope == '' and not _executing_eagerly():
        if 'v1' in dir(tf.compat):
            scope = tf.compat.v1.get_default_graph().get_name_scope()
        else:
            scope = tf.get_default_graph().get_name_scope()
        if scope != '':
            scope += '/'
    full_name = scope + name
    full_name = full_name.encode("ascii")
    TF_LIB_CTYPES.byteps_tensorflow_declare_tensor(ctypes.c_char_p(full_name))
    if root_rank != rank():
        if is_variable:
            with tf.control_dependencies([tf.assign_sub(tensor, tensor)]):
                return C_LIB.byteps_push_pull(tensor, name=name)
        else:
            with tf.device(tensor.device):
                input_tensor = tf.zeros_like(tensor)
            return C_LIB.byteps_push_pull(input_tensor, name=name)
    else:
        return C_LIB.byteps_push_pull(tensor, name=name)
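
The broadcast op above is normally reached through the public byteps.tensorflow API rather than by calling the graph op directly. Below is a minimal usage sketch in TF1-style graph mode; it assumes the package exposes init(), rank(), and this broadcast() function as bps.init, bps.rank, and bps.broadcast, which is an assumption made for illustration rather than something stated in the snippet.

# Hypothetical usage sketch: broadcast a variable's value from rank 0.
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public module exposing the op above

bps.init()

# Every rank builds the same variable; only rank 0 holds the "real" value.
initial = [1.0, 2.0, 3.0] if bps.rank() == 0 else [0.0, 0.0, 0.0]
weight = tf.Variable(initial, name="weight")

# The returned tensor carries rank 0's value on every process; it is not
# assigned back to the variable automatically.
synced = bps.broadcast(weight, root_rank=0)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    print(bps.rank(), sess.run(synced))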
Example #2
def _push_pull(tensor, scope='', name=None):
    """An op which sums an input tensor over all the BytePS processes.

    The reduction operation is keyed by the name of the op. The tensor type and
    shape must be the same on all BytePS processes for a given name. The reduction
    will not start until all processes are ready to send and receive the tensor.

    Returns:
      A tensor of the same shape and type as `tensor`, summed across all
      processes.
    """
    if name is None and not _executing_eagerly():
        name = 'BytePSPushPull_%s' % _normalize_name(tensor.name)
    if scope == '' and not _executing_eagerly():
        if 'v1' in dir(tf.compat):
            scope = tf.compat.v1.get_default_graph().get_name_scope()
        else:
            scope = tf.get_default_graph().get_name_scope()
        if scope != '':
            scope += '/'
    if not name:
        name = ''
    full_name = scope + name
    if not full_name:
        full_name = "empty_name_" + randomString()
    full_name_ascii = full_name.encode("ascii")
    TF_LIB_CTYPES.byteps_tensorflow_declare_tensor(
        ctypes.c_char_p(full_name_ascii))
    return C_LIB.byteps_push_pull(tensor, name=name, input_name=full_name)
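
_push_pull is the low-level building block; user code normally goes through a public push_pull wrapper that adds options such as compression on top of this op. The sketch below assumes byteps.tensorflow exposes init(), rank(), and such a push_pull() wrapper; those names are assumptions made for illustration.

# Hypothetical usage sketch: reduce a tensor across all BytePS processes.
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public module built on _push_pull

bps.init()

# Each rank contributes its own rank id. The raw _push_pull op sums the
# contributions; a higher-level wrapper may divide by the number of ranks,
# depending on its defaults.
local = tf.constant(float(bps.rank()), shape=[4])
reduced = bps.push_pull(local)

with tf.compat.v1.Session() as sess:
    print(bps.rank(), sess.run(reduced))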
Example #3
    def __init__(self, optimizer, name=None, use_locking=False, device_dense='',
                 device_sparse='', compression=Compression.none,
                 sparse_as_dense=False):
        """Construct a new DistributedOptimizer, which uses another optimizer
        under the hood for computing single-process gradient values and
        applying gradient updates after the gradient values have been averaged
        across all the BytePS ranks.

        Args:
          optimizer:
            Optimizer to use for computing gradients and applying updates.
          name:
            Optional name prefix for the operations created when applying
            gradients. Defaults to "Distributed" followed by the provided
            optimizer type.
          use_locking:
            Whether to use locking when updating variables.
            See Optimizer.__init__ for more info.
          device_dense:
            Device to be used for dense tensors. Uses GPU by default.
          device_sparse:
            Device to be used for sparse tensors. Uses GPU by default.
          compression:
            Compression algorithm used during push_pull to reduce the amount
            of data sent during each parameter update step.  Defaults to
            not using compression.
          sparse_as_dense:
            Treat all sparse gradients as dense tensors.  This can help improve
            performance and memory utilization if the original sparse gradient
            has high density.  Defaults to false.
        """
        if name is None:
            name = "Distributed{}".format(type(optimizer).__name__)

        self._optimizer = optimizer
        self._device_dense = device_dense
        self._device_sparse = device_sparse
        self._compression = compression
        self._sparse_as_dense = sparse_as_dense

        def push_pull_grads(grads):
            with tf.name_scope(self._name + "_Push_Pull") as scope:
                if self._sparse_as_dense:
                    grads = [tf.convert_to_tensor(grad)
                             if grad is not None and isinstance(grad, tf.IndexedSlices)
                             else grad for grad in grads]

                return [push_pull(grad, scope,
                                  device_dense=self._device_dense,
                                  device_sparse=self._device_sparse,
                                  compression=self._compression)
                        if grad is not None else grad
                        for grad in grads]

        if _executing_eagerly():
            self._push_pull_grads = tf.contrib.eager.defun(push_pull_grads)
        else:
            self._push_pull_grads = push_pull_grads

        super(DistributedOptimizer, self).__init__(
            name=name, use_locking=use_locking)
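
The wrapper keeps the tf.train.Optimizer interface, so a training script only needs to construct it in place of the original optimizer. A minimal sketch, assuming byteps.tensorflow exposes init(), size(), and this DistributedOptimizer class:

# Hypothetical usage sketch: wrap a stock optimizer for data-parallel training.
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public module exposing the class above

bps.init()

# Scale the learning rate with the number of workers, a common convention
# for synchronous data-parallel training.
opt = tf.compat.v1.train.AdamOptimizer(0.001 * bps.size())

# Gradients are now reduced across ranks (via push_pull) before they are
# applied; compute_gradients/minimize keep their usual signatures.
opt = bps.DistributedOptimizer(opt)

x = tf.compat.v1.placeholder(tf.float32, [None, 10])
w = tf.Variable(tf.zeros([10, 1]), name="w")
loss = tf.reduce_mean(tf.square(tf.matmul(x, w) - 1.0))
train_op = opt.minimize(loss)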
Example #4
        def __init__(self,
                     optimizer,
                     name=None,
                     use_locking=False,
                     device_dense='',
                     device_sparse='',
                     compression=Compression.none,
                     sparse_as_dense=False,
                     op=Average):
            if name is None:
                name = "Distributed{}".format(type(optimizer).__name__)
            super(_DistributedOptimizer,
                  self).__init__(name=name, use_locking=use_locking)

            self._optimizer = optimizer
            self._device_dense = device_dense
            self._device_sparse = device_sparse
            self._compression = compression
            self._sparse_as_dense = sparse_as_dense

            self._enable_async = (int(os.getenv('BYTEPS_ENABLE_ASYNC', 0)) !=
                                  0)
            if self._enable_async:
                assert int(os.getenv('DMLC_NUM_WORKER')) > 1, \
                    "Async is only valid for distributed training"
                print('BytePS: enable asynchronous training')

            def push_pull_grads(grads):
                with tf.name_scope(self._name + "_Push_Pull") as scope:
                    if self._sparse_as_dense:
                        grads = [
                            tf.convert_to_tensor(grad) if grad is not None
                            and isinstance(grad, tf.IndexedSlices) else grad
                            for grad in grads
                        ]

                    return [
                        push_pull(grad,
                                  scope,
                                  device_dense=self._device_dense,
                                  device_sparse=self._device_sparse,
                                  compression=self._compression,
                                  enable_async=self._enable_async)
                        if grad is not None else grad for grad in grads
                    ]

            if _executing_eagerly():
                self._push_pull_grads = tf.contrib.eager.defun(push_pull_grads)
            else:
                self._push_pull_grads = push_pull_grads
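
Example #4 differs from Example #3 mainly in the BYTEPS_ENABLE_ASYNC switch and the extra op argument. The sketch below shows how the asynchronous path would be toggled; the environment variables follow the assertion in the snippet, while the public names (bps.init, bps.DistributedOptimizer wrapping the private _DistributedOptimizer) are assumptions made for illustration.

# Hypothetical usage sketch: opt in to asynchronous training.
import os
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public wrapper around _DistributedOptimizer

# Must be set before the optimizer is constructed, since __init__ reads it.
# The assertion above also requires a distributed job (DMLC_NUM_WORKER > 1),
# which is normally configured by the BytePS launcher, not by the script.
os.environ.setdefault("BYTEPS_ENABLE_ASYNC", "1")

bps.init()

opt = tf.compat.v1.train.GradientDescentOptimizer(0.01)
opt = bps.DistributedOptimizer(opt)  # logs "BytePS: enable asynchronous training"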
Example #5
def _push_pull(tensor, scope='', name=None):
    """An op which sums an input tensor over all the BytePS processes.

    The reduction operation is keyed by the name of the op. The tensor type and
    shape must be the same on all BytePS processes for a given name. The reduction
    will not start until all processes are ready to send and receive the tensor.

    Returns:
      A tensor of the same shape and type as `tensor`, summed across all
      processes.
    """
    if name is None and not _executing_eagerly():
        name = 'BytePSPushPull_%s' % _normalize_name(tensor.name)
    # ctypes.c_char_p expects bytes in Python 3, so encode the declared name.
    full_name = (scope + name).encode("ascii")
    TF_LIB_CTYPES.byteps_tensorflow_declare_tensor(ctypes.c_char_p(full_name))
    return C_LIB.byteps_push_pull(tensor, name=name)
Example #6
    def broadcast_global_variables(root_rank):
        """Broadcasts all global variables from root rank to all other processes.

        **NOTE:** deprecated in TensorFlow 2.0.

        Arguments:
            root_rank: rank of the process from which global variables will be broadcasted
                       to all other processes.
        """
        if _executing_eagerly():
            raise RuntimeError(
                "bps.broadcast_global_variables() does not support eager execution. "
                "Please use `bps.broadcast_variables(<model/optimizer variables>)` instead."
            )

        return broadcast_variables(_global_variables(), root_rank)
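
In graph mode this op is typically run once, right after variable initialization, so every worker starts training from rank 0's weights. A minimal sketch, assuming byteps.tensorflow exposes init() and this broadcast_global_variables() function:

# Hypothetical usage sketch: synchronize initial weights across workers.
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public module exposing the op above

bps.init()

w = tf.Variable(tf.random.normal([10, 10]), name="w")
bcast = bps.broadcast_global_variables(root_rank=0)

with tf.compat.v1.Session() as sess:
    sess.run(tf.compat.v1.global_variables_initializer())
    sess.run(bcast)  # all ranks now hold rank 0's initial values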
Example #7
def broadcast(tensor, root_rank, name=None, is_variable=True):
    """An op which broadcasts the input tensor on root rank to the same input tensor
    on all other BytePS processes.

    The broadcast operation is keyed by the name of the op. The tensor type and
    shape must be the same on all BytePS processes for a given name. The broadcast
    will not start until all processes are ready to send and receive the tensor.

    Returns:
      A tensor of the same shape and type as `tensor`, with the value broadcasted
      from root rank.
    """
    # Broadcast is implemented as push + pull after zero-ing non-root tensors
    if name is None and not _executing_eagerly():
        name = 'BytePSBroadcast_%s' % _normalize_name(tensor.name)
    # ctypes.c_char_p expects bytes in Python 3, so encode the declared name.
    TF_LIB_CTYPES.byteps_tensorflow_declare_tensor(
        ctypes.c_char_p(name.encode("ascii")))
    if is_variable and (root_rank != rank()):
        return C_LIB.byteps_push_pull(tensor.assign(tf.zeros_like(tensor)),
                                      name=name)
    else:
        # TODO: needs to zero-out non-variable tensors, too
        return C_LIB.byteps_push_pull(tensor, name=name)
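
Compared with Example #1, this older variant only zeroes out tf.Variable inputs on non-root ranks; the TODO notes that plain tensors are not zeroed, so their push_pull result would be a sum over ranks rather than a true broadcast. A short sketch of the supported path, assuming the function is exposed as bps.broadcast:

# Hypothetical usage sketch: only tf.Variable inputs are safe to broadcast here.
import tensorflow as tf
import byteps.tensorflow as bps  # assumed public module exposing the op above

bps.init()

v = tf.Variable([float(bps.rank())] * 4, name="v")
synced = bps.broadcast(v, root_rank=0, is_variable=True)  # rank 0's value on all ranks

# Per the TODO above, passing a plain tensor with is_variable=False would not
# zero the non-root contributions, so the result would be a sum, not a broadcast.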