def _reduce_non_distributed_value(extended, reduce_op, value, destinations):
  """Reduce a non-DistributedValue `value` to `destinations`."""
  if isinstance(value, values.DistributedValues):
    raise ValueError("You are passing a `DistributedValue` to "
                     "`_reduce_non_distributed_value`, which is not allowed.")

  # If the same value is present on all replicas then the PerReplica value will
  # be a single value. We also handle the case when `value` is a single value
  # and equal to 0.
  if value == 0:
    return 0
  # If there is only a single value and the reduce op is MEAN,
  # that value should be on all destinations.
  if reduce_op == reduce_util.ReduceOp.MEAN:
    return value

  cross_device_ops_lib.validate_destinations(destinations)
  # We do not support a reduce op of SUM if the value is the same across all
  # replicas. This function is called as part of assign functions for
  # MirroredVariables, and summing up identical values across replicas is not
  # clearly defined.
  if (len(extended.worker_devices) != 1 or
      not cross_device_ops_lib.check_destinations(destinations)):
    raise ValueError("A non-DistributedValues value %s cannot be reduced with "
                     "the given reduce op %s." % (value, reduce_op))
  # TODO(anjalisridhar): Move these methods to a device utility file?
  devices = cross_device_ops_lib.get_devices_from(destinations)
  if len(devices) == 1:
    with ops.device(devices[0]):
      return array_ops.identity(value)
  else:
    value_updates = {}
    for d in devices:
      with ops.device(d):
        value_updates[d] = array_ops.identity(value)
    return values.Mirrored(value_updates)
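# Usage sketch (not from the file above; names outside it are assumptions):
# under a MirroredStrategy, reducing a plain, non-distributed value is expected
# to flow through a helper like _reduce_non_distributed_value via the public
# tf.distribute.StrategyExtended.reduce_to API; the exact routing varies across
# TF versions.
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy(["/cpu:0"])
with strategy.scope():
  # MEAN of a value assumed identical on every replica is just that value.
  mean_val = strategy.extended.reduce_to(
      tf.distribute.ReduceOp.MEAN, tf.constant(3.0), destinations="/cpu:0")
  # SUM of identical values is only well defined here with a single device;
  # with several worker devices the helper raises the ValueError shown above.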
def _broadcast_to(self, tensor, destinations):
  # This is both a fast path for Python constants, and a way to delay
  # converting Python values to a tensor until we know what type it
  # should be converted to. Otherwise we have trouble with:
  #   global_step.assign_add(1)
  # since the `1` gets broadcast as an int32 but global_step is int64.
  if isinstance(tensor, (float, int)):
    return tensor
  if not cross_device_ops_lib.check_destinations(destinations):
    destinations = self._compute_devices
  return self._cross_device_ops.broadcast(tensor, destinations)
def _broadcast_to(self, tensor, destinations):
  # This is both a fast path for Python constants, and a way to delay
  # converting Python values to a tensor until we know what type it
  # should be converted to. Otherwise we have trouble with:
  #   global_step.assign_add(1)
  # since the `1` gets broadcast as an int32 but global_step is int64.
  if isinstance(tensor, (float, int)):
    return tensor
  if not cross_device_ops_lib.check_destinations(destinations):
    # TODO(josh11b): Use current logical device instead of 0 here.
    destinations = values.LogicalDeviceSpec(
        device_map=self._device_map, logical_device=0)
  return self._cross_device_ops.broadcast(tensor, destinations)
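# Stand-alone illustration (hypothetical, not part of the method above) of the
# int32/int64 pitfall the Python-constant fast path avoids: converting the
# literal `1` to a tensor eagerly would pick int32, which cannot then be added
# to an int64 global_step variable.
import tensorflow as tf

global_step = tf.Variable(0, dtype=tf.int64)
global_step.assign_add(1)  # OK: the Python int is converted lazily to int64
# global_step.assign_add(tf.constant(1))  # fails: int32 tensor vs. int64 variable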
def reduce_implementation(self, reduce_op, per_replica_value, destinations):
  # Pick the device to reduce to: the first destination device if any were
  # given, otherwise the first device holding the per-replica value.
  if tf_cross_device_ops.check_destinations(destinations):
    devices = tf_cross_device_ops.get_devices_from(destinations)
  else:
    devices = tf_cross_device_ops.get_devices_from(per_replica_value)
  reduce_to_device = devices[0]
  logging.log_first_n(
      logging.INFO, "Using byteps push pull to aggregate values", 1)
  # Reduce locally onto one device, then aggregate across workers with a
  # BytePS push-pull when more than one process is running.
  reduced = _simple_reduce(per_replica_value, reduce_to_device,
                           self.accumulation_fn, reduce_op)
  if size() > 1:
    reduced = _push_pull(reduced)
  return reduced
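# Hedged sketch of the collective primitive that _push_pull above presumably
# wraps: byteps.tensorflow's push_pull, BytePS' analogue of Horovod's
# allreduce. bps.init(), bps.size() and bps.push_pull() are assumed from the
# byteps.tensorflow package; _simple_reduce, _push_pull and
# self.accumulation_fn are defined elsewhere in the file this method is from.
import tensorflow as tf
import byteps.tensorflow as bps

bps.init()
aggregated = bps.push_pull(tf.constant([1.0, 2.0]))  # aggregated across bps.size() processes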
def _broadcast_to(self, tensor, destinations):
  # Fall back to broadcasting to all compute devices when no valid
  # destinations are given.
  if not cross_device_ops_lib.check_destinations(destinations):
    destinations = self._compute_devices
  return self._cross_device_ops.broadcast(tensor, destinations)