def testContainsIndexedSlices_PerReplica(self):
  """A PerReplica wrapping IndexedSlices values is detected as containing them."""
  slices_a = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  slices_b = math_ops._as_indexed_slices(
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
  replica_value = value_lib.PerReplica({"/gpu:0": slices_a, "/cpu:0": slices_b})
  self.assertTrue(
      cross_device_utils.contains_indexed_slices(replica_value))
def testContainsIndexedSlices_PerReplica(self):
  """A PerReplica built from a tuple of IndexedSlices is detected as containing them."""
  slices_a = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  slices_b = math_ops._as_indexed_slices(
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
  replica_value = value_lib.PerReplica((slices_a, slices_b))
  self.assertTrue(
      cross_device_utils.contains_indexed_slices(replica_value))
def _reduce(self, reduce_op, per_replica_value, destinations):
  """All-reduces `per_replica_value` and mirrors the result on `destinations`.

  Args:
    reduce_op: a reduce op specifying how per-replica values are combined.
    per_replica_value: a `PerReplica` of dense tensors to reduce.
    destinations: the devices (or a value holding devices) the reduced
      result should be placed on.

  Returns:
    A `Mirrored` value placed according to `destinations`' device map.

  Raises:
    ValueError: if the input contains `IndexedSlices`, or if eager execution
      is active — neither is supported by collective all-reduce.
  """
  if cross_device_utils.contains_indexed_slices(per_replica_value):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  if context.executing_eagerly():
    raise ValueError(
        "Eager execution is not supported for Collective All-Reduce")
  all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  device_map, logical_device = get_device_map_from(destinations)
  # Fast path: the all-reduced value already lives on the requested devices.
  if (all_reduced.device_map is device_map and
      all_reduced.logical_device == logical_device):
    return all_reduced
  devices = device_map.logical_to_actual_devices(logical_device)
  index = []
  for d in devices:
    if d in all_reduced.devices:
      index.append(all_reduced.get(d))
    else:
      # TODO(josh11b): Once we add support for model parallelism, get the
      # copy from the corresponding replica instead of the primary.
      with ops.control_dependencies(all_reduced.values), ops.device(d):
        index.append(array_ops.identity(all_reduced.primary))
  return value_lib.Mirrored(device_map, index, logical_device)
def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
  """Reduces a batch of (value, destinations) pairs, batching when possible."""
  devices_match = _all_devices_match(value_destination_pairs)
  has_indexed_slices = cross_device_utils.contains_indexed_slices(
      value_destination_pairs)
  # Batched all-reduce only works in graph mode, with dense values, and when
  # every pair targets the same set of devices.
  can_batch = (devices_match and not context.executing_eagerly()
               and not has_indexed_slices)
  if can_batch:
    return self._batch_all_reduce(
        reduce_op, [pair[0] for pair in value_destination_pairs])
  if not devices_match:
    logging.log_first_n(
        logging.WARN, "Efficient batch_reduce is not supported if "
        "destinations are different.", 10)
  return [
      self.reduce_implementation(reduce_op, value, destinations=dest)
      for value, dest in value_destination_pairs
  ]
def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
  """Reduces a batch of (value, destinations) pairs via collective all-reduce.

  Args:
    reduce_op: a reduce op specifying how values are combined.
    value_destination_pairs: a list of (value, destinations) pairs.

  Returns:
    A list of reduced values, one per input pair.

  Raises:
    ValueError: if any value contains `IndexedSlices`, which collective
      all-reduce does not support.
  """
  if cross_device_utils.contains_indexed_slices(value_destination_pairs):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  all_devices_match = _all_devices_match(value_destination_pairs)
  if all_devices_match:
    return self._batch_all_reduce(
        reduce_op, [v[0] for v in value_destination_pairs])
  # Destinations differ across pairs; the redundant inner
  # `if not all_devices_match` check from the original is removed — it is
  # always true on this branch. Fall back to one reduce per pair.
  logging.log_first_n(
      logging.WARN, "Efficient batch_reduce is not supported if "
      "destinations are different.", 10)
  return [
      self.reduce_implementation(reduce_op, t, destinations=v)
      for t, v in value_destination_pairs
  ]
def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
  """Reduces a batch of (value, destinations) pairs, batching when possible."""
  devices_match = _all_devices_match(value_destination_pairs)
  has_indexed_slices = cross_device_utils.contains_indexed_slices(
      value_destination_pairs)
  # Batched all-reduce only works in graph mode, with dense values, and when
  # every pair targets the same set of devices.
  can_batch = (devices_match and not context.executing_eagerly()
               and not has_indexed_slices)
  if can_batch:
    return self._batch_all_reduce(
        reduce_op, [pair[0] for pair in value_destination_pairs])
  if not devices_match:
    logging.log_first_n(
        logging.WARN, "Efficient batch_reduce is not supported if "
        "destinations are different.", 10)
  return [
      self.reduce_implementation(reduce_op, value, destinations=dest)
      for value, dest in value_destination_pairs
  ]
def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
  """Reduces a batch of (value, destinations) pairs via collective all-reduce.

  Args:
    reduce_op: a reduce op specifying how values are combined.
    value_destination_pairs: a list of (value, destinations) pairs.

  Returns:
    A list of reduced values, one per input pair.

  Raises:
    ValueError: if any value contains `IndexedSlices`, which collective
      all-reduce does not support.
  """
  if cross_device_utils.contains_indexed_slices(value_destination_pairs):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  all_devices_match = _all_devices_match(value_destination_pairs)
  if all_devices_match:
    return self._batch_all_reduce(
        reduce_op, [v[0] for v in value_destination_pairs])
  # Destinations differ across pairs; the redundant inner
  # `if not all_devices_match` check from the original is removed — it is
  # always true on this branch. Fall back to one reduce per pair.
  logging.log_first_n(
      logging.WARN, "Efficient batch_reduce is not supported if "
      "destinations are different.", 10)
  return [
      self.reduce_implementation(reduce_op, t, destinations=v)
      for t, v in value_destination_pairs
  ]
def _reduce(self, reduce_op, per_replica_value, destinations):
  """Reduces `per_replica_value` to `destinations`, all-reducing when possible."""
  has_indexed_slices = cross_device_utils.contains_indexed_slices(
      per_replica_value)
  # Batched all-reduce needs graph mode, dense values, and matching devices.
  use_batch_all_reduce = (
      _devices_match(per_replica_value, destinations)
      and not context.executing_eagerly() and not has_indexed_slices)
  if use_batch_all_reduce:
    return self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  if has_indexed_slices:
    logging.log_first_n(
        logging.WARN,
        "Efficient allreduce is not supported for IndexedSlices.", 10)
  if check_destinations(destinations):
    target_devices = get_devices_from(destinations)
  else:
    target_devices = get_devices_from(per_replica_value)
  # Fallback: reduce everything onto one device, then broadcast out.
  reduce_to_device = target_devices[0]
  reduced = _simple_reduce(per_replica_value, reduce_to_device,
                           math_ops.add_n, reduce_op)
  return self.broadcast(reduced, target_devices)
def _reduce(self, reduce_op, per_replica_value, destinations):
  """Reduces `per_replica_value` to `destinations`, all-reducing when possible."""
  has_indexed_slices = cross_device_utils.contains_indexed_slices(
      per_replica_value)
  # Batched all-reduce needs graph mode, dense values, and matching devices.
  use_batch_all_reduce = (
      _devices_match(per_replica_value, destinations)
      and not context.executing_eagerly() and not has_indexed_slices)
  if use_batch_all_reduce:
    return self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  if has_indexed_slices:
    logging.log_first_n(
        logging.WARN,
        "Efficient allreduce is not supported for IndexedSlices.", 10)
  if check_destinations(destinations):
    target_devices = get_devices_from(destinations)
  else:
    target_devices = get_devices_from(per_replica_value)
  # Fallback: reduce everything onto one device, then broadcast out.
  reduce_to_device = target_devices[0]
  reduced = _simple_reduce(per_replica_value, reduce_to_device,
                           math_ops.add_n, reduce_op)
  return self.broadcast(reduced, target_devices)
def reduce_implementation(self, reduce_op, per_replica_value, destinations):
  """All-reduces `per_replica_value` and mirrors the result on `destinations`.

  Args:
    reduce_op: a reduce op specifying how per-replica values are combined.
    per_replica_value: a `PerReplica` of dense tensors to reduce.
    destinations: the devices (or a value holding devices) the reduced
      result should be placed on.

  Returns:
    A `Mirrored` value placed according to `destinations`' device map.

  Raises:
    ValueError: if the input contains `IndexedSlices`, which collective
      all-reduce does not support.
  """
  if cross_device_utils.contains_indexed_slices(per_replica_value):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  device_map, logical_device = get_device_map_from(destinations)
  # Fast path: the all-reduced value already lives on the requested devices.
  if (all_reduced.device_map is device_map and
      all_reduced.logical_device == logical_device):
    return all_reduced
  devices = device_map.logical_to_actual_devices(logical_device)
  index = []
  for d in devices:
    if d in all_reduced.devices:
      index.append(all_reduced.get(d))
    else:
      # TODO(josh11b): Once we add support for model parallelism, get the
      # copy from the corresponding replica instead of the primary.
      with ops.control_dependencies(all_reduced.values), ops.device(d):
        index.append(array_ops.identity(all_reduced.primary))
  return value_lib.Mirrored(device_map, index, logical_device)
def _reduce(self, reduce_op, per_replica_value, destinations):
  """All-reduces `per_replica_value` and mirrors the result on `destinations`.

  Args:
    reduce_op: a reduce op specifying how per-replica values are combined.
    per_replica_value: a `PerReplica` of dense tensors to reduce.
    destinations: the devices (or a value holding devices) the reduced
      result should be placed on.

  Returns:
    A `Mirrored` value with one entry per destination device.

  Raises:
    ValueError: if the input contains `IndexedSlices`, or if eager execution
      is active — neither is supported by collective all-reduce.
  """
  if cross_device_utils.contains_indexed_slices(per_replica_value):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  if context.executing_eagerly():
    raise ValueError(
        "Eager execution is not supported for Collective All-Reduce")
  all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  if _devices_match(per_replica_value, destinations):
    return all_reduced
  index = {}
  # pylint: disable=protected-access
  # Hoisted out of the loop: the reduced values are loop-invariant, so
  # there is no need to rebuild this list on every missing device.
  reduced_values = list(all_reduced._index.values())
  for d in get_devices_from(destinations):
    if d in all_reduced._index:
      index[d] = all_reduced._index[d]
    else:
      with ops.control_dependencies(reduced_values), ops.device(d):
        index[d] = array_ops.identity(reduced_values[0])
  return value_lib.Mirrored(index)
def _reduce(self, reduce_op, per_replica_value, destinations):
  """All-reduces `per_replica_value` and mirrors the result on `destinations`.

  Args:
    reduce_op: a reduce op specifying how per-replica values are combined.
    per_replica_value: a `PerReplica` of dense tensors to reduce.
    destinations: the devices (or a value holding devices) the reduced
      result should be placed on.

  Returns:
    A `Mirrored` value with one entry per destination device.

  Raises:
    ValueError: if the input contains `IndexedSlices`, or if eager execution
      is active — neither is supported by collective all-reduce.
  """
  if cross_device_utils.contains_indexed_slices(per_replica_value):
    # Fixed message: the TF class is `IndexedSlices`, not `IndexSlices`.
    raise ValueError(
        "`IndexedSlices` is not supported for Collective All-Reduce.")
  if context.executing_eagerly():
    raise ValueError(
        "Eager execution is not supported for Collective All-Reduce")
  all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
  if _devices_match(per_replica_value, destinations):
    return all_reduced
  index = {}
  # pylint: disable=protected-access
  # Hoisted out of the loop: the reduced values are loop-invariant, so
  # there is no need to rebuild this list on every missing device.
  reduced_values = list(all_reduced._index.values())
  for d in get_devices_from(destinations):
    if d in all_reduced._index:
      index[d] = all_reduced._index[d]
    else:
      with ops.control_dependencies(reduced_values), ops.device(d):
        index[d] = array_ops.identity(reduced_values[0])
  return value_lib.Mirrored(index)
def testContainsIndexedSlices_Tuple(self):
  """A tuple holding IndexedSlices values is detected as containing them."""
  first = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  second = math_ops._as_indexed_slices(
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
  self.assertTrue(
      cross_device_utils.contains_indexed_slices((first, second)))
def testIsIndexedSlices(self):
  """A bare IndexedSlices value is detected directly."""
  slices = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  self.assertTrue(cross_device_utils.contains_indexed_slices(slices))
def testContainsIndexedSlices_Tuple(self):
  """A tuple holding IndexedSlices values is detected as containing them."""
  first = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  second = math_ops._as_indexed_slices(
      constant_op.constant([[0., 0.], [5, 6], [7., 8.]]))
  self.assertTrue(
      cross_device_utils.contains_indexed_slices((first, second)))
def testIsIndexedSlices(self):
  """A bare IndexedSlices value is detected directly."""
  slices = math_ops._as_indexed_slices(
      constant_op.constant([[1., 2.], [0, 0], [3., 4.]]))
  self.assertTrue(cross_device_utils.contains_indexed_slices(slices))