def choose_the_best(devices, session_config=None):
  """Find the best CrossDeviceOps locally given a `tf.compat.v1.ConfigProto`.

  Args:
    devices: a list of devices passed to `tf.distribute.Strategy`.
    session_config: a `tf.compat.v1.ConfigProto` or `None`. If `None`, it will
      make its decision based on all local devices.

  Returns:
    A subclass of `CrossDeviceOps`.
  """
  requested_devices = set([device_util.canonicalize(d) for d in devices])
  machine_devices = device_lib.list_local_devices(
      session_config=session_config)
  using_devices = set()
  for d in machine_devices:
    if device_util.canonicalize(d.name) in requested_devices:
      using_devices.add(d.name)

  if len(using_devices) != len(requested_devices):
    logging.warning(
        "Some requested devices in `tf.distribute.Strategy` are not visible "
        "to TensorFlow: %s", ",".join(list(requested_devices - using_devices)))
    return ReductionToOneDevice()

  if any("gpu" not in d.lower() for d in using_devices):
    logging.warning("There are non-GPU devices in `tf.distribute.Strategy`, "
                    "not using nccl allreduce.")
    return ReductionToOneDevice()

  if kernels.get_registered_kernels_for_op("NcclAllReduce"):
    return NcclAllReduce(num_packs=1)
  else:
    logging.warning("Nccl kernel is not found, not using nccl allreduce.")
    return ReductionToOneDevice()
def placer(op):
  # `placer` is a closure: `self._local_worker_device` and `replica_device`
  # are captured from the enclosing scope.
  if all([
      'CPU' in kernel_def.device_type
      for kernel_def in kernels.get_registered_kernels_for_op(op.type).kernel
  ]):
    # If every registered kernel for this op runs on CPU, pin the op to
    # CPU:0 on the local worker. (This assumes an op has a CPU kernel by
    # default.)
    new_device = DeviceSpecV2.from_string(self._local_worker_device).replace(
        device_type='CPU', device_index=0)
  else:
    # Otherwise leave the op on the replica's device.
    new_device = DeviceSpecV2.from_string(replica_device)
  return new_device
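# A minimal usage sketch (an assumption for illustration, not from the
# source): in graph mode, `tf.compat.v1.device` accepts a callable like
# `placer` and invokes it once per op to decide placement. Since the real
# `placer` captures `self._local_worker_device` and `replica_device` from
# its enclosing scope, a standalone lambda stands in for it here.
import tensorflow as tf

graph = tf.Graph()
with graph.as_default():
  # Every op created under this scope is routed through the device function.
  with tf.compat.v1.device(lambda op: "/device:CPU:0"):
    x = tf.constant(1.0)
  print(x.device)  # /device:CPU:0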
def testNcclHintFallbackToRingReduce(self):
  """Tests that setting `communication_hint=nccl` works on non-GPU builds."""
  if kernels.get_registered_kernels_for_op('NcclAllReduce'):
    self.skipTest('Run only on non-GPU environments')
  self._testCollectiveReduce(
      inputs=[[0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1],
              [0.3, 1.3, 2.3, 3.3, 4.3, 5.3, 6.3, 7.3]],
      expected=[0.2, 1.2, 2.2, 3.2, 4.2, 5.2, 6.2, 7.2],
      set_graph_key=False,
      communication_hint='nccl')
def choose_the_best(devices, session_config=None):
  """Find the best CrossDeviceOps locally given a `tf.compat.v1.ConfigProto`.

  Args:
    devices: a list of devices passed to `tf.distribute.Strategy`.
    session_config: a `tf.compat.v1.ConfigProto` or `None`. If `None`, it will
      make its decision based on all logical devices.

  Returns:
    A subclass of `CrossDeviceOps`.
  """
  requested_devices = set(device_util.canonicalize(d) for d in devices)
  if ops.executing_eagerly_outside_functions():
    logical_gpus = context.context().list_logical_devices(device_type="GPU")
    physical_gpus = context.context().list_physical_devices(device_type="GPU")
    if len(logical_gpus) != len(physical_gpus):
      logging.warning("NCCL is not supported when using virtual GPUs, "
                      "falling back to reduction to one device")
      return ReductionToOneDevice()

    machine_devices = context.context().list_logical_devices()
  else:
    machine_devices = device_lib.list_local_devices(
        session_config=session_config)
  using_devices = set()
  for d in machine_devices:
    if device_util.canonicalize(d.name) in requested_devices:
      using_devices.add(d.name)

  if len(using_devices) != len(requested_devices):
    logging.warning(
        "Some requested devices in `tf.distribute.Strategy` are not visible "
        "to TensorFlow: %s", ",".join(list(requested_devices - using_devices)))

  if any("gpu" not in d.lower() for d in requested_devices):
    logging.warning("There are non-GPU devices in `tf.distribute.Strategy`, "
                    "not using nccl allreduce.")
    return ReductionToOneDevice()

  if kernels.get_registered_kernels_for_op("NcclAllReduce"):
    return NcclAllReduce(num_packs=1)
  else:
    logging.warning("Nccl kernel is not found, not using nccl allreduce.")
    return ReductionToOneDevice()
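# A hedged usage sketch (the device list is an assumption for illustration):
# `choose_the_best` is normally called internally when `tf.distribute.Strategy`
# is constructed without an explicit `cross_device_ops`, but it can be invoked
# directly. It returns `NcclAllReduce(num_packs=1)` only when every requested
# device is a visible GPU, no virtual GPUs are configured, and an
# `NcclAllReduce` kernel is registered; otherwise it falls back to
# `ReductionToOneDevice`.
cross_device_ops = choose_the_best(["/device:GPU:0", "/device:GPU:1"])
print(type(cross_device_ops).__name__)  # NcclAllReduce or ReductionToOneDevice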
def testFindsAtLeastOneKernel(self):
  kernel_list = kernels.get_registered_kernels_for_op("KernelLabel")
  self.assertGreater(len(kernel_list.kernel), 0)
  self.assertEqual(kernel_list.kernel[0].op, "KernelLabel")
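# A minimal sketch of the registry query the test exercises:
# `get_registered_kernels_for_op` returns a `KernelList` proto whose repeated
# `kernel` field holds one `KernelDef` per registered kernel, including the op
# name and the device type the kernel runs on. "MatMul" is used here only as
# a familiar op; any registered op name works.
from tensorflow.python.framework import kernels

for kernel_def in kernels.get_registered_kernels_for_op("MatMul").kernel:
  print(kernel_def.op, kernel_def.device_type)  # e.g. MatMul CPU, MatMul GPU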