Example #1
  def testInitializableIterator(self):
    with context.graph_mode():
      devices = ["/device:CPU:0"]
      # Use a random input because it is only allowed with an initializable
      # iterator.
      dataset = dataset_ops.Dataset.from_tensor_slices(
          random_ops.random_uniform((10,)))

      device_map = values.ReplicaDeviceMap(devices)
      input_workers = values.InputWorkers(device_map)
      per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
      iterator = per_replica_dataset.make_initializable_iterator()

      self.evaluate(iterator.initializer)
      next_element = iterator.get_next_as_list()
      for _ in range(10):
        self.evaluate(next_element)

      # Should fail after the input is finished.
      with self.assertRaises(errors.OutOfRangeError):
        self.evaluate(next_element)

      # After re-initializing the iterator, should be able to iterate again.
      self.evaluate(iterator.initializer)
      for _ in range(10):
        self.evaluate(next_element)
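
The test above shows the basic single-worker pattern: build a ReplicaDeviceMap over the replica devices, wrap it in InputWorkers, and feed a dataset through PerReplicaDataset. Below is a minimal standalone sketch of that pattern, assuming the TF 1.x-era internal module layout these examples use (these classes later moved out of `values.py`, so the import paths are version-specific).

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.distribute import values
from tensorflow.python.eager import context

with context.graph_mode():
  devices = ["/device:CPU:0"]                      # one replica on the local CPU
  device_map = values.ReplicaDeviceMap(devices)    # maps replica index -> device
  input_workers = values.InputWorkers(device_map)  # worker_device_pairs omitted, as in the test above

  dataset = dataset_ops.Dataset.range(4)
  per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
  iterator = per_replica_dataset.make_initializable_iterator()
  # Run `iterator.initializer`, then evaluate `iterator.get_next_as_list()`
  # repeatedly until errors.OutOfRangeError is raised.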
Example #2
  def _initialize_multi_worker(self, devices):
    """Initializes the object for multi-worker training."""
    self._local_mode = False

    assert devices, "Must specify at least one device."
    devices = tuple(device_util.resolve(d) for d in devices)
    assert len(set(devices)) == len(devices), (
        "No duplicates allowed in `devices` argument: %s" % devices)
    # TODO(josh11b): Require at least 2 devices?

    device_dict = _group_device_list(devices)
    workers = []
    worker_devices = []
    for job in ("chief", "worker"):
      for task in range(len(device_dict.get(job, []))):
        worker = "/job:%s/task:%d" % (job, task)
        workers.append(worker)
        worker_devices.append((worker, device_dict[job][task]))

    # Setting `_default_device` adds a device scope in distribution.scope. We
    # set the default device to the first worker. When users specify a device
    # under distribution.scope with
    #   with tf.device("/cpu:0"):
    #     ...
    # their ops will end up on the CPU device of the first worker, e.g.
    # "/job:worker/task:0/device:CPU:0". Note this is not used in replica mode.
    self._default_device = workers[0]

    self._device_map = values.ReplicaDeviceMap(devices)
    self._input_workers = values.InputWorkers(self._device_map, worker_devices)
    self._inferred_cross_device_ops = cross_device_ops_lib.MultiWorkerAllReduce(
        workers, _infer_num_gpus_per_worker(devices))
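
In the multi-worker case above, InputWorkers receives an explicit list of (worker, per-worker device tuple) pairs in addition to the ReplicaDeviceMap built over all replica devices. A small sketch of that call shape, with purely illustrative worker and device strings (the pattern, not this exact topology, is taken from the example above):

from tensorflow.python.distribute import values

worker_devices = [
    ("/job:worker/task:0", ("/job:worker/task:0/device:GPU:0",
                            "/job:worker/task:0/device:GPU:1")),
    ("/job:worker/task:1", ("/job:worker/task:1/device:GPU:0",
                            "/job:worker/task:1/device:GPU:1")),
]
# Flatten the per-worker device tuples into the replica device list.
all_devices = tuple(d for _, ds in worker_devices for d in ds)
device_map = values.ReplicaDeviceMap(all_devices)
input_workers = values.InputWorkers(device_map, worker_devices)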
Example #3
    def _initialize_local(self, num_gpus_per_worker):
        """Initialize internal devices for local training."""
        worker_device = device_util.canonicalize("/device:CPU:0")
        # Define the compute devices, a list of device strings with one entry per
        # replica. When there are GPUs, replicate operations on these GPUs.
        # Otherwise, place operations on the CPU.
        if num_gpus_per_worker > 0:
            compute_devices = tuple(
                map("/device:GPU:{}".format, range(num_gpus_per_worker)))
        else:
            compute_devices = (_LOCAL_CPU, )

        self._device_map = values.ReplicaDeviceMap(compute_devices)
        self._input_workers = values.InputWorkers(
            self._device_map, [(worker_device, compute_devices)])

        # If there is only one GPU, put everything on that GPU. Otherwise, place
        # variables on CPU.
        if num_gpus_per_worker == 1:
            assert len(compute_devices) == 1
            self._variable_device = _LOCAL_GPU_0
            self._parameter_devices = (_LOCAL_GPU_0, )
        else:
            self._variable_device = _LOCAL_CPU
            self._parameter_devices = (_LOCAL_CPU, )

        self._is_chief = True
        self._cluster_spec = None
        self._task_type = None
        self._task_id = None

        logging.info(
            "ParameterServerStrategy with compute_devices = %r, "
            "variable_device = %r", compute_devices, self._variable_device)
Example #4
 def __init__(self, container_strategy, device):
   super(OneDeviceExtended, self).__init__(container_strategy)
   self._device = device
   self._default_device = device
   worker = device_util.canonicalize("/device:CPU:0")
   worker_device_pairs = [(worker, [self._device])]
   device_map = values.SingleDeviceMap(device)
   self._input_workers = values.InputWorkers(device_map, worker_device_pairs)
Example #5
 def testValueErrorForIterator(self):
   # Incompatible arguments.
   d1 = "/device:GPU:0"
   d2 = "/device:GPU:1"
   device_map = values.ReplicaDeviceMap([d1, d2])
   input_workers = values.InputWorkers(
       device_map, (("w1", (d1,)), ("w2", (d2,))))
   with self.assertRaises(ValueError):
     values.MultiWorkerDataIterator([("w1", None)], input_workers)
Example #6
 def _test_dataset(self, dataset_fn, worker_devices, devices,
                   expected_values):
   device_map = values.ReplicaDeviceMap(devices)
   input_workers = values.InputWorkers(device_map, worker_devices)
   multi_worker_dataset = values.MultiWorkerDataset(
       dataset_fn, input_workers)
   multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()
   with self.cached_session() as sess:
     sess.run(multi_worker_iterator.initializer)
     self._test_iterator(sess, multi_worker_iterator, devices, expected_values)
Example #7
 def _initialize_local(self, devices):
   """Initializes the object for local training."""
   self._local_mode = True
   assert devices, "Must specify at least one device."
   devices = tuple(device_util.resolve(d) for d in devices)
   assert len(set(devices)) == len(devices), (
       "No duplicates allowed in `devices` argument: %s" % devices)
   # TODO(josh11b): Require at least 2 devices?
   self._device_map = values.ReplicaDeviceMap(devices)
   self._input_workers = values.InputWorkers(self._device_map)
   self._inferred_cross_device_ops = cross_device_ops_lib.choose_the_best(
       devices)
Example #8
    def _initialize_multi_worker(self, num_gpus_per_worker, cluster_spec,
                                 task_type, task_id):
        """Initializes the object for multi-worker training."""
        if task_type is None or task_id is None:
            raise ValueError(
                "When `cluster_spec` is given, you must also specify "
                "`task_type` and `task_id`")
        if task_type not in ("chief", "worker"):
            raise ValueError(
                "Unrecognized task_type: %r, valid task types are: \"chief\", "
                "\"worker\"." % task_type)
        cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)
        self._num_workers = multi_worker_util.worker_count(
            cluster_spec, task_type)
        if not self._num_workers:
            raise ValueError("No `worker` or `chief` tasks can be found in "
                             "`cluster_spec`.")

        self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                    task_id)

        self._worker_device = "/job:%s/task:%d" % (task_type, task_id)
        if num_gpus_per_worker:
            local_devices = tuple("%s/device:GPU:%d" % (self._worker_device, i)
                                  for i in range(num_gpus_per_worker))
        else:
            local_devices = (self._worker_device, )

        self._collective_keys = cross_device_utils.CollectiveKeys()
        self._initialize_local(local_devices)
        self._input_workers = values.InputWorkers(
            self._device_map, [(self._worker_device, self.worker_devices)])
        self._cross_device_ops = cross_device_ops_lib.CollectiveAllReduce(
            num_workers=self._num_workers,
            num_gpus_per_worker=num_gpus_per_worker,
            collective_keys=self._collective_keys)

        # Add a default device so that ops without specified devices will not end up
        # on other workers.
        self._default_device = "/job:%s/task:%d" % (task_type, task_id)

        self._cluster_spec = multi_worker_util.normalize_cluster_spec(
            cluster_spec)
        self._task_type = task_type
        self._task_id = task_id

        logging.info(
            "Multi-worker CollectiveAllReduceStrategy with "
            "cluster_spec = %r, task_type = %r, task_id = %r, "
            "num_workers = %r, local_devices = %r", cluster_spec.as_dict(),
            task_type, task_id, self._num_workers, local_devices)
Example #9
    def __init__(self,
                 container_strategy,
                 tpu_cluster_resolver=None,
                 steps_per_run=None,
                 num_cores=None):
        super(TPUExtended, self).__init__(container_strategy)

        if tpu_cluster_resolver is None:
            tpu_cluster_resolver = resolver_lib.TPUClusterResolver("")

        if steps_per_run is None:
            # TODO(frankchn): Warn when we are being used by DS/Keras and this is
            # not specified.
            steps_per_run = 1

        self._tpu_cluster_resolver = tpu_cluster_resolver
        self._tpu_metadata = get_tpu_system_metadata(
            self._tpu_cluster_resolver)
        # TODO(sourabhbajaj): Change this from num_cores to metadata_override
        self._num_cores_override = num_cores

        # TODO(jhseu): Switch to DeviceAssignment to support pods and model
        # parallelism.
        self._device_index = {
            d.name: i
            for i, d in enumerate(self._tpu_metadata.devices)
            if "device:TPU:" in d.name
        }
        self._host_device = self.get_host_cpu_device(0)
        self._tpu_devices = tuple(sorted(self._device_index.keys()))
        # Only create variables for the number of replicas we're running.
        self._tpu_devices = self._tpu_devices[:self._num_replicas_in_sync]
        self._device_map = values.ReplicaDeviceMap(self._tpu_devices)

        # For input:
        input_device_map = values.ReplicaDeviceMap(
            tuple(
                self.get_host_cpu_device(hid)
                for hid in range(self.num_hosts)))
        worker_devices = [(self.get_host(hid), [self.get_host_cpu_device(hid)])
                          for hid in range(self.num_hosts)]
        self._input_workers = values.InputWorkers(input_device_map,
                                                  worker_devices)

        # TODO(sourabhbajaj): Remove this once performance of running one step
        # at a time is comparable to multiple steps.
        self.steps_per_run = steps_per_run
        self._require_static_shapes = True

        # Initialize the TPU devices.
        self._initialize_tpu()
Example #10
  def _test_iterator(self, devices, dataset, expected_values):
    device_map = values.ReplicaDeviceMap(devices)
    input_workers = values.InputWorkers(device_map)
    per_replica_dataset = values.PerReplicaDataset(dataset, input_workers, 0)
    if context.executing_eagerly():
      iterator = per_replica_dataset.make_one_shot_iterator()
    else:
      iterator = per_replica_dataset.make_initializable_iterator()
      self.evaluate([iterator.initializer])

    for expected_value in expected_values:
      next_element = iterator.get_next_as_list()
      computed_value = self.evaluate(next_element)
      self.assertEqual(expected_value, computed_value)

    with self.assertRaises(errors.OutOfRangeError):
      next_element = iterator.get_next_as_list()
      self.evaluate(next_element)
Example #11
  def testInitializableIterator(self):
    worker_devices, devices = self._cpu_devices()
    with context.graph_mode(), self.cached_session() as sess:
      dataset_fn = lambda: dataset_ops.Dataset.range(8)
      device_map = values.ReplicaDeviceMap(devices)
      input_workers = values.InputWorkers(device_map, worker_devices)
      multi_worker_dataset = values.MultiWorkerDataset(
          dataset_fn, input_workers)
      multi_worker_iterator = multi_worker_dataset.make_initializable_iterator()

      sess.run(multi_worker_iterator.initializer)
      self._test_iterator(
          sess, multi_worker_iterator, devices,
          [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]])

      # After re-initializing the iterator, should be able to iterate again.
      sess.run(multi_worker_iterator.initializer)
      self._test_iterator(
          sess, multi_worker_iterator, devices,
          [[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7]])
Example #12
  def _test_iterator(self, input_type, dataset_fn, worker_device_pairs,
                     expected_values, sess=None, split_batch_by=None):
    devices = nest.flatten([ds for _, ds in worker_device_pairs])
    device_map = values.ReplicaDeviceMap(devices)
    input_workers = values.InputWorkers(device_map, worker_device_pairs)

    if input_type == "input_fn":
      input_contexts = [
          distribute_lib.InputContext() for _ in worker_device_pairs]
      input_fn = lambda _: dataset_fn()
      iterator = values.InputFunctionIterator(
          input_fn, input_workers, input_contexts)
    else:
      iterator = values.DatasetIterator(
          dataset_fn(), input_workers, split_batch_by)

    evaluate = lambda x: sess.run(x) if sess else self.evaluate(x)

    evaluate(control_flow_ops.group(iterator.initialize()))

    for expected_value in expected_values:
      next_element = iterator.get_next()
      computed_value = evaluate(
          [values.select_replica(r, next_element) for r in range(len(devices))])
      self.assertAllEqual(expected_value, computed_value)

    with self.assertRaises(errors.OutOfRangeError):
      next_element = iterator.get_next()
      evaluate([values.select_replica(r, next_element)
                for r in range(len(devices))])

    # After re-initializing the iterator, should be able to iterate again.
    evaluate(control_flow_ops.group(iterator.initialize()))

    for expected_value in expected_values:
      next_element = iterator.get_next()
      computed_value = evaluate(
          [values.select_replica(r, next_element) for r in range(len(devices))])
      self.assertAllEqual(expected_value, computed_value)
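
Example #12 above exercises the InputFunctionIterator and DatasetIterator entry points together with select_replica to pull out each replica's element. Below is a condensed single-worker sketch of the DatasetIterator path (same internal API as in the test; the third argument is the split_batch_by value, passed as None exactly as the test does by default):

from tensorflow.python.data.ops import dataset_ops
from tensorflow.python.distribute import values

worker_device_pairs = [("/device:CPU:0", ["/device:CPU:0"])]  # one worker, one replica
devices = ["/device:CPU:0"]
device_map = values.ReplicaDeviceMap(devices)
input_workers = values.InputWorkers(device_map, worker_device_pairs)

iterator = values.DatasetIterator(dataset_ops.Dataset.range(4), input_workers, None)
init_ops = iterator.initialize()    # initializer ops; the test groups and runs these
next_element = iterator.get_next()  # per-replica structure
per_replica = [values.select_replica(r, next_element) for r in range(len(devices))]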
Example #13
    def _initialize_multi_worker(self, num_gpus_per_worker, cluster_spec,
                                 task_type, task_id):
        """Initialize devices for multiple workers.

    It creates variable devices and compute devices. Variables and operations
    will be assigned to them respectively. We have one compute device per
    replica. The variable device is a device function or device string. The
    default variable device assigns variables to parameter servers in a
    round-robin fashion.

    Args:
      num_gpus_per_worker: number of local GPUs or GPUs per worker.
      cluster_spec: a dict, ClusterDef or ClusterSpec object specifying the
        cluster configurations.
      task_type: the current task type.
      task_id: the current task id.

    Raises:
      ValueError: if the cluster_spec doesn't have ps jobs.
    """
        assert cluster_spec
        if not task_type or task_id is None:
            raise ValueError(
                "When `cluster_spec` is given, you must also specify "
                "`task_type` and `task_id`")
        cluster_spec = multi_worker_util.normalize_cluster_spec(cluster_spec)

        worker_device = "/job:%s/task:%d" % (self._task_type, self._task_id)

        # Define the compute devices, a list of device strings with one entry per
        # replica. When there are GPUs, replicate operations on these GPUs.
        # Otherwise, place operations on the CPU.
        if num_gpus_per_worker > 0:
            compute_devices = tuple("%s/device:GPU:%d" % (worker_device, i)
                                    for i in range(num_gpus_per_worker))
        else:
            compute_devices = (worker_device, )

        self._device_map = values.ReplicaDeviceMap(compute_devices)
        self._input_workers = values.InputWorkers(
            self._device_map, [(worker_device, compute_devices)])

        # In distributed mode, place variables on ps jobs in a round-robin fashion.
        # Note that devices returned from `replica_device_setter` are not
        # canonical and therefore we don't canonicalize all variable devices to
        # make them consistent.
        # TODO(yuefengz): support passing a strategy object to control variable
        # assignment.
        # TODO(yuefengz): merge the logic of replica_device_setter into this
        # class.
        num_ps_replicas = len(cluster_spec.as_dict().get("ps", []))
        if num_ps_replicas == 0:
            raise ValueError("The cluster spec needs to have `ps` jobs.")
        self._variable_device = device_setter.replica_device_setter(
            ps_tasks=num_ps_replicas,
            worker_device=worker_device,
            merge_devices=True,
            cluster=cluster_spec)

        # The `_parameter_devices` is needed for the `parameter_devices` property
        # and is a list of all variable devices. Here parameter devices are all
        # tasks of the "ps" job.
        self._parameter_devices = tuple(
            map("/job:ps/task:{}".format, range(num_ps_replicas)))

        # Add a default device so that ops without specified devices will not end up
        # on other workers.
        self._default_device = worker_device

        self._is_chief = multi_worker_util.is_chief(cluster_spec, task_type,
                                                    task_id)
        self._cluster_spec = cluster_spec
        self._task_type = task_type
        self._task_id = task_id

        logging.info(
            "Multi-worker ParameterServerStrategy with "
            "cluster_spec = %r, task_type = %r, task_id = %r, "
            "num_ps_replicas = %r, is_chief = %r, device_map = %r, "
            "variable_device = %r", cluster_spec.as_dict(), task_type, task_id,
            num_ps_replicas, self._is_chief, self._device_map,
            self._variable_device)
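
The round-robin variable placement described in the comments above comes from replica_device_setter, a standard TF 1.x device function. A minimal sketch of how such a device function is typically applied (task counts and variable names here are illustrative, not taken from the example):

import tensorflow as tf
from tensorflow.python.training import device_setter

variable_device = device_setter.replica_device_setter(
    ps_tasks=2, worker_device="/job:worker/task:0", merge_devices=True)
with tf.Graph().as_default(), tf.device(variable_device):
  # Variables created here land on /job:ps/task:0 and /job:ps/task:1 in
  # round-robin order; other ops stay on the worker device.
  w = tf.compat.v1.get_variable("w", shape=[10, 10])
  b = tf.compat.v1.get_variable("b", shape=[10])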