Python AllReduceCrossDeviceOps示例

编程语言: Python

命名空间/包名称: tensorflow.contrib.distribute.python.cross_tower_ops

方法/功能: AllReduceCrossDeviceOps

hotexamples.com的示例: 2

Python AllReduceCrossDeviceOps - 共找到 2 个示例。以下是从开源项目中提取的、评分最高的 tensorflow.contrib.distribute.python.cross_tower_ops.AllReduceCrossDeviceOps 真实 Python 示例。您可以为这些示例评分，以帮助我们提高示例质量。

示例#1

显示文件

文件： mirrored_strategy.py 项目： zhangshengsheng/tensorflow

    def configure(self,
                  session_config=None,
                  cluster_spec=None,
                  task_type=None,
                  task_id=None):
        """Apply session/cluster settings and pick cross-tower ops if unset.

        Args:
            session_config: optional session config. When truthy, its
                `isolate_session_state` attribute is set to True in place,
                and it is forwarded to `choose_the_best` when that path is
                taken.
            cluster_spec: optional cluster spec. When truthy, it is passed to
                `self._initialize_multi_worker` together with
                `self._num_gpus`.
            task_type: accepted but unused.
            task_id: accepted but unused.
        """
        del task_type, task_id

        if session_config:
            session_config.isolate_session_state = True

        if cluster_spec:
            self._initialize_multi_worker(self._num_gpus, cluster_spec)

        # A cross-tower ops object that is already set is never overridden.
        if self._cross_tower_ops is not None:
            return

        if not self._cluster_spec:
            # No cluster spec recorded: let the library choose based on the
            # devices and the session config.
            self._cross_tower_ops = cross_tower_ops_lib.choose_the_best(
                self._devices, session_config=session_config)
            return

        # It currently cannot detect the topology of remote workers, so the
        # multi-worker all-reduce algorithm is hard-coded for now.
        has_single_worker = len(self._workers) == 1
        if has_single_worker:
            # The default is "nccl".
            chosen_ops = cross_tower_ops_lib.AllReduceCrossDeviceOps()
        else:
            # The default is hierarchical reduce and broadcast.
            chosen_ops = cross_tower_ops_lib.MultiWorkerAllReduce(
                self._workers, self._num_gpus)
        self._cross_tower_ops = chosen_ops

示例#2

显示文件

文件： cross_tower_ops_test.py 项目： zzhangncsu/tensorflow

class SingleWorkerCrossDeviceOpsTest(CrossDeviceOpsTestBase):
    """Single-worker tests for the cross-device ops in `cross_tower_ops_lib`.

    Covers reduce/broadcast over several ops-and-strategy combinations, the
    all-reduce algorithm chooser, and reductions of IndexedSlices values.
    """

    # TODO(yuefengz): decouple the num_gpus check from distribution in
    # combinations module so that we can pass in devices instead of a distribution
    # strategy.

    # Combinations exercising ReductionToOneDeviceCrossDeviceOps variants.
    reduction_to_one_combinations = combinations.combine(
        cross_tower_ops=[
            combinations.NamedObject(
                "DefaultReductionToOneDeviceCrossDeviceOps",
                cross_tower_ops_lib.ReductionToOneDeviceCrossDeviceOps(),
            ),
            combinations.NamedObject(
                "ReductionToCPUDeviceCrossDeviceOps",
                cross_tower_ops_lib.ReductionToOneDeviceCrossDeviceOps(
                    reduce_to_device=_cpu_device),
            ),
            combinations.NamedObject(
                "AccumulateNCrossDeviceOp",
                cross_tower_ops_lib.ReductionToOneDeviceCrossDeviceOps(
                    accumulation_fn=math_ops.accumulate_n),
            ),
        ],
        distribution=[
            combinations.one_device_strategy,
            combinations.mirrored_strategy_with_gpu_and_cpu,
            combinations.mirrored_strategy_with_two_gpus,
        ],
        mode=["graph", "eager"],
    )

    # Combinations exercising AllReduceCrossDeviceOps variants; these only run
    # against the two-GPU mirrored strategy.
    allreduce_combinations = combinations.combine(
        cross_tower_ops=[
            combinations.NamedObject(
                "AllReduce",
                cross_tower_ops_lib.AllReduceCrossDeviceOps("nccl", 1, 0, 0),
            ),
            combinations.NamedObject(
                "HierarchicalCopy",
                cross_tower_ops_lib.AllReduceCrossDeviceOps(
                    "hierarchical_copy", 8, 0, 0),
            ),
            combinations.NamedObject(
                "AllReduceNoGradientRepacking",
                cross_tower_ops_lib.AllReduceCrossDeviceOps("nccl", 0, 0, 0),
            ),
            combinations.NamedObject(
                "HierarchicalCopyAggregateSmallTensors",
                cross_tower_ops_lib.AllReduceCrossDeviceOps(
                    "hierarchical_copy", 0, 100, 10),
            ),
        ],
        distribution=[combinations.mirrored_strategy_with_two_gpus],
        mode=["graph", "eager"],
    )

    @combinations.generate(
        reduction_to_one_combinations + allreduce_combinations)
    def testReductionAndBroadcast(self, cross_tower_ops, distribution):
        """Delegates to `_testReductionAndBroadcast` inside the strategy scope."""
        with distribution.scope():
            self._testReductionAndBroadcast(cross_tower_ops, distribution)

    def testChooseAlgorithm(self):
        """Checks the algorithm and pack count chosen for several link layouts."""
        # Each case is (device links, expected algorithm, expected num packs).
        cases = (
            # eight devices
            ([[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7],
              [0, 5, 6, 7], [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6]],
             "hierarchical_copy", 8),
            # if there are only 4 devices
            ([[1, 2, 3, 4], [0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7]],
             "nccl", 1),
            # if devices links contain each device itself
            ([[0, 1, 2, 3, 4], [0, 1, 2, 3, 5], [0, 1, 2, 3, 6],
              [0, 1, 2, 3, 7], [0, 4, 5, 6, 7], [1, 4, 5, 6, 7],
              [2, 4, 5, 6, 7], [3, 4, 5, 6, 7]],
             "hierarchical_copy", 8),
            # if not dgx1-like links
            ([[0, 2, 3, 5], [0, 1, 3, 6], [0, 1, 2, 7], [0, 5, 6, 7],
              [1, 4, 6, 7], [2, 4, 5, 7], [3, 4, 5, 6], [1, 2, 3, 4]],
             "nccl", 1),
        )
        for device_links, expected_alg, expected_packs in cases:
            chosen = cross_tower_ops_lib._choose_all_reduce_algorithm(
                device_links)
            self.assertIsInstance(
                chosen, cross_tower_ops_lib.AllReduceCrossDeviceOps)
            self.assertEqual(chosen._all_reduce_alg, expected_alg)
            self.assertEqual(chosen._num_packs, expected_packs)

    @combinations.generate(
        combinations.combine(mode=["graph", "eager"], required_gpus=1))
    def testSimpleReduceWithIndexedSlices(self):
        """Sums two IndexedSlices onto the first device via `_simple_reduce`."""
        cpu_device, gpu_device = "/cpu:0", "/gpu:0"
        cpu_slices = _make_indexed_slices(
            [[1., 2.]], [1], [5, 2], cpu_device)
        gpu_slices = _make_indexed_slices(
            [[3., 4.], [5., 6.]], [1, 3], [5, 2], gpu_device)
        per_device = value_lib.PerDevice({
            cpu_device: cpu_slices,
            gpu_device: gpu_slices,
        })
        reduced = cross_tower_ops_lib._simple_reduce(
            per_device, cpu_device, math_ops.add_n,
            vs.VariableAggregation.SUM)

        # Test that the result is semantically equal to both the concatenated
        # IndexedSlices with and without duplicate indices.
        expected_with_dups = _make_indexed_slices(
            [[1., 2.], [3., 4.], [5., 6.]], [1, 1, 3], [5, 2], cpu_device)
        expected_without_dups = _make_indexed_slices(
            [[4., 6.], [5., 6.]], [1, 3], [5, 2], cpu_device)
        for expected in (expected_with_dups, expected_without_dups):
            self._assert_indexed_slices_equal(expected, reduced)

    @combinations.generate(
        combinations.combine(
            cross_tower_ops_instance=[
                combinations.NamedObject(
                    "ReductionToOneDeviceCrossDeviceOps",
                    cross_tower_ops_lib.ReductionToOneDeviceCrossDeviceOps(),
                ),
                combinations.NamedObject(
                    "AllReduceCrossDeviceOps",
                    cross_tower_ops_lib.AllReduceCrossDeviceOps(),
                ),
            ],
            aggregation=[
                vs.VariableAggregation.SUM,
                vs.VariableAggregation.MEAN,
            ],
            batch_reduce=[True, False],
            mode=["graph", "eager"],
            required_gpus=1))
    def testIndexedSlicesAllReduce(self, cross_tower_ops_instance, aggregation,
                                   batch_reduce):
        """Reduces IndexedSlices with `reduce` or `batch_reduce` and checks them.

        The expected values depend on `aggregation` (SUM or MEAN); the result
        is compared against both the concatenated form and the form with
        duplicate indices merged.
        """
        devices = ["/cpu:0", "/gpu:0"]
        cpu_device, gpu_device = devices
        dense_shape = [5, 2]
        cpu_slices = _make_indexed_slices(
            [[1., 2.]], [1], dense_shape, cpu_device)
        gpu_slices = _make_indexed_slices(
            [[3., 4.], [5., 6.]], [1, 3], dense_shape, gpu_device)
        per_device = value_lib.PerDevice({
            cpu_device: cpu_slices,
            gpu_device: gpu_slices,
        })

        if batch_reduce:
            reduced = cross_tower_ops_instance.batch_reduce(
                aggregation, [(per_device, devices)])
        else:
            reduced = cross_tower_ops_instance.reduce(
                aggregation, per_device, devices)

        indices_with_dups = [1, 1, 3]
        indices_without_dups = [1, 3]

        if aggregation == vs.VariableAggregation.SUM:
            values_with_dups = [[1., 2.], [3., 4.], [5., 6.]]
            values_without_dups = [[4., 6.], [5., 6.]]
        else:
            # Only SUM and MEAN are generated by the decorator above.
            assert aggregation == vs.VariableAggregation.MEAN
            values_with_dups = [[0.5, 1.], [1.5, 2.], [2.5, 3.]]
            values_without_dups = [[2., 3.], [2.5, 3.]]

        expected_with_dups = _make_mirrored_indexed_slices(
            devices, values_with_dups, indices_with_dups, dense_shape)
        expected_without_dups = _make_mirrored_indexed_slices(
            devices, values_without_dups, indices_without_dups, dense_shape)

        # Test that the result is semantically equal to both the concatenated
        # IndexedSlices, as well as when the duplicate indices are summed up.
        if batch_reduce:
            # batch_reduce was given a one-element list, so compare against
            # one-element lists.
            expected_with_dups = [expected_with_dups]
            expected_without_dups = [expected_without_dups]

        for expected in (expected_with_dups, expected_without_dups):
            self._assert_values_equal(expected, reduced)