Example #1
    def test_all_reduce(self):
        tensors = [torch.FloatTensor(128).uniform_() for i in range(nGPUs)]
        expected = torch.FloatTensor(128).zero_()
        for t in tensors:
            expected.add_(t)

        tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)
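The excerpt above omits the surrounding imports and the nGPUs constant from the test module. A self-contained sketch of the same check, assuming a machine with at least two CUDA devices (the guard and helper name are illustrative, not part of the original test):

import torch
from torch.cuda import nccl

# The excerpts assume nGPUs is defined elsewhere in the test module;
# here it is derived from the visible devices.
nGPUs = torch.cuda.device_count()

def check_all_reduce():
    # Build per-GPU inputs on the CPU and compute the expected element-wise sum.
    cpu_tensors = [torch.rand(128) for _ in range(nGPUs)]
    expected = torch.zeros(128)
    for t in cpu_tensors:
        expected.add_(t)

    # Scatter one copy per device, reduce in place, then verify every replica.
    tensors = [cpu_tensors[i].cuda(i) for i in range(nGPUs)]
    nccl.all_reduce(tensors)
    for tensor in tensors:
        assert torch.allclose(tensor.cpu(), expected)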
Example #2
def nccl_all_reduce(inputs):
    # TODO: figure out why nccl all_reduce doesn't work for gradcheck
    input_size = inputs[0].size()
    #if nccl.is_available(inputs):
    for i, inp in enumerate(inputs):
        assert inp.is_cuda, \
            "reduce_add expects all inputs to be on GPUs"
        if inp.size() != input_size:
            got = 'x'.join(str(x) for x in inp.size())
            expected = 'x'.join(str(x) for x in input_size)
            raise ValueError("input {} has invalid size: got {}, \
                but expected {}".format(i, got, expected))
    nccl.all_reduce(inputs)
    return inputs
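The wrapper reduces the inputs in place and returns the same list. A minimal call sketch, assuming the nccl_all_reduce wrapper above is in scope and at least two CUDA devices are present (device count and tensor sizes are illustrative):

import torch

# Assumes at least two CUDA devices and the nccl_all_reduce wrapper defined above.
if torch.cuda.device_count() >= 2:
    inputs = [torch.ones(4, device="cuda:{}".format(i)) for i in range(2)]
    outputs = nccl_all_reduce(inputs)   # reduction happens in place
    # Each returned tensor now holds the element-wise sum across devices.
    print(outputs[0])                   # tensor([2., 2., 2., 2.], device='cuda:0')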
Example #3
    def test_all_reduce(self, device, dtype):
        if TEST_WITH_ROCM and HIP_VERSION < 3.5 and dtype == torch.bfloat16:
            raise unittest.SkipTest("Skip bfloat16 test for ROCm < 3.5")

        tensors = [torch.zeros(128).uniform_().to(dtype=dtype) for i in range(nGPUs)]
        expected = torch.zeros(128, dtype=dtype)
        for t in tensors:
            expected.add_(t)

        tensors = [tensors[i].cuda(i) for i in range(nGPUs)]
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)
Example #4
    def test_collective_errors(self, device):
        t = torch.rand(10).cuda(0)
        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.all_reduce(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.reduce(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.broadcast(t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.all_gather(t, t)

        with self.assertRaisesRegex(
                TypeError, "Inputs should be a collection of tensors"):
            nccl.reduce_scatter(t, t)
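The assertions above confirm that every collective rejects a bare tensor: each expects a collection with one tensor per participating device. A minimal sketch of accepted call shapes, assuming two CUDA devices (sizes are illustrative):

import torch
from torch.cuda import nccl

# One tensor per device; all_reduce modifies them in place.
inputs = [torch.rand(10).cuda(i) for i in range(2)]
nccl.all_reduce(inputs)

# all_gather also takes a collection of outputs, each large enough
# to hold the concatenation of every input.
outputs = [torch.empty(2 * 10).cuda(i) for i in range(2)]
nccl.all_gather(inputs, outputs)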
Example #5
    def test_all_reduce(self, device, dtype):
        cpu_tensors = [
            torch.zeros(128).uniform_().to(dtype=dtype) for i in range(nGPUs)
        ]
        expected = torch.zeros(128, dtype=dtype)
        for t in cpu_tensors:
            expected.add_(t)

        tensors = [cpu_tensors[i].cuda(i) for i in range(nGPUs)]
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)

        # Test with tuple.
        tensors = tuple(cpu_tensors[i].cuda(i) for i in range(nGPUs))
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)

        # Test with set.
        tensors = {cpu_tensors[i].cuda(i) for i in range(nGPUs)}
        nccl.all_reduce(tensors)

        for tensor in tensors:
            self.assertEqual(tensor, expected)