Example #1
def test_scatter_large_dataset_naive():
    mpi_comm = mpi4py.MPI.COMM_WORLD
    communicator = NaiveCommunicator(mpi_comm)

    # This test only runs when comm.size >= 2.
    if communicator.size == 1:
        pytest.skip('This test is for multinode')

    scatter_large_data(communicator)
Example #2
class TestDataset(unittest.TestCase):
    def setUp(self):
        self.mpi_comm = mpi4py.MPI.COMM_WORLD
        self.communicator = NaiveCommunicator(self.mpi_comm)

    def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
        if self.communicator.rank != root:
            original_dataset = None
        my_dataset = chainermn.scatter_dataset(original_dataset,
                                               self.communicator,
                                               shuffle=shuffle,
                                               root=root)
        sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

        if self.communicator.rank == root:
            # Test the sizes
            sub_sizes = [len(sub_dataset) for sub_dataset in sub_datasets]
            self.assertEqual(len(set(sub_sizes)), 1)
            sub_size = sub_sizes[0]
            self.assertLessEqual(len(original_dataset),
                                 sub_size * self.mpi_comm.size)
            self.assertGreater(len(original_dataset),
                               (sub_size - 1) * self.mpi_comm.size)

            # Test the content of scattered datasets
            joined_dataset = sum(
                (sub_dataset[:] for sub_dataset in sub_datasets), [])
            self.assertEqual(set(joined_dataset), set(original_dataset))

    def test_scatter_dataset(self):
        n = self.communicator.size

        for shuffle in [True, False]:
            for root in range(self.communicator.size):
                self.check_scatter_dataset([], shuffle, root)
                self.check_scatter_dataset([0], shuffle, root)
                self.check_scatter_dataset(list(range(n)), shuffle, root)
                self.check_scatter_dataset(list(range(n * 5 - 1)), shuffle,
                                           root)

                self.check_scatter_dataset(np.array([]), shuffle, root)
                self.check_scatter_dataset(np.array([0]), shuffle, root)
                self.check_scatter_dataset(np.arange(n), shuffle, root)
                self.check_scatter_dataset(np.arange(n * 5 - 1), shuffle, root)
Example #3
class TestDataset(unittest.TestCase):

    def setUp(self):
        self.mpi_comm = mpi4py.MPI.COMM_WORLD
        self.communicator = NaiveCommunicator(self.mpi_comm)

    def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
        my_dataset = chainermn.scatter_dataset(
            original_dataset, self.communicator,
            shuffle=shuffle, root=root)
        sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

        if self.communicator.rank == root:
            # Test the sizes
            sub_sizes = [len(sub_dataset) for sub_dataset in sub_datasets]
            self.assertEqual(len(set(sub_sizes)), 1)
            sub_size = sub_sizes[0]
            self.assertLessEqual(
                len(original_dataset), sub_size * self.mpi_comm.size)
            self.assertGreater(
                len(original_dataset), (sub_size - 1) * self.mpi_comm.size)

            # Test the content of scattered datasets
            joined_dataset = sum((sub_dataset[:]
                                  for sub_dataset in sub_datasets), [])
            self.assertEqual(set(joined_dataset), set(original_dataset))

    def test_scatter_dataset(self):
        n = self.communicator.size

        for shuffle in [True, False]:
            for root in range(self.communicator.size):
                self.check_scatter_dataset([], shuffle, root)
                self.check_scatter_dataset([0], shuffle, root)
                self.check_scatter_dataset(list(range(n)), shuffle, root)
                self.check_scatter_dataset(list(range(n * 5 - 1)),
                                           shuffle, root)

                self.check_scatter_dataset(np.array([]), shuffle, root)
                self.check_scatter_dataset(np.array([0]), shuffle, root)
                self.check_scatter_dataset(np.arange(n), shuffle, root)
                self.check_scatter_dataset(np.arange(n * 5 - 1), shuffle, root)
Example #4
def create_communicator(communicator_name='hierarchical',
                        mpi_comm=None,
                        allreduce_grad_dtype=None):
    """Create a ChainerMN communicator.

    Different communicators provide different approaches to communication,
    so they have different performance characteristics. The default
    communicator ``hierarchical`` is expected to perform well in a variety
    of environments, so there is usually no need to change it. However,
    choosing the proper communicator may give better performance.
    The following communicators are available.

    +---------------+---+---+--------+--------------------------------------+
    |Name           |CPU|GPU|NCCL    |Recommended Use Cases                 |
    +===============+===+===+========+======================================+
    |pure_nccl      |   |OK |Required|``pure_nccl`` is recommended when     |
    |               |   |   |(>= v2) |NCCL2 is available in the environment.|
    +---------------+---+---+--------+--------------------------------------+
    |hierarchical   |   |OK |Required|Each node has a single NIC or HCA     |
    +---------------+---+---+--------+--------------------------------------+
    |two_dimensional|   |OK |Required|Each node has multiple NICs or HCAs   |
    +---------------+---+---+--------+--------------------------------------+
    |single_node    |   |OK |Required|Single node with multiple GPUs        |
    +---------------+---+---+--------+--------------------------------------+
    |flat           |   |OK |        |N/A                                   |
    +---------------+---+---+--------+--------------------------------------+
    |naive          |OK |OK |        |Testing on CPU mode                   |
    +---------------+---+---+--------+--------------------------------------+

    Args:
        communicator_name: The name of the communicator (``naive``, ``flat``,
          ``hierarchical``, ``two_dimensional``, ``pure_nccl``, or
          ``single_node``)
        mpi_comm: MPI4py communicator
        allreduce_grad_dtype: Data type of gradient used in All-Reduce.
          If ``None``, the dtype of a model is used.

    Returns:
        ChainerMN communicator that implements methods defined in
        :class:`chainermn.CommunicatorBase`

    """

    if mpi_comm is None:
        import mpi4py.MPI
        mpi_comm = mpi4py.MPI.COMM_WORLD

    if communicator_name != 'pure_nccl' and allreduce_grad_dtype is not None:
        raise ValueError('allreduce_grad_dtype is only available '
                         'with the \'pure_nccl\' communicator.')

    if communicator_name == 'naive':
        from chainermn.communicators.naive_communicator \
            import NaiveCommunicator
        return NaiveCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'flat':
        from chainermn.communicators.flat_communicator \
            import FlatCommunicator
        return FlatCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'hierarchical':
        from chainermn.communicators.hierarchical_communicator \
            import HierarchicalCommunicator
        return HierarchicalCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'two_dimensional':
        from chainermn.communicators.two_dimensional_communicator \
            import TwoDimensionalCommunicator
        return TwoDimensionalCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'single_node':
        from chainermn.communicators.single_node_communicator \
            import SingleNodeCommunicator
        return SingleNodeCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'non_cuda_aware':
        from chainermn.communicators.non_cuda_aware_communicator \
            import NonCudaAwareCommunicator
        return NonCudaAwareCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'pure_nccl':
        from chainermn.communicators.pure_nccl_communicator \
            import PureNcclCommunicator
        return PureNcclCommunicator(mpi_comm=mpi_comm,
                                    allreduce_grad_dtype=allreduce_grad_dtype)

    elif communicator_name == 'dummy':
        from chainermn.communicators.dummy_communicator \
            import DummyCommunicator
        return DummyCommunicator(mpi_comm=mpi_comm)

    else:
        raise ValueError(
            'Unrecognized communicator: "{}"'.format(communicator_name))
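
A brief usage sketch of the API documented above. It assumes the function is exported as ``chainermn.create_communicator`` and that the returned communicator exposes ``rank``, ``size``, and ``intra_rank`` (the within-node rank that ChainerMN examples typically use for GPU selection); treat those attribute names as assumptions rather than guarantees.

import chainermn

# Create the default communicator from the table above ('hierarchical' in
# this version). The process is expected to be launched via mpiexec so
# that mpi4py picks up MPI.COMM_WORLD.
comm = chainermn.create_communicator('hierarchical')

# Assumed attribute: intra_rank, the rank within the local node, commonly
# used to assign one GPU per process.
device = comm.intra_rank

if comm.rank == 0:
    print('world size = {}, local GPU id on rank 0 = {}'.format(
        comm.size, device))
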
Example #5
def setUp(self):
    self.mpi_comm = mpi4py.MPI.COMM_WORLD
    self.communicator = NaiveCommunicator(self.mpi_comm)
Example #6
def setUp(self):
    self.mpi_comm = mpi4py.MPI.COMM_WORLD
    self.communicator = NaiveCommunicator(self.mpi_comm)
Example #7
def create_communicator(communicator_name='pure_nccl',
                        mpi_comm=None,
                        allreduce_grad_dtype=None,
                        batched_copy=False):
    """Create a ChainerMN communicator.

    Different communicators provide different approaches to communication,
    so they have different performance characteristics. The default
    communicator ``pure_nccl`` is expected to perform well in a variety of
    environments, so there is usually no need to change it. However, you
    may need to choose another communicator depending on your computing
    platform and the availability of the NCCL library.
    The following communicators are available.

    +---------------+---+---+--------+--------------------------------------+
    |Name           |CPU|GPU|NCCL    |Recommended Use Cases                 |
    +===============+===+===+========+======================================+
    |pure_nccl      |   |OK |Required|``pure_nccl`` is recommended when     |
    |               |   |   |(>= v2) |NCCL2 is available in the environment.|
    +---------------+---+---+--------+--------------------------------------+
    |flat           |   |OK |        |N/A                                   |
    +---------------+---+---+--------+--------------------------------------+
    |naive          |OK |OK |        |Testing on CPU mode                   |
    +---------------+---+---+--------+--------------------------------------+

    The ``pure_nccl`` communicator supports multiple data types, FP32 and
    FP16, in gradient exchange. The communication data type is determined
    based on `chainer.global_config.dtype` and `allreduce_grad_dtype`.
    When `allreduce_grad_dtype` is the default value `None`,
    FP32 is used when `chainer.global_config.dtype` is `numpy.float32` and
    FP16 otherwise.
    The `allreduce_grad_dtype` parameter,
    which is either `numpy.float16` or `numpy.float32`,
    overrides `chainer.global_config.dtype`.

    The table below summarizes the data type selection in gradient exchange.

    +---------------------+--------------------------------------------+
    |                     |              allreduce_grad_dtype          |
    +---------------------+---------+------------------+---------------+
    | global_config.dtype | None    |   numpy.float16  | numpy.float32 |
    +=====================+=========+==================+===============+
    | chainer.mixed16     | FP16    |   FP16           | FP32          |
    +---------------------+---------+------------------+---------------+
    | numpy.float16       | FP16    |   FP16           | FP32          |
    +---------------------+---------+------------------+---------------+
    | numpy.float32       | FP32    |   FP16           | FP32          |
    +---------------------+---------+------------------+---------------+

    Other communicators, namely ``flat`` and ``naive``, support only
    float32 communication, no matter what dtype the model uses. This is
    due to MPI's limited support for float16.

    Args:
        communicator_name: The name of the communicator (``naive``, ``flat``,
          or ``pure_nccl``)
        mpi_comm: MPI4py communicator
        allreduce_grad_dtype: Data type of gradient used in All-Reduce.
          If ``None``, the dtype of a model is used.

    Returns:
        ChainerMN communicator that implements methods defined in
        :class:`chainermn.CommunicatorBase`

    """

    if mpi_comm is None:
        try:
            import mpi4py.MPI
        except ImportError as e:
            raise ImportError(
                str(e) + ': '
                'ChainerMN requires mpi4py for '
                'distributed training. '
                'Please read the Chainer official document '
                'and setup MPI and mpi4py.')
        mpi_comm = mpi4py.MPI.COMM_WORLD

    if communicator_name != 'pure_nccl' and allreduce_grad_dtype is not None:
        raise ValueError('allreduce_grad_dtype is only available '
                         'with the \'pure_nccl\' communicator.')

    if communicator_name == 'naive':
        from chainermn.communicators.naive_communicator \
            import NaiveCommunicator
        return NaiveCommunicator(mpi_comm=mpi_comm, batched_copy=batched_copy)

    elif communicator_name == 'flat':
        from chainermn.communicators.flat_communicator \
            import FlatCommunicator
        return FlatCommunicator(mpi_comm=mpi_comm, batched_copy=batched_copy)

    elif communicator_name == 'non_cuda_aware':
        from chainermn.communicators.non_cuda_aware_communicator \
            import NonCudaAwareCommunicator
        return NonCudaAwareCommunicator(mpi_comm=mpi_comm,
                                        batched_copy=batched_copy)

    elif communicator_name == 'pure_nccl':
        from chainermn.communicators.pure_nccl_communicator \
            import PureNcclCommunicator
        return PureNcclCommunicator(mpi_comm=mpi_comm,
                                    allreduce_grad_dtype=allreduce_grad_dtype,
                                    batched_copy=batched_copy)

    elif communicator_name == 'dummy':
        from chainermn.communicators.dummy_communicator \
            import DummyCommunicator
        return DummyCommunicator(mpi_comm=mpi_comm, batched_copy=batched_copy)

    else:
        raise ValueError(
            'Unrecognized communicator: "{}"'.format(communicator_name))
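
A hedged sketch of the gradient dtype selection documented above, assuming the function is exported as ``chainermn.create_communicator``, that ``chainer.global_config.dtype`` is settable as in Chainer's mixed-precision configuration, and that NCCL2 and GPUs are available (``pure_nccl`` requires them).

import numpy as np
import chainer
import chainermn

# Explicitly request FP16 gradient exchange (the numpy.float16 column of
# the table above), regardless of global_config.dtype.
comm_fp16 = chainermn.create_communicator(
    'pure_nccl', allreduce_grad_dtype=np.float16)

# With allreduce_grad_dtype=None the choice follows global_config.dtype:
# numpy.float32 -> FP32 exchange; mixed16 or numpy.float16 -> FP16 exchange.
chainer.global_config.dtype = np.float32
comm_default = chainermn.create_communicator('pure_nccl')
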
Example #8
def create_communicator(communicator_name='hierarchical', mpi_comm=None):
    """Create a ChainerMN communicator.

    Different communicators provide different approaches to communication,
    so they have different performance characteristics. The default
    communicator ``hierarchical`` is expected to perform well in a variety
    of environments, so there is usually no need to change it. However,
    choosing the proper communicator may give better performance.
    The following communicators are available.

    =============== === === ======== =======================================
    Name            CPU GPU NCCL     Recommended Use Cases
    =============== === === ======== =======================================
    naive           OK  OK           Testing on CPU mode
    hierarchical        OK  Required Each node has a single NIC or HCA
    two_dimensional     OK  Required Each node has multiple NICs or HCAs
    single_node         OK  Required Single node with multiple GPUs
    flat                OK           N/A
    =============== === === ======== =======================================

    Args:
        communicator_name: The name of the communicator (``naive``, ``flat``,
          ``hierarchical``, ``two_dimensional``, or ``single_node``)
        mpi_comm: MPI4py communicator

    Returns:
        ChainerMN communicator

    """

    if mpi_comm is None:
        import mpi4py.MPI
        mpi_comm = mpi4py.MPI.COMM_WORLD

    if communicator_name == 'naive':
        from chainermn.communicators.naive_communicator \
            import NaiveCommunicator
        return NaiveCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'flat':
        from chainermn.communicators.flat_communicator \
            import FlatCommunicator
        return FlatCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'hierarchical':
        from chainermn.communicators.hierarchical_communicator \
            import HierarchicalCommunicator
        return HierarchicalCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'two_dimensional':
        from chainermn.communicators.two_dimensional_communicator \
            import TwoDimensionalCommunicator
        return TwoDimensionalCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'single_node':
        from chainermn.communicators.single_node_communicator \
            import SingleNodeCommunicator
        return SingleNodeCommunicator(mpi_comm=mpi_comm)

    elif communicator_name == 'dummy':
        from chainermn.communicators.dummy_communicator \
            import DummyCommunicator
        return DummyCommunicator(mpi_comm=mpi_comm)

    else:
        raise ValueError(
            'Unrecognized communicator: "{}"'.format(communicator_name))
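
For CPU-only testing (the ``naive`` row of the table above), a minimal sketch of how the unit-test fixtures in the earlier examples could obtain their communicator through this factory instead of constructing ``NaiveCommunicator`` directly; the top-level ``chainermn.create_communicator`` export is an assumption here.

import mpi4py.MPI
import chainermn

# 'naive' runs on both CPU and GPU and needs no NCCL, which makes it the
# natural choice for tests such as TestDataset above.
comm = chainermn.create_communicator(
    'naive', mpi_comm=mpi4py.MPI.COMM_WORLD)
assert comm.size == mpi4py.MPI.COMM_WORLD.Get_size()
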
Example #9
class TestDataset(unittest.TestCase):
    def setUp(self):
        self.mpi_comm = mpi4py.MPI.COMM_WORLD
        self.communicator = NaiveCommunicator(self.mpi_comm)

    def check_scatter_dataset(self, original_dataset, shuffle=False, root=0):
        if self.communicator.rank != root:
            original_dataset = None
        my_dataset = chainermn.scatter_dataset(original_dataset,
                                               self.communicator,
                                               shuffle=shuffle,
                                               root=root)
        sub_datasets = self.communicator.gather_obj(my_dataset, root=root)

        if self.communicator.rank == root:
            # Test the sizes
            sub_sizes = [len(sub_dataset) for sub_dataset in sub_datasets]
            self.assertEqual(len(set(sub_sizes)), 1)
            sub_size = sub_sizes[0]
            self.assertLessEqual(len(original_dataset),
                                 sub_size * self.mpi_comm.size)
            self.assertGreater(len(original_dataset),
                               (sub_size - 1) * self.mpi_comm.size)

            # Test the content of scattered datasets
            joined_dataset = sum(
                (sub_dataset[:] for sub_dataset in sub_datasets), [])

            # NOTE: The values in `original_dataset` and
            # `joined_dataset` must be casted to int to compare.
            # There are 2 backgrounds on this issue.
            #
            # (1) numpy and cupy/chainerx have different behaviours on
            # 1-element array. Numpy implicitly converts a 1-element array to
            # a scalar value.
            # type(numpy.array([1])[0])
            # =>  <class 'numpy.int64'>  # Scalar
            # type(chainerx.array([1])[0])
            # => <class 'chainerx.ndarray'>  # array of one element
            #
            # (2) Two different ChainerX arrays are never identical in the
            # context of `set()`.
            # set([chainerx.array([0]), chainerx.array([0])])
            # => {array([0], shape=(1,), dtype=int64, device='native:0'),
            #     array([0], shape=(1,), dtype=int64, device='native:0')}

            joined_dataset = [int(e) for e in joined_dataset]
            original_dataset = [int(e) for e in original_dataset]
            self.assertEqual(set(joined_dataset), set(original_dataset))

    def test_scatter_dataset(self):
        n = self.communicator.size

        for shuffle in [True, False]:
            for root in range(self.communicator.size):
                self.check_scatter_dataset([], shuffle, root)
                self.check_scatter_dataset([0], shuffle, root)
                self.check_scatter_dataset(list(range(n)), shuffle, root)
                self.check_scatter_dataset(list(range(n * 5 - 1)), shuffle,
                                           root)

                self.check_scatter_dataset(np.array([]), shuffle, root)
                self.check_scatter_dataset(np.array([0]), shuffle, root)
                self.check_scatter_dataset(np.arange(n), shuffle, root)
                self.check_scatter_dataset(np.arange(n * 5 - 1), shuffle, root)

                self.check_scatter_dataset(chx.array([]), shuffle, root)
                self.check_scatter_dataset(chx.array([0]), shuffle, root)
                self.check_scatter_dataset(chx.arange(n), shuffle, root)
                self.check_scatter_dataset(chx.arange(n * 5 - 1), shuffle,
                                           root)