Пример #1
0
    def test_calculate_shuffle_buffer_size(self, mock_local_size,
                                           mock_allgather):
        """Check the callable built by ``remote._calculate_shuffle_buffer_size_fn``.

        Two scenarios are exercised against the same mocked topology:

        * third factory argument ``None``: the result, truncated to ``int``,
          must equal ``TOTAL_BUFFER_MEMORY_CAP_GIB * BYTES_PER_GIB``
          divided by the average row size and then by 5;
        * third factory argument ``0``: the result must be 0.

        ``mock_local_size`` and ``mock_allgather`` are supplied by patch
        decorators that are not visible in this chunk -- presumably they
        replace ``hvd.local_size`` and ``hvd.allgather``; confirm there.
        """
        import horovod.torch as hvd
        hvd.init()

        # Simulated topology: 2 workers, the first with 5 ranks and the
        # second with 3 ranks, so the gathered values are five 5s then three 3s.
        mock_local_size.return_value = 2
        mock_allgather.return_value = torch.tensor([5] * 5 + [3] * 3)

        row_bytes = 100000
        rows_per_worker = 1000000

        # Scenario 1: no explicit shuffle setting (None).
        size_fn = remote._calculate_shuffle_buffer_size_fn(
            rows_per_worker, row_bytes, None)
        observed = int(size_fn())
        # The divisor 5 matches the largest value in the mocked allgather
        # result (presumably the most crowded worker -- confirm in `remote`).
        memory_cap_bytes = (constants.TOTAL_BUFFER_MEMORY_CAP_GIB *
                            constants.BYTES_PER_GIB)
        assert observed == int(memory_cap_bytes / row_bytes / 5)

        # Scenario 2: an explicit 0 means "no shuffling", so the size is 0.
        size_fn = remote._calculate_shuffle_buffer_size_fn(
            rows_per_worker, row_bytes, 0)
        assert int(size_fn()) == 0
Пример #2
0
    def test_calculate_shuffle_buffer_size_small_row_size(self, mock_local_size, mock_allgather):
        """Check the computed shuffle buffer size for a small data set.

        With an average row size of 100 and 100 training rows per worker,
        the callable built by ``remote._calculate_shuffle_buffer_size_fn``
        (third argument ``None``) must return exactly the per-worker row
        count.

        ``mock_local_size`` and ``mock_allgather`` are supplied by patch
        decorators that are not visible in this chunk -- presumably they
        replace ``hvd.local_size`` and ``hvd.allgather``; confirm there.
        """
        import horovod.torch as hvd
        hvd.init()

        # Simulated topology: 4 ranks in total, each reporting a local size of 2.
        world_size = 4
        ranks_per_host = 2
        mock_allgather.return_value = torch.tensor([ranks_per_host] * world_size)
        mock_local_size.return_value = ranks_per_host

        row_bytes = 100
        rows_per_worker = 100

        size_fn = remote._calculate_shuffle_buffer_size_fn(
            rows_per_worker, row_bytes, None)
        # Expected result is the row count itself (presumably because the
        # whole data set fits under the memory cap -- confirm in `remote`).
        assert size_fn() == rows_per_worker