def test_calculate_shuffle_buffer_size(self, mock_local_size, mock_allgather):
    # Exercises the callable returned by remote._calculate_shuffle_buffer_size_fn
    # twice: once with None as the third argument and once with an explicit 0.
    import horovod.torch as hvd

    hvd.init()

    row_bytes = 100000
    rows_per_worker = 1000000

    # case with 2 workers, one with 5 ranks and second with 3 ranks
    mock_local_size.return_value = 2
    mock_allgather.return_value = torch.tensor([5, 5, 5, 5, 5, 3, 3, 3])

    # Third argument None: the result is compared against the memory cap
    # split by row size and by 5.
    size_fn = remote._calculate_shuffle_buffer_size_fn(
        rows_per_worker, row_bytes, None)
    computed_size = int(size_fn())

    memory_cap_bytes = constants.TOTAL_BUFFER_MEMORY_CAP_GIB * constants.BYTES_PER_GIB
    # The divisor 5 matches the largest entry of the mocked allgather result,
    # not the mocked local_size of 2.
    # NOTE(review): presumably the helper uses the max local size across
    # workers -- confirm against remote._calculate_shuffle_buffer_size_fn.
    assert computed_size == int(memory_cap_bytes / row_bytes / 5)

    no_shuffle_fn = remote._calculate_shuffle_buffer_size_fn(
        rows_per_worker, row_bytes, 0)
    # Set 0 for non-shuffle
    assert int(no_shuffle_fn()) == 0
def test_calculate_shuffle_buffer_size_small_row_size(self, mock_local_size, mock_allgather):
    # With 100-byte rows and 100 rows per worker, the computed shuffle buffer
    # size is expected to equal the per-worker row count.
    import horovod.torch as hvd

    hvd.init()

    world_size = 4
    per_host_ranks = 2
    # Every one of the 4 mocked ranks reports the same local size of 2.
    mock_allgather.return_value = torch.tensor([per_host_ranks] * world_size)
    mock_local_size.return_value = per_host_ranks

    row_bytes = 100
    rows_per_worker = 100

    size_fn = remote._calculate_shuffle_buffer_size_fn(
        rows_per_worker, row_bytes, None)
    buffer_size = size_fn()

    assert buffer_size == rows_per_worker