def test_dist_broadcast_coalesced(self):
    # Set up process group.
    store = c10d.TCPStore('localhost', self.port, self.is_master)
    options = c10d.ProcessGroupGloo.Options()
    options.devices = [
        c10d.ProcessGroupGloo.create_tcp_device(interface="lo")
    ]
    process_group = c10d.ProcessGroupGloo(
        store, self.rank, self.world_size, options)

    device = torch.device('cuda')
    target = torch.arange(10, dtype=torch.float64, device=device).chunk(5)

    if self.is_master:
        # All processes should have these tensors in the end.
        tensors = target
    else:
        # Non-master processes start with zero-filled tensors and should
        # be filled with the tensors from the master. The dtype must match
        # the master's tensors for the coalesced broadcast to line up.
        tensors = torch.zeros(10, dtype=torch.float64, device=device).chunk(5)

    c10d._dist_broadcast_coalesced(
        tensors,
        buffer_size=10,
        process_group=process_group)

    if not self.is_master:
        self.assertEqual(tensors, target)
def _dist_broadcast_coalesced(tensors, buffer_size=None, process_group=None):
    # Fall back to the default process group and bucket size when the
    # caller does not provide them.
    if process_group is None:
        process_group = dist.distributed_c10d._get_default_group()
    if buffer_size is None:
        buffer_size = BROADCAST_BUCKET_SIZE
    dist._dist_broadcast_coalesced(process_group, tensors, buffer_size, False)
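For context, a minimal sketch of how this helper might be exercised end-to-end. It assumes the helper above is in scope, that torch.distributed was built with the Gloo backend, and that the loopback address and port are free; none of these specifics come from the source.

import torch
import torch.distributed as dist
import torch.multiprocessing as mp

def _worker(rank, world_size):
    # Join the default process group; address and port are arbitrary choices.
    dist.init_process_group(
        backend='gloo',
        init_method='tcp://127.0.0.1:29500',
        rank=rank,
        world_size=world_size)

    if rank == 0:
        # Rank 0 holds the source tensors.
        tensors = list(torch.arange(10, dtype=torch.float64).chunk(5))
    else:
        # Other ranks start zeroed and are overwritten in place.
        tensors = list(torch.zeros(10, dtype=torch.float64).chunk(5))

    # No process_group argument: the helper falls back to the default group.
    _dist_broadcast_coalesced(tensors, buffer_size=10)

    # Every rank should now hold the chunks of arange(10).
    expected = torch.arange(10, dtype=torch.float64).chunk(5)
    assert all(t.eq(e).all() for t, e in zip(tensors, expected))

if __name__ == '__main__':
    mp.spawn(_worker, args=(2,), nprocs=2, join=True)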
def _dist_broadcast_coalesced(self, tensors, buffer_size):
    # Delegate to the C extension, broadcasting over this wrapper's
    # process group instead of the default group.
    dist._dist_broadcast_coalesced(self.process_group, tensors, buffer_size, False)
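A sketch of the call site this method implies: in a DistributedDataParallel-style wrapper, module state is broadcast from the master rank once at construction so all replicas start identical. The attribute names (module, broadcast_bucket_size) and the state_dict traversal are assumptions modeled on that pattern, not the verbatim source.

def _sync_module_states(self):
    # Collect parameters and buffers into one list and broadcast them in
    # buckets so every replica starts from identical state.
    module_states = list(self.module.state_dict().values())
    if len(module_states) > 0:
        self._dist_broadcast_coalesced(module_states, self.broadcast_bucket_size)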