示例#1
0
def _mean_reduce(rank: int, world_size: int) -> None:
    """Worker: checks that ``mean_reduce`` averages each rank's tensor.

    Each rank contributes ``rank + 1``, so the mean over all ranks
    must equal ``(world_size + 1) / 2``.
    """
    _setup(rank, world_size)

    payload = torch.tensor(rank + 1, dtype=torch.float).to(rank)
    reduced = mean_reduce(payload, world_size)

    expected = torch.tensor((world_size + 1) / 2, dtype=torch.float).to(rank)
    assert reduced == expected

    _cleanup()
示例#2
0
 def mean_reduce_ddp_metrics(self, metrics: Dict) -> Dict:
     """Syncs ``metrics`` over ``world_size`` in the distributed mode."""
     synced = {}
     for name, value in metrics.items():
         # Wrap the scalar metric in a tensor on this process's device,
         # then average it across all participating processes.
         as_tensor = torch.tensor(value, device=self.device)
         synced[name] = mean_reduce(
             as_tensor,
             world_size=self.state.num_processes,
         )
     return synced
示例#3
0
    def sync_tensor(self, tensor: torch.Tensor, mode: str) -> torch.Tensor:
        """Syncs ``tensor`` over ``world_size`` in distributed mode.

        Args:
            tensor: tensor to sync across the processes.
            mode: tensor synchronization type,
                must be one of ``'sum'`` or ``'mean'``
                (the parameter has no default value).

        Returns:
            torch.Tensor with synchronized values.

        Raises:
            ValueError: if mode is out of ``sum`` or ``mean``
        """
        if mode == "sum":
            return sum_reduce(tensor)
        if mode == "mean":
            return mean_reduce(tensor, self.world_size)
        # Reject anything else explicitly rather than silently falling
        # through to an unintended reduction.
        raise ValueError(f"Unknown sync_type '{mode}'")
示例#4
0
 def mean_reduce_ddp_metrics(self, metrics: Dict) -> Dict:
     """Syncs ``metrics`` over ``world_size`` in the distributed mode."""
     dist_type = self.state.distributed_type
     if dist_type in (DistributedType.MULTI_CPU, DistributedType.MULTI_GPU):
         # Multi-process CPU/GPU: average each metric across processes
         # via the all-reduce based helper.
         synced = {}
         for name, value in metrics.items():
             synced[name] = mean_reduce(
                 torch.tensor(value, device=self.device),
                 world_size=self.state.num_processes,
             )
         metrics = synced
     elif dist_type == DistributedType.TPU:
         # TPU: use XLA's mesh_reduce; tensors are converted to Python
         # scalars first, since mesh_reduce aggregates host-side values.
         metrics = {
             name: xm.mesh_reduce(
                 name,
                 value.item() if isinstance(value, torch.Tensor) else value,
                 np.mean,
             )
             for name, value in metrics.items()
         }
     return metrics