Example #1
    def test_stability(self):
        hvd.init()
        # TODO support non-MPI Adasum operation
        if not hvd.mpi_enabled():
            self.skipTest("MPI not enabled")

        device = torch.device('cuda:{}'.format(hvd.local_rank())) \
            if torch.cuda.is_available() else torch.device('cpu')
        np.random.seed(2)
        torch.manual_seed(2)
        size = hvd.size()
        local_size = hvd.local_size()
        rank = hvd.rank()

        for data_type in self.data_types:
            N = 1024
            # Rank-1 matrix q = a @ r.T; the entries of a sit near the
            # smallest normal value of data_type, so the reduction is
            # numerically delicate.
            a = np.random.normal(0, np.finfo(data_type).tiny, (N, 1)).astype(np.float64)
            r = np.random.normal(0, 1, (size, 1)).astype(np.float64)
            q = np.dot(a, r.T).astype(data_type).astype(np.float64)
            # Each rank contributes its own column of q.
            tensor = np.zeros(N, dtype=data_type)
            tensor[:] = q[:, hvd.rank()]

            tensor = torch.from_numpy(tensor).to(device)

            # In-place Adasum allreduce across all ranks.
            hvd.allreduce_(tensor, op=hvd.Adasum)

            expected = np.sum(q, axis=1) / size
            comp = self.are_close(data_type, expected, tensor.cpu().numpy())
            if comp:
                print('Stability test passed')
            else:
                print('computed: ', tensor)
                print('expected: ', expected)
                print('off by: ', self.diff_ratio(expected, tensor.cpu().numpy()))
            assert comp
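
The tests in these snippets call two helpers, are_close and diff_ratio, that are not shown here. A minimal sketch of what they might look like on the test class, with an assumed dtype-scaled tolerance (the method names match the calls above, but the threshold is an assumption, not the actual implementation):

    def diff_ratio(self, expected, computed):
        # Relative L2 error between the expected and computed reductions.
        return np.linalg.norm(expected - computed) / np.linalg.norm(expected)

    def are_close(self, data_type, expected, computed):
        # Assumed tolerance: a small multiple of the dtype's machine epsilon.
        return self.diff_ratio(expected, computed) < np.finfo(data_type).eps * 16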
Example #2
def init():
    hvd.init()
    if not hvd.mpi_enabled():
        raise ModuleNotFoundError("MPI not enabled for Horovod")
    _basics.init()
    # Load the compiled extension next to this module (so_path, so_name and
    # get_ext_suffix are module-level names) so its custom torch ops register.
    dir_path = os.path.dirname(so_path)
    full_path = os.path.join(dir_path, so_name + get_ext_suffix())
    torch.ops.load_library(full_path)
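
Since init() raises when Horovod was built without MPI, a caller can guard the call explicitly. A minimal sketch of such a guard (the fallback message is illustrative):

try:
    init()
except ModuleNotFoundError:
    # Horovod was built without MPI; the Adasum torch ops are unavailable.
    print('MPI not enabled for Horovod; skipping Adasum setup')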
Example #3
    def test_parallel(self):
        hvd.init()
        # TODO support non-MPI Adasum operation
        # Only do this test if there are GPUs available.
        if not hvd.mpi_enabled() or not torch.cuda.is_available():
            self.skipTest("MPI not enabled or no GPUs available")

        device = torch.device('cuda:{}'.format(hvd.local_rank()))
        np.random.seed(2)
        torch.manual_seed(2)
        size = hvd.size()
        local_size = hvd.local_size()
        rank = hvd.rank()

        for data_type in self.data_types:
            # Mix of awkward (indivisible) and large tensor lengths.
            all_Ns = [size * 20 - 13, size * 2 + 1, size + 2, 2**19]
            tensors = []
            all_qs = []
            for N in all_Ns:
                a = np.random.normal(0, 1, (N, 1)).astype(np.float64)
                r = np.random.normal(0, 1, (size, 1)).astype(np.float64)
                q = np.dot(a, r.T)
                q = q.astype(data_type)
                all_qs.append(q.astype(np.float64))
                tensors.append(q[:, hvd.rank()])

            tensors = [torch.from_numpy(x).to(device) for x in tensors]

            # Queue all reductions asynchronously, then wait on each handle.
            handles = [
                hvd.allreduce_async(tensor, op=hvd.Adasum)
                for tensor in tensors
            ]

            reduced_tensors = [synchronize(h) for h in handles]

            expected = [np.sum(q, axis=1) / size for q in all_qs]
            all_comp = [
                self.are_close(data_type, e,
                               rt.cpu().numpy())
                for e, rt in zip(expected, reduced_tensors)
            ]
            if np.all(all_comp):
                print('Parallel test passed')
            else:
                for c, e, rt in zip(all_comp, expected, reduced_tensors):
                    if not c:
                        print('computed: ', rt)
                        print('expected: ', e)
                        print('off by: ', self.diff_ratio(e, rt.cpu().numpy()))
            assert np.all(all_comp)
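
For comparison, the synchronous equivalent of the async pattern in Example #3 would reduce one tensor at a time, so the reductions cannot overlap. A minimal sketch using Horovod's blocking allreduce (illustrative only):

reduced_tensors = [hvd.allreduce(t, op=hvd.Adasum) for t in tensors]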
Example #4
    def test_stability_2(self):
        hvd.init()
        # TODO support non-MPI Adasum operation
        if not hvd.mpi_enabled():
            self.skipTest("MPI not enabled")
        device = torch.device('cuda:{}'.format(hvd.local_rank())) \
            if torch.cuda.is_available() else torch.device('cpu')
        np.random.seed(2)
        torch.manual_seed(2)
        size = hvd.size()
        local_size = hvd.local_size()
        rank = hvd.rank()

        for data_type in self.data_types:
            N = 1024
            # Spread per-rank magnitudes geometrically between the smallest
            # normal value and the square root of the largest value of
            # data_type, to stress the dynamic range of the reduction.
            dt_min = np.finfo(data_type).tiny.astype(np.float64)
            dt_max = math.sqrt(np.finfo(data_type).max.astype(np.float64))
            a = np.random.normal(0, 1, (N, 1)).astype(np.float64)
            r = np.array([
                dt_max**(float(i + 1) / float(size)) *
                dt_min**(float(size - i - 1) / float(size))
                for i in range(size)
            ]).reshape(size, 1).astype(np.float64)
            np.random.shuffle(r)
            q = np.dot(a, r.T).astype(data_type).astype(np.float64)
            tensor = np.zeros(N, dtype=data_type)
            tensor[:] = q[:, hvd.rank()]

            tensor = torch.from_numpy(tensor).to(device)

            hvd.allreduce_(tensor, op=hvd.Adasum)

            expected = np.sum(q, axis=1) / size
            comp = self.are_close(data_type, expected, tensor.cpu().numpy())
            if comp:
                print('Stability 2 test passed')
            else:
                print('computed: ', tensor)
                print('expected: ', expected)
                print('off by: ',
                      self.diff_ratio(expected,
                                      tensor.cpu().numpy()))
            assert comp
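
All four tests compare Adasum's output against the same reference: the element-wise mean of the per-rank columns of q. A standalone sketch of that reference computation with toy sizes (all values here are illustrative):

import numpy as np

size = 4                              # number of ranks (illustrative)
N = 8
a = np.random.normal(0, 1, (N, 1))
r = np.random.normal(0, 1, (size, 1))
q = np.dot(a, r.T)                    # column i is rank i's input tensor
expected = np.sum(q, axis=1) / size   # mean across ranks
assert np.allclose(expected, q.mean(axis=1))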