def test_var(self):
    array_0_len = ht.MPI_WORLD.size * 2
    array_1_len = ht.MPI_WORLD.size * 2
    array_2_len = ht.MPI_WORLD.size * 2

    # test raises
    x = ht.zeros((2, 3, 4))
    with self.assertRaises(ValueError):
        x.var(axis=10)
    with self.assertRaises(ValueError):
        x.var(axis=[4])
    with self.assertRaises(ValueError):
        x.var(axis=[-4])
    with self.assertRaises(TypeError):
        ht.var(x, axis="01")
    with self.assertRaises(ValueError):
        ht.var(x, axis=(0, "10"))
    with self.assertRaises(ValueError):
        ht.var(x, axis=(0, 0))
    with self.assertRaises(NotImplementedError):
        ht.var(x, ddof=2)
    with self.assertRaises(ValueError):
        ht.var(x, ddof=-2)
    with self.assertRaises(ValueError):
        ht.mean(x, axis=torch.Tensor([0, 0]))

    a = ht.arange(1, 5)
    self.assertEqual(a.var(ddof=1), 1.666666666666666)

    # ones
    dimensions = []
    for d in [array_0_len, array_1_len, array_2_len]:
        dimensions.extend([d])
        hold = list(range(len(dimensions)))
        hold.append(None)
        for split in hold:  # loop over the number of dimensions of the test array
            z = ht.ones(dimensions, split=split)
            res = z.var(ddof=0)
            total_dims_list = list(z.shape)
            self.assertTrue((res == 0).all())
            # loop over the different single dimensions for var
            for it in range(len(z.shape)):
                res = z.var(axis=it)
                self.assertTrue(ht.allclose(res, 0))
                target_dims = [
                    total_dims_list[q] for q in range(len(total_dims_list)) if q != it
                ]
                if not target_dims:
                    target_dims = ()
                self.assertEqual(res.gshape, tuple(target_dims))
                if z.split is None:
                    sp = None
                else:
                    sp = z.split if it > z.split else z.split - 1
                if it == split:
                    sp = None
                self.assertEqual(res.split, sp)
                if split == it:
                    res = z.var(axis=it)
                    self.assertTrue(ht.allclose(res, 0))
            loop_list = [
                ",".join(map(str, comb))
                for comb in combinations(list(range(len(z.shape))), 2)
            ]
            # loop over the different combinations of dimensions for var
            for it in loop_list:
                lp_split = [int(q) for q in it.split(",")]
                res = z.var(axis=lp_split)
                self.assertTrue((res == 0).all())
                target_dims = [
                    total_dims_list[q]
                    for q in range(len(total_dims_list))
                    if q not in lp_split
                ]
                if not target_dims:
                    target_dims = (1,)
                if res.gshape:
                    self.assertEqual(res.gshape, tuple(target_dims))
                if res.split is not None:
                    if any([split >= x for x in lp_split]):
                        self.assertEqual(res.split, len(target_dims) - 1)
                    else:
                        self.assertEqual(res.split, z.split)

    # values for the iris dataset var measured by LibreOffice Calc
    for sp in [None, 0, 1]:
        iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=sp)
        self.assertTrue(ht.allclose(ht.var(iris, bessel=True), 3.90318519755147))
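# The hard-coded expectation above follows from the ddof ("delta degrees of
# freedom") convention: var = sum((x - mean)^2) / (n - ddof). A minimal
# plain-Python sketch of that formula (illustrative helper, not heat's
# implementation):
def _sketch_var_ddof(values, ddof=0):
    """Variance of a flat sequence with delta degrees of freedom."""
    n = len(values)
    mean = sum(values) / n
    return sum((v - mean) ** 2 for v in values) / (n - ddof)

# _sketch_var_ddof([1, 2, 3, 4], ddof=1) == 5.0 / 3 ≈ 1.6667,
# matching the a.var(ddof=1) assertion above.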
def test___binary_bit_op_broadcast(self):
    # broadcast without split
    left_tensor = ht.ones((4, 1), dtype=ht.int32)
    right_tensor = ht.ones((1, 2), dtype=ht.int32)
    result = left_tensor & right_tensor
    self.assertEqual(result.shape, (4, 2))
    result = right_tensor & left_tensor
    self.assertEqual(result.shape, (4, 2))

    # broadcast with split=0 for both operands
    left_tensor = ht.ones((4, 1), split=0, dtype=ht.int32)
    right_tensor = ht.ones((1, 2), split=0, dtype=ht.int32)
    result = left_tensor | right_tensor
    self.assertEqual(result.shape, (4, 2))
    result = right_tensor | left_tensor
    self.assertEqual(result.shape, (4, 2))

    # broadcast with split=1 for both operands
    left_tensor = ht.ones((4, 1), split=1, dtype=ht.int32)
    right_tensor = ht.ones((1, 2), split=1, dtype=ht.int32)
    result = left_tensor ^ right_tensor
    self.assertEqual(result.shape, (4, 2))
    result = right_tensor ^ left_tensor
    self.assertEqual(result.shape, (4, 2))

    # broadcast with split=1 for the second operand
    left_tensor = ht.ones((4, 1), dtype=ht.int32)
    right_tensor = ht.ones((1, 2), split=1, dtype=ht.int32)
    result = left_tensor & right_tensor
    self.assertEqual(result.shape, (4, 2))
    result = right_tensor & left_tensor
    self.assertEqual(result.shape, (4, 2))

    # broadcast with split=0 for the first operand
    left_tensor = ht.ones((4, 1), split=0, dtype=ht.int32)
    right_tensor = ht.ones((1, 2), dtype=ht.int32)
    result = left_tensor | right_tensor
    self.assertEqual(result.shape, (4, 2))
    result = right_tensor | left_tensor
    self.assertEqual(result.shape, (4, 2))

    # broadcast with unequal dimensions and one split tensor
    left_tensor = ht.ones((2, 4, 1), split=0, dtype=ht.int32)
    right_tensor = ht.ones((1, 2), dtype=ht.int32)
    result = left_tensor ^ right_tensor
    self.assertEqual(result.shape, (2, 4, 2))
    result = right_tensor ^ left_tensor
    self.assertEqual(result.shape, (2, 4, 2))

    # broadcast with unequal dimensions, a scalar, and one split tensor
    left_scalar = np.int32(1)
    right_tensor = ht.ones((1, 2), split=0, dtype=ht.int32)
    result = ht.bitwise_or(left_scalar, right_tensor)
    self.assertEqual(result.shape, (1, 2))
    result = right_tensor | left_scalar
    self.assertEqual(result.shape, (1, 2))

    # broadcast with unequal dimensions and two split tensors
    left_tensor = ht.ones((4, 1, 3, 1, 2), split=2, dtype=torch.uint8)
    right_tensor = ht.ones((1, 3, 1), split=0, dtype=torch.uint8)
    result = left_tensor & right_tensor
    self.assertEqual(result.shape, (4, 1, 3, 3, 2))
    result = right_tensor & left_tensor
    self.assertEqual(result.shape, (4, 1, 3, 3, 2))

    with self.assertRaises(TypeError):
        ht.bitwise_and(ht.ones((1, 2)), "wrong type")
    with self.assertRaises(NotImplementedError):
        ht.bitwise_or(
            ht.ones((1, 2), dtype=ht.int32, split=0),
            ht.ones((1, 2), dtype=ht.int32, split=1),
        )

    a = ht.ones((4, 4), split=None)
    b = ht.zeros((4, 4), split=0)
    self.assertTrue(ht.equal(a * b, b))
    self.assertTrue(ht.equal(b * a, b))
    self.assertTrue(ht.equal(a[0] * b[0], b[0]))
    self.assertTrue(ht.equal(b[0] * a[0], b[0]))
    self.assertTrue(ht.equal(a * b[0:1], b))
    self.assertTrue(ht.equal(b[0:1] * a, b))
    self.assertTrue(ht.equal(a[0:1] * b, b))
    self.assertTrue(ht.equal(b * a[0:1], b))

    c = ht.array([1, 2, 3, 4], comm=ht.MPI_SELF)
    with self.assertRaises(NotImplementedError):
        b + c
    with self.assertRaises(TypeError):
        ht.minimum(a, np.float128(1))
    with self.assertRaises(TypeError):
        ht.minimum(np.float128(1), a)
    with self.assertRaises(NotImplementedError):
        a.resplit(1) * b
    with self.assertRaises(ValueError):
        a[2:] * b
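# The shape assertions above follow NumPy-style broadcasting: shapes are
# aligned from the trailing dimension, and each pair of sizes must either
# match or contain a 1. A small sketch of that rule (hypothetical helper, not
# part of heat's public API):
from itertools import zip_longest

def _sketch_broadcast_shape(lhs, rhs):
    """Compute the NumPy-style broadcast shape of two shapes."""
    result = []
    for left, right in zip_longest(reversed(lhs), reversed(rhs), fillvalue=1):
        if left != right and 1 not in (left, right):
            raise ValueError("shapes {} and {} are not broadcastable".format(lhs, rhs))
        result.append(max(left, right))
    return tuple(reversed(result))

# _sketch_broadcast_shape((4, 1, 3, 1, 2), (1, 3, 1)) == (4, 1, 3, 3, 2),
# matching the two-split-tensor case above.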
def test_average(self):
    data = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
    ht_array = ht.array(data, dtype=float)
    comparison = np.asanyarray(data)

    # check global average
    avg = ht.average(ht_array)
    self.assertIsInstance(avg, ht.DNDarray)
    self.assertEqual(avg.shape, ())
    self.assertEqual(avg.lshape, ())
    self.assertEqual(avg.split, None)
    self.assertEqual(avg.dtype, ht.float32)
    self.assertEqual(avg._DNDarray__array.dtype, torch.float32)
    self.assertEqual(avg.numpy(), np.average(comparison))

    # average along first axis
    avg_vertical = ht.average(ht_array, axis=0)
    self.assertIsInstance(avg_vertical, ht.DNDarray)
    self.assertEqual(avg_vertical.shape, (3,))
    self.assertEqual(avg_vertical.lshape, (3,))
    self.assertEqual(avg_vertical.split, None)
    self.assertEqual(avg_vertical.dtype, ht.float32)
    self.assertEqual(avg_vertical._DNDarray__array.dtype, torch.float32)
    self.assertTrue((avg_vertical.numpy() == np.average(comparison, axis=0)).all())

    # average along second axis
    avg_horizontal = ht.average(ht_array, axis=1)
    self.assertIsInstance(avg_horizontal, ht.DNDarray)
    self.assertEqual(avg_horizontal.shape, (4,))
    self.assertEqual(avg_horizontal.lshape, (4,))
    self.assertEqual(avg_horizontal.split, None)
    self.assertEqual(avg_horizontal.dtype, ht.float32)
    self.assertEqual(avg_horizontal._DNDarray__array.dtype, torch.float32)
    self.assertTrue((avg_horizontal.numpy() == np.average(comparison, axis=1)).all())

    # check weighted average over all float elements of split 3d tensor, across split axis
    random_volume = ht.array(
        torch.randn((3, 3, 3), dtype=torch.float64, device=self.device.torch_device),
        is_split=1,
    )
    size = random_volume.comm.size
    random_weights = ht.array(
        torch.randn((3 * size,), dtype=torch.float64, device=self.device.torch_device),
        split=0,
    )
    avg_volume = ht.average(random_volume, weights=random_weights, axis=1)
    np_avg_volume = np.average(random_volume.numpy(), weights=random_weights.numpy(), axis=1)
    self.assertIsInstance(avg_volume, ht.DNDarray)
    self.assertEqual(avg_volume.shape, (3, 3))
    self.assertEqual(avg_volume.lshape, (3, 3))
    self.assertEqual(avg_volume.dtype, ht.float64)
    self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float64)
    self.assertEqual(avg_volume.split, None)
    self.assertAlmostEqual(avg_volume.numpy().all(), np_avg_volume.all())
    avg_volume_with_cumwgt = ht.average(
        random_volume, weights=random_weights, axis=1, returned=True
    )
    self.assertIsInstance(avg_volume_with_cumwgt, tuple)
    self.assertIsInstance(avg_volume_with_cumwgt[1], ht.DNDarray)
    self.assertEqual(avg_volume_with_cumwgt[1].gshape, avg_volume_with_cumwgt[0].gshape)
    self.assertEqual(avg_volume_with_cumwgt[1].split, avg_volume_with_cumwgt[0].split)

    # check weighted average over all float elements of split 3d tensor (3d weights)
    random_weights_3d = ht.array(
        torch.randn((3, 3, 3), dtype=torch.float64, device=self.device.torch_device),
        is_split=1,
    )
    avg_volume = ht.average(random_volume, weights=random_weights_3d, axis=1)
    np_avg_volume = np.average(random_volume.numpy(), weights=random_weights_3d.numpy(), axis=1)
    self.assertIsInstance(avg_volume, ht.DNDarray)
    self.assertEqual(avg_volume.shape, (3, 3))
    self.assertEqual(avg_volume.lshape, (3, 3))
    self.assertEqual(avg_volume.dtype, ht.float64)
    self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float64)
    self.assertEqual(avg_volume.split, None)
    self.assertAlmostEqual(avg_volume.numpy().all(), np_avg_volume.all())
    avg_volume_with_cumwgt = ht.average(
        random_volume, weights=random_weights_3d, axis=1, returned=True
    )
    self.assertIsInstance(avg_volume_with_cumwgt, tuple)
    self.assertIsInstance(avg_volume_with_cumwgt[1], ht.DNDarray)
    self.assertEqual(avg_volume_with_cumwgt[1].gshape, avg_volume_with_cumwgt[0].gshape)
    self.assertEqual(avg_volume_with_cumwgt[1].split, avg_volume_with_cumwgt[0].split)

    # check average over all float elements of split 3d tensor, tuple axis
    random_volume = ht.random.randn(3, 3, 3, split=0)
    avg_volume = ht.average(random_volume, axis=(1, 2))
    self.assertIsInstance(avg_volume, ht.DNDarray)
    self.assertEqual(avg_volume.shape, (3,))
    self.assertEqual(avg_volume.lshape[0], random_volume.lshape[0])
    self.assertEqual(avg_volume.dtype, ht.float32)
    self.assertEqual(avg_volume._DNDarray__array.dtype, torch.float32)
    self.assertEqual(avg_volume.split, 0)

    # check weighted average over all float elements of split 5d tensor, along split axis
    random_5d = ht.random.randn(random_volume.comm.size, 2, 3, 4, 5, split=0)
    axis = random_5d.split
    random_weights = ht.random.randn(random_5d.gshape[axis], split=0)
    avg_5d = random_5d.average(weights=random_weights, axis=axis)
    self.assertIsInstance(avg_5d, ht.DNDarray)
    self.assertEqual(avg_5d.gshape, (2, 3, 4, 5))
    self.assertLessEqual(avg_5d.lshape[1], 3)
    self.assertEqual(avg_5d.dtype, ht.float32)
    self.assertEqual(avg_5d._DNDarray__array.dtype, torch.float32)
    self.assertEqual(avg_5d.split, None)

    # check exceptions
    with self.assertRaises(TypeError):
        ht.average(comparison)
    with self.assertRaises(TypeError):
        ht.average(random_5d, weights=random_weights.numpy(), axis=axis)
    with self.assertRaises(TypeError):
        ht.average(random_5d, weights=random_weights, axis=None)
    with self.assertRaises(NotImplementedError):
        ht.average(random_5d, weights=random_weights, axis=(1, 2))
    random_weights = ht.random.randn(random_5d.gshape[axis], random_5d.gshape[axis + 1])
    with self.assertRaises(TypeError):
        ht.average(random_5d, weights=random_weights, axis=axis)
    random_shape_weights = ht.random.randn(random_5d.gshape[axis] + 1)
    with self.assertRaises(ValueError):
        ht.average(random_5d, weights=random_shape_weights, axis=axis)
    zero_weights = ht.zeros((random_5d.gshape[axis]), split=0)
    with self.assertRaises(ZeroDivisionError):
        ht.average(random_5d, weights=zero_weights, axis=axis)
    weights_5d_split_mismatch = ht.ones(random_5d.gshape, split=-1)
    with self.assertRaises(NotImplementedError):
        ht.average(random_5d, weights=weights_5d_split_mismatch, axis=axis)
    with self.assertRaises(TypeError):
        ht_array.average(axis=1.1)
    with self.assertRaises(TypeError):
        ht_array.average(axis="y")
    with self.assertRaises(ValueError):
        ht.average(ht_array, axis=-4)
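# The weighted cases above reduce to avg = sum(w * x) / sum(w) along the chosen
# axis, which also explains the ZeroDivisionError branch when the weights sum
# to zero. A short NumPy sketch of the same computation for 1-d weights
# (illustrative, mirroring np.average semantics rather than heat internals):
def _sketch_weighted_average(a, weights, axis):
    import numpy as np

    a = np.asarray(a, dtype=np.float64)
    w = np.asarray(weights, dtype=np.float64)
    scale = w.sum()
    if scale == 0:
        raise ZeroDivisionError("weights sum to zero, cannot be normalized")
    # broadcast the 1-d weights against the reduction axis, as np.average does
    shape = [1] * a.ndim
    shape[axis] = -1
    return (a * w.reshape(shape)).sum(axis=axis) / scale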
def test_mean(self):
    array_0_len = 5
    array_1_len = 5
    array_2_len = 5

    x = ht.zeros((2, 3, 4))
    with self.assertRaises(ValueError):
        x.mean(axis=10)
    with self.assertRaises(ValueError):
        x.mean(axis=[4])
    with self.assertRaises(ValueError):
        x.mean(axis=[-4])
    with self.assertRaises(TypeError):
        ht.mean(x, axis="01")
    with self.assertRaises(ValueError):
        ht.mean(x, axis=(0, "10"))
    with self.assertRaises(ValueError):
        ht.mean(x, axis=(0, 0))
    with self.assertRaises(ValueError):
        ht.mean(x, axis=torch.Tensor([0, 0]))

    a = ht.arange(1, 5)
    self.assertEqual(a.mean(), 2.5)

    # ones
    dimensions = []
    for d in [array_0_len, array_1_len, array_2_len]:
        dimensions.extend([d])
        hold = list(range(len(dimensions)))
        hold.append(None)
        for split in hold:  # loop over the split dimensions of the test array
            z = ht.ones(dimensions, split=split)
            res = z.mean()
            total_dims_list = list(z.shape)
            self.assertTrue((res == 1).all())
            # loop over the different single dimensions for mean
            for it in range(len(z.shape)):
                res = z.mean(axis=it)
                self.assertTrue((res == 1).all())
                target_dims = [
                    total_dims_list[q] for q in range(len(total_dims_list)) if q != it
                ]
                if not target_dims:
                    target_dims = ()
                self.assertEqual(res.gshape, tuple(target_dims))
                if z.split is None:
                    sp = None
                else:
                    sp = z.split if it > z.split else z.split - 1
                if it == split:
                    sp = None
                self.assertEqual(res.split, sp)
            loop_list = [
                ",".join(map(str, comb))
                for comb in combinations(list(range(len(z.shape))), 2)
            ]
            # loop over the different combinations of dimensions for mean
            for it in loop_list:
                lp_split = [int(q) for q in it.split(",")]
                res = z.mean(axis=lp_split)
                self.assertTrue((res == 1).all())
                target_dims = [
                    total_dims_list[q]
                    for q in range(len(total_dims_list))
                    if q not in lp_split
                ]
                if not target_dims:
                    target_dims = (1,)
                if res.gshape:
                    self.assertEqual(res.gshape, tuple(target_dims))
                if res.split is not None:
                    if any([split >= x for x in lp_split]):
                        self.assertEqual(res.split, len(target_dims) - 1)
                    else:
                        self.assertEqual(res.split, z.split)

    # values for the iris dataset mean measured by LibreOffice Calc
    ax0 = ht.array([5.84333333333333, 3.054, 3.75866666666667, 1.19866666666667])
    for sp in [None, 0, 1]:
        iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=sp)
        self.assertTrue(ht.allclose(ht.mean(iris), 3.46366666666667))
        self.assertTrue(ht.allclose(ht.mean(iris, axis=0), ax0))
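# The res.split assertions above encode how a reduction moves the split axis:
# reducing the split axis itself gathers the result (split=None), while
# reducing an axis before the split axis shifts the split one position left.
# A compact sketch of that bookkeeping, mirroring the test's own logic:
def _sketch_reduced_split(split, axis):
    """Expected split of a distributed array after reducing along `axis`."""
    if split is None or axis == split:
        return None
    return split if axis > split else split - 1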
def test_qr(self):
    m, n = 20, 40
    st = torch.randn(m, n, device=device, dtype=torch.float)
    a_comp = ht.array(st, split=0, device=ht_device)
    for t in range(1, 3):
        for sp in range(2):
            a = ht.array(st, split=sp, device=ht_device, dtype=torch.float)
            qr = a.qr(tiles_per_proc=t)
            self.assertTrue(ht.allclose(a_comp - (qr.Q @ qr.R), 0, rtol=1e-5, atol=1e-5))
            self.assertTrue(
                ht.allclose(qr.Q.T @ qr.Q, ht.eye(m, device=ht_device), rtol=1e-5, atol=1e-5)
            )
            self.assertTrue(
                ht.allclose(ht.eye(m, device=ht_device), qr.Q @ qr.Q.T, rtol=1e-5, atol=1e-5)
            )

    m, n = 40, 40
    st1 = torch.randn(m, n, device=device)
    a_comp1 = ht.array(st1, split=0, device=ht_device)
    for t in range(1, 3):
        for sp in range(2):
            a1 = ht.array(st1, split=sp, device=ht_device)
            qr1 = a1.qr(tiles_per_proc=t)
            self.assertTrue(ht.allclose(a_comp1 - (qr1.Q @ qr1.R), 0, rtol=1e-5, atol=1e-5))
            self.assertTrue(
                ht.allclose(qr1.Q.T @ qr1.Q, ht.eye(m, device=ht_device), rtol=1e-5, atol=1e-5)
            )
            self.assertTrue(
                ht.allclose(ht.eye(m, device=ht_device), qr1.Q @ qr1.Q.T, rtol=1e-5, atol=1e-5)
            )

    m, n = 40, 20
    st2 = torch.randn(m, n, dtype=torch.double, device=device)
    a_comp2 = ht.array(st2, split=0, dtype=ht.double, device=ht_device)
    for t in range(1, 3):
        for sp in range(2):
            a2 = ht.array(st2, split=sp, device=ht_device)
            qr2 = a2.qr(tiles_per_proc=t)
            self.assertTrue(ht.allclose(a_comp2, qr2.Q @ qr2.R, rtol=1e-5, atol=1e-5))
            self.assertTrue(
                ht.allclose(
                    qr2.Q.T @ qr2.Q,
                    ht.eye(m, dtype=ht.double, device=ht_device),
                    rtol=1e-5,
                    atol=1e-5,
                )
            )
            self.assertTrue(
                ht.allclose(
                    ht.eye(m, dtype=ht.double, device=ht_device),
                    qr2.Q @ qr2.Q.T,
                    rtol=1e-5,
                    atol=1e-5,
                )
            )

    # test if calculating R alone works
    qr = ht.qr(a2, calc_q=False, overwrite_a=True)
    self.assertTrue(qr.Q is None)

    m, n = 40, 20
    st = torch.randn(m, n, device=device)
    a_comp = ht.array(st, split=None, device=ht_device)
    a = ht.array(st, split=None, device=ht_device)
    qr = a.qr()
    self.assertTrue(ht.allclose(a_comp, qr.Q @ qr.R, rtol=1e-5, atol=1e-5))
    self.assertTrue(
        ht.allclose(qr.Q.T @ qr.Q, ht.eye(m, device=ht_device), rtol=1e-5, atol=1e-5)
    )
    self.assertTrue(
        ht.allclose(ht.eye(m, device=ht_device), qr.Q @ qr.Q.T, rtol=1e-5, atol=1e-5)
    )

    # raises
    with self.assertRaises(TypeError):
        ht.qr(np.zeros((10, 10)))
    with self.assertRaises(TypeError):
        ht.qr(a_comp, tiles_per_proc="ls")
    with self.assertRaises(TypeError):
        ht.qr(a_comp, tiles_per_proc=1, calc_q=30)
    with self.assertRaises(TypeError):
        ht.qr(a_comp, tiles_per_proc=1, overwrite_a=30)
    with self.assertRaises(ValueError):
        ht.qr(a_comp, tiles_per_proc=torch.tensor([1, 2, 3]))
    with self.assertRaises(ValueError):
        ht.qr(ht.zeros((3, 4, 5)))
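# The assertions above check the defining QR properties: A == Q @ R and Q
# orthonormal. As a single-process cross-check of those same properties
# (assuming a recent PyTorch with torch.linalg.qr; this is not heat's tiled,
# distributed algorithm):
def _sketch_qr_properties(m=40, n=20):
    a = torch.randn(m, n)
    q, r = torch.linalg.qr(a, mode="reduced")
    # reconstruction: A ≈ Q @ R
    assert torch.allclose(q @ r, a, atol=1e-5)
    # orthonormal columns: Q^T Q ≈ I
    assert torch.allclose(q.T @ q, torch.eye(min(m, n)), atol=1e-5)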
def test_cov(self):
    x = ht.array([[0, 2], [1, 1], [2, 0]], dtype=ht.float, split=1).T
    if x.comm.size < 3:
        cov = ht.cov(x)
        actual = ht.array([[1, -1], [-1, 1]], split=0)
        self.assertTrue(ht.equal(cov, actual))

    data = np.loadtxt("heat/datasets/data/iris.csv", delimiter=";")
    np_cov = np.cov(data[:, 0], data[:, 1:3], rowvar=False)

    htdata = ht.load("heat/datasets/data/iris.csv", sep=";", split=0)
    ht_cov = ht.cov(htdata[:, 0], htdata[:, 1:3], rowvar=False)
    comp = ht.array(np_cov, dtype=ht.float)
    self.assertTrue(ht.allclose(comp - ht_cov, 0, atol=1e-4))

    np_cov = np.cov(data, rowvar=False)
    ht_cov = ht.cov(htdata, rowvar=False)
    self.assertTrue(ht.allclose(ht.array(np_cov, dtype=ht.float) - ht_cov, 0, atol=1e-4))

    np_cov = np.cov(data, rowvar=False, ddof=1)
    ht_cov = ht.cov(htdata, rowvar=False, ddof=1)
    self.assertTrue(ht.allclose(ht.array(np_cov, dtype=ht.float) - ht_cov, 0, atol=1e-4))

    np_cov = np.cov(data, rowvar=False, bias=True)
    ht_cov = ht.cov(htdata, rowvar=False, bias=True)
    self.assertTrue(ht.allclose(ht.array(np_cov, dtype=ht.float) - ht_cov, 0, atol=1e-4))

    if 1 < x.comm.size < 5:
        htdata = ht.load("heat/datasets/data/iris.csv", sep=";", split=1)
        np_cov = np.cov(data, rowvar=False)
        ht_cov = ht.cov(htdata, rowvar=False)
        self.assertTrue(ht.allclose(ht.array(np_cov, dtype=ht.float), ht_cov, atol=1e-4))

        np_cov = np.cov(data, data, rowvar=True)
        htdata = ht.load("heat/datasets/data/iris.csv", sep=";", split=0)
        ht_cov = ht.cov(htdata, htdata, rowvar=True)
        self.assertTrue(ht.allclose(ht.array(np_cov, dtype=ht.float), ht_cov, atol=1e-4))

    htdata = ht.load("heat/datasets/data/iris.csv", sep=";", split=0)
    with self.assertRaises(RuntimeError):
        ht.cov(htdata[1:], rowvar=False)
    with self.assertRaises(RuntimeError):
        ht.cov(htdata, htdata[1:], rowvar=False)
    with self.assertRaises(TypeError):
        ht.cov(np_cov)
    with self.assertRaises(TypeError):
        ht.cov(htdata, np_cov)
    with self.assertRaises(TypeError):
        ht.cov(htdata, ddof="str")
    with self.assertRaises(ValueError):
        ht.cov(ht.zeros((1, 2, 3)))
    with self.assertRaises(ValueError):
        ht.cov(htdata, ht.zeros((1, 2, 3)))
    with self.assertRaises(ValueError):
        ht.cov(htdata, ddof=10000)
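# The values being compared follow the textbook estimator
# cov(X) = Z^T Z / (N - ddof), with Z the column-centered data and rowvar=False
# meaning columns are variables. A minimal NumPy sketch of that formula
# (illustrative only; np.cov defaults to ddof=1):
def _sketch_cov(data, ddof=0):
    import numpy as np

    x = np.asarray(data, dtype=np.float64)
    centered = x - x.mean(axis=0)
    return centered.T @ centered / (x.shape[0] - ddof)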
def _initialize_cluster_centers(self, X):
    """
    Initializes the k-means centroids.

    Parameters
    ----------
    X : ht.DNDarray, shape=(n_points, n_features)
        The data to initialize the clusters for.
    """
    # always initialize the random state
    if self.random_state is not None:
        ht.random.seed(self.random_state)

    # initialize the centroids by randomly picking some of the points
    if self.init == "random":
        # samples are drawn equally distributed from all involved processes
        _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
        centroids = ht.empty(
            (X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
        )
        if (X.split is None) or (X.split == 0):
            for i in range(self.n_clusters):
                samplerange = (
                    X.gshape[0] // self.n_clusters * i,
                    X.gshape[0] // self.n_clusters * (i + 1),
                )
                sample = ht.random.randint(samplerange[0], samplerange[1]).item()
                # find the process that holds the drawn global sample index
                proc = 0
                for p in range(X.comm.size):
                    if displ[p] > sample:
                        break
                    proc = p
                xi = ht.zeros(X.shape[1], dtype=X.dtype)
                if X.comm.rank == proc:
                    idx = sample - displ[proc]
                    xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
                xi.comm.Bcast(xi, root=proc)
                centroids[:, i] = xi
        else:
            raise NotImplementedError("Not implemented for other splitting-axes")
        self._cluster_centers = centroids.expand_dims(axis=0)

    # directly passed centroids
    elif isinstance(self.init, ht.DNDarray):
        if len(self.init.shape) != 2:
            raise ValueError(
                "passed centroids need to be two-dimensional, but are {}-dimensional".format(
                    len(self.init.shape)
                )
            )
        if self.init.shape[0] != self.n_clusters or self.init.shape[1] != X.shape[1]:
            raise ValueError("passed centroids do not match cluster count or data shape")
        self._cluster_centers = self.init.resplit(None).T.expand_dims(axis=0)

    # kmeans++, smart centroid guessing
    elif self.init == "kmeans++":
        if (X.split is None) or (X.split == 0):
            X = X.expand_dims(axis=2)
            centroids = ht.empty(
                (1, X.shape[1], self.n_clusters), split=None, device=X.device, comm=X.comm
            )
            sample = ht.random.randint(0, X.shape[0] - 1).item()
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            proc = 0
            for p in range(X.comm.size):
                if displ[p] > sample:
                    break
                proc = p
            x0 = ht.zeros(X.shape[1], dtype=X.dtype, device=X.device, comm=X.comm)
            if X.comm.rank == proc:
                idx = sample - displ[proc]
                x0 = ht.array(X.lloc[idx, :, 0], device=X.device, comm=X.comm)
            x0.comm.Bcast(x0, root=proc)
            centroids[0, :, 0] = x0

            for i in range(1, self.n_clusters):
                distances = ((X - centroids[:, :, :i]) ** 2).sum(axis=1, keepdim=True)
                D2 = distances.min(axis=2)
                D2.resplit_(axis=None)
                D2 = D2.squeeze()
                prob = D2 / D2.sum()
                x = ht.random.rand().item()
                sample = 0
                cumulative = 0
                # inverse transform sampling: pick the first index whose
                # cumulative probability exceeds the random draw
                for j in range(len(prob)):
                    if cumulative > x:
                        break
                    cumulative += prob[j].item()
                    sample = j
                proc = 0
                for p in range(X.comm.size):
                    if displ[p] > sample:
                        break
                    proc = p
                xi = ht.zeros(X.shape[1], dtype=X.dtype)
                if X.comm.rank == proc:
                    idx = sample - displ[proc]
                    xi = ht.array(X.lloc[idx, :, 0], device=X.device, comm=X.comm)
                xi.comm.Bcast(xi, root=proc)
                centroids[0, :, i] = xi
        else:
            raise NotImplementedError("Not implemented for other splitting-axes")
        self._cluster_centers = centroids

    else:
        raise ValueError(
            'init needs to be one of "random", ht.DNDarray or "kmeans++", but was {}'.format(
                self.init
            )
        )
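# For reference, the kmeans++ branch above implements D^2 sampling: each new
# centroid is drawn with probability proportional to the squared distance to
# the nearest centroid chosen so far. A compact single-process NumPy sketch of
# the same idea, without any of the MPI bookkeeping (hypothetical helper):
def _sketch_kmeans_plusplus(points, n_clusters, seed=None):
    import numpy as np

    rng = np.random.default_rng(seed)
    points = np.asarray(points, dtype=np.float64)
    centers = [points[rng.integers(len(points))]]
    for _ in range(1, n_clusters):
        # squared distance of every point to its nearest chosen center
        d2 = np.min(
            ((points[:, None, :] - np.stack(centers)[None, :, :]) ** 2).sum(axis=2),
            axis=1,
        )
        prob = d2 / d2.sum()
        centers.append(points[rng.choice(len(points), p=prob)])
    return np.stack(centers)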
def _update_centroids(self, X, matching_centroids):
    """
    Compute the new centroid ``ci`` as the closest sample to the median of the
    data points in ``X`` that are assigned to ``ci``.

    Parameters
    ----------
    X : DNDarray
        Input data
    matching_centroids : DNDarray
        Array filled with indices ``i`` indicating to which cluster ``ci``
        each sample point in ``X`` is assigned
    """
    new_cluster_centers = self._cluster_centers.copy()
    for i in range(self.n_clusters):
        # points in current cluster
        selection = (matching_centroids == i).astype(ht.int64)
        # remove zero-element rows to avoid spoiling the median
        assigned_points = X * selection
        rows = (assigned_points.abs()).sum(axis=1) != 0
        local = assigned_points._DNDarray__array[rows._DNDarray__array]
        clean = ht.array(local, is_split=X.split)
        clean.balance_()
        # failsafe in case no point is assigned to this cluster:
        # draw a random data point to continue/restart
        if clean.shape[0] == 0:
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            sample = ht.random.randint(0, X.shape[0]).item()
            proc = 0
            for p in range(X.comm.size):
                if displ[p] > sample:
                    break
                proc = p
            xi = ht.zeros(X.shape[1], dtype=X.dtype)
            if X.comm.rank == proc:
                idx = sample - displ[proc]
                xi = ht.array(X.lloc[idx, :], device=X.device, comm=X.comm)
            xi.comm.Bcast(xi, root=proc)
            new_cluster_centers[i, :] = xi
        else:
            if clean.shape[0] <= ht.MPI_WORLD.size:
                clean.resplit_(axis=None)
            median = ht.median(clean, axis=0, keepdim=True)
            dist = self._metric(X, median)
            _, displ, _ = X.comm.counts_displs_shape(shape=X.shape, axis=0)
            idx = dist.argmin(axis=0, keepdim=False).item()
            proc = 0
            for p in range(X.comm.size):
                if displ[p] > idx:
                    break
                proc = p
            closest_point = ht.zeros(X.shape[1], dtype=X.dtype)
            if X.comm.rank == proc:
                lidx = idx - displ[proc]
                closest_point = ht.array(X.lloc[lidx, :], device=X.device, comm=X.comm)
            closest_point.comm.Bcast(closest_point, root=proc)
            new_cluster_centers[i, :] = closest_point
    return new_cluster_centers
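# The update rule above is median-based rather than mean-based: the new center
# is the actual sample closest to the coordinate-wise median of the assigned
# points, which keeps centers on data points. A single-process NumPy sketch of
# that rule (hypothetical helper, no distribution logic):
def _sketch_update_centroid(points, assigned_mask):
    import numpy as np

    points = np.asarray(points, dtype=np.float64)
    members = points[assigned_mask]
    median = np.median(members, axis=0)
    # pick the sample (from the full data) nearest to the cluster median
    distances = np.linalg.norm(points - median, axis=1)
    return points[np.argmin(distances)]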
def test_all(self):
    array_len = 9

    # check all over all float elements of 1d tensor locally
    ones_noaxis = ht.ones(array_len)
    x = (ones_noaxis == 1).all()
    self.assertIsInstance(x, ht.DNDarray)
    self.assertEqual(x.shape, (1,))
    self.assertEqual(x.lshape, (1,))
    self.assertEqual(x.dtype, ht.bool)
    self.assertEqual(x.larray.dtype, torch.bool)
    self.assertEqual(x.split, None)
    self.assertEqual(x.larray, 1)

    out_noaxis = ht.zeros((1,))
    ht.all(ones_noaxis, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 1)

    # check all over all float elements of split 1d tensor
    ones_noaxis_split = ht.ones(array_len, split=0)
    floats_is_one = ones_noaxis_split.all()
    self.assertIsInstance(floats_is_one, ht.DNDarray)
    self.assertEqual(floats_is_one.shape, (1,))
    self.assertEqual(floats_is_one.lshape, (1,))
    self.assertEqual(floats_is_one.dtype, ht.bool)
    self.assertEqual(floats_is_one.larray.dtype, torch.bool)
    self.assertEqual(floats_is_one.split, None)
    self.assertEqual(floats_is_one.larray, 1)

    out_noaxis = ht.zeros((1,))
    ht.all(ones_noaxis_split, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 1)

    # check all over all integer elements of 1d tensor locally
    ones_noaxis_int = ht.ones(array_len).astype(ht.int)
    int_is_one = ones_noaxis_int.all()
    self.assertIsInstance(int_is_one, ht.DNDarray)
    self.assertEqual(int_is_one.shape, (1,))
    self.assertEqual(int_is_one.lshape, (1,))
    self.assertEqual(int_is_one.dtype, ht.bool)
    self.assertEqual(int_is_one.larray.dtype, torch.bool)
    self.assertEqual(int_is_one.split, None)
    self.assertEqual(int_is_one.larray, 1)

    out_noaxis = ht.zeros((1,))
    ht.all(ones_noaxis_int, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 1)

    # check all over all integer elements of split 1d tensor
    ones_noaxis_split_int = ht.ones(array_len, split=0).astype(ht.int)
    split_int_is_one = ones_noaxis_split_int.all()
    self.assertIsInstance(split_int_is_one, ht.DNDarray)
    self.assertEqual(split_int_is_one.shape, (1,))
    self.assertEqual(split_int_is_one.lshape, (1,))
    self.assertEqual(split_int_is_one.dtype, ht.bool)
    self.assertEqual(split_int_is_one.larray.dtype, torch.bool)
    self.assertEqual(split_int_is_one.split, None)
    self.assertEqual(split_int_is_one.larray, 1)

    out_noaxis = ht.zeros((1,))
    ht.all(ones_noaxis_split_int, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 1)

    # check all over all float elements of 3d tensor locally
    ones_noaxis_volume = ht.ones((3, 3, 3))
    volume_is_one = ones_noaxis_volume.all()
    self.assertIsInstance(volume_is_one, ht.DNDarray)
    self.assertEqual(volume_is_one.shape, (1,))
    self.assertEqual(volume_is_one.lshape, (1,))
    self.assertEqual(volume_is_one.dtype, ht.bool)
    self.assertEqual(volume_is_one.larray.dtype, torch.bool)
    self.assertEqual(volume_is_one.split, None)
    self.assertEqual(volume_is_one.larray, 1)

    out_noaxis = ht.zeros((1,))
    ht.all(ones_noaxis_volume, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 1)

    # check sequence is not all one
    sequence = ht.arange(array_len)
    sequence_is_one = sequence.all()
    self.assertIsInstance(sequence_is_one, ht.DNDarray)
    self.assertEqual(sequence_is_one.shape, (1,))
    self.assertEqual(sequence_is_one.lshape, (1,))
    self.assertEqual(sequence_is_one.dtype, ht.bool)
    self.assertEqual(sequence_is_one.larray.dtype, torch.bool)
    self.assertEqual(sequence_is_one.split, None)
    self.assertEqual(sequence_is_one.larray, 0)

    out_noaxis = ht.zeros((1,))
    ht.all(sequence, out=out_noaxis)
    self.assertEqual(out_noaxis.larray, 0)

    # check all over all float elements of split 3d tensor
    ones_noaxis_split_axis = ht.ones((3, 3, 3), split=0)
    float_volume_is_one = ones_noaxis_split_axis.all(axis=0)
    self.assertIsInstance(float_volume_is_one, ht.DNDarray)
    self.assertEqual(float_volume_is_one.shape, (3, 3))
    self.assertEqual(float_volume_is_one.all(axis=1).dtype, ht.bool)
    self.assertEqual(float_volume_is_one.larray.dtype, torch.bool)
    self.assertEqual(float_volume_is_one.split, None)

    out_noaxis = ht.zeros((3, 3))
    ht.all(ones_noaxis_split_axis, axis=0, out=out_noaxis)

    # check all over all float elements of split 3d tensor with tuple axis
    ones_noaxis_split_axis = ht.ones((3, 3, 3), split=0)
    float_volume_is_one = ones_noaxis_split_axis.all(axis=(0, 1))
    self.assertIsInstance(float_volume_is_one, ht.DNDarray)
    self.assertEqual(float_volume_is_one.shape, (3,))
    self.assertEqual(float_volume_is_one.all(axis=0).dtype, ht.bool)
    self.assertEqual(float_volume_is_one.larray.dtype, torch.bool)
    self.assertEqual(float_volume_is_one.split, None)

    # check all over all float elements of split 5d tensor with negative axis
    ones_noaxis_split_axis_neg = ht.zeros((1, 2, 3, 4, 5), split=1)
    float_5d_is_one = ones_noaxis_split_axis_neg.all(axis=-2)
    self.assertIsInstance(float_5d_is_one, ht.DNDarray)
    self.assertEqual(float_5d_is_one.shape, (1, 2, 3, 5))
    self.assertEqual(float_5d_is_one.dtype, ht.bool)
    self.assertEqual(float_5d_is_one.larray.dtype, torch.bool)
    self.assertEqual(float_5d_is_one.split, 1)

    out_noaxis = ht.zeros((1, 2, 3, 5), split=1)
    ht.all(ones_noaxis_split_axis_neg, axis=-2, out=out_noaxis)

    # exceptions
    with self.assertRaises(ValueError):
        ht.ones(array_len).all(axis=1)
    with self.assertRaises(ValueError):
        ht.ones(array_len).all(axis=-2)
    with self.assertRaises(ValueError):
        ht.ones((4, 4)).all(axis=0, out=out_noaxis)
    with self.assertRaises(TypeError):
        ht.ones(array_len).all(axis="bad_axis_type")
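# On a split tensor, a global all() has to combine the per-process partial
# results. A minimal sketch of that pattern with mpi4py (assuming mpi4py is
# available; heat's internal communication layer differs in detail):
def _sketch_distributed_all(local_tensor, comm):
    from mpi4py import MPI

    local_result = bool(local_tensor.all())
    # logical AND across ranks; every rank receives the same global result
    return comm.allreduce(local_result, op=MPI.LAND)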
def test_var(self):
    array_0_len = ht.MPI_WORLD.size * 2
    array_1_len = ht.MPI_WORLD.size * 2
    array_2_len = ht.MPI_WORLD.size * 2

    # test raises
    x = ht.zeros((2, 3, 4), device=ht_device)
    with self.assertRaises(TypeError):
        ht.var(x, axis=0, bessel=1)
    with self.assertRaises(ValueError):
        ht.var(x, axis=10)
    with self.assertRaises(TypeError):
        ht.var(x, axis="01")

    a = ht.arange(1, 5, device=ht_device)
    self.assertEqual(a.var(), 1.666666666666666)

    # ones
    dimensions = []
    for d in [array_0_len, array_1_len, array_2_len]:
        dimensions.extend([d])
        hold = list(range(len(dimensions)))
        hold.append(None)
        for split in hold:  # loop over the number of dimensions of the test array
            z = ht.ones(dimensions, split=split, device=ht_device)
            res = z.var()
            total_dims_list = list(z.shape)
            self.assertTrue((res == 0).all())
            # loop over the different single dimensions for var
            for it in range(len(z.shape)):
                res = z.var(axis=it)
                self.assertTrue(ht.allclose(res, 0))
                target_dims = [
                    total_dims_list[q] for q in range(len(total_dims_list)) if q != it
                ]
                if not target_dims:
                    target_dims = ()
                self.assertEqual(res.gshape, tuple(target_dims))
                if z.split is None:
                    sp = None
                else:
                    sp = z.split if it > z.split else z.split - 1
                if it == split:
                    sp = None
                self.assertEqual(res.split, sp)
                if split == it:
                    res = z.var(axis=it)
                    self.assertTrue(ht.allclose(res, 0))
            z = ht.ones(dimensions, split=split, device=ht_device)
            res = z.var(bessel=False)
            self.assertTrue(ht.allclose(res, 0))

    # values for the iris dataset var measured by LibreOffice Calc
    for sp in [None, 0, 1]:
        iris = ht.load("heat/datasets/data/iris.csv", sep=";", split=sp, device=ht_device)
        self.assertTrue(ht.allclose(ht.var(iris, bessel=True), 3.90318519755147))
def test_local_set_get(self):
    # this function also exercises the get_start_stop and local_to_global helpers
    # --------------------- local ----------- s0 ----------------
    # key types: (int), (int, int), (slice, int), (slice, slice), (int, slice)
    m_eq_n_s0 = ht.zeros((25, 25), split=0)
    m_eq_n_s0_t2 = ht.core.tiling.SquareDiagTiles(m_eq_n_s0, tiles_per_proc=2)
    k = (slice(0, 10), slice(2, None))
    m_eq_n_s0_t2.local_set(key=k, value=1)
    lcl_key = m_eq_n_s0_t2.local_to_global(key=k, rank=m_eq_n_s0.comm.rank)
    st_sp = m_eq_n_s0_t2.get_start_stop(key=lcl_key)
    sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
    lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
    lcl_shape = m_eq_n_s0_t2.local_get(key=(slice(None), slice(None))).shape
    self.assertEqual(lcl_shape, m_eq_n_s0.lshape)
    self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
    # reset base
    m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    k = (1, 1)
    m_eq_n_s0_t2.local_set(key=k, value=1)
    lcl_key = m_eq_n_s0_t2.local_to_global(key=k, rank=m_eq_n_s0.comm.rank)
    st_sp = m_eq_n_s0_t2.get_start_stop(key=lcl_key)
    sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
    lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
    self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
    # reset base
    m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    k = 1
    m_eq_n_s0_t2.local_set(key=k, value=1)
    lcl_key = m_eq_n_s0_t2.local_to_global(key=k, rank=m_eq_n_s0.comm.rank)
    st_sp = m_eq_n_s0_t2.get_start_stop(key=lcl_key)
    sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
    lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
    self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
    # reset base
    m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    # --------------------- local ----------- s1 ----------------
    m_eq_n_s1 = ht.zeros((25, 25), split=1)
    m_eq_n_s1_t2 = ht.core.tiling.SquareDiagTiles(m_eq_n_s1, tiles_per_proc=2)
    k = (slice(0, 2), slice(0, None))
    m_eq_n_s1_t2.local_set(key=k, value=1)
    lcl_key = m_eq_n_s1_t2.local_to_global(key=k, rank=m_eq_n_s1.comm.rank)
    st_sp = m_eq_n_s1_t2.get_start_stop(key=lcl_key)
    sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
    lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
    lcl_shape = m_eq_n_s1_t2.local_get(key=(slice(None), slice(None))).shape
    self.assertEqual(lcl_shape, m_eq_n_s1.lshape)
    self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
    # reset base
    m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    if ht.MPI_WORLD.size > 2:
        k = (5, 1)
        m_eq_n_s1_t2.local_set(key=k, value=1)
        lcl_key = m_eq_n_s1_t2.local_to_global(key=k, rank=m_eq_n_s1.comm.rank)
        st_sp = m_eq_n_s1_t2.get_start_stop(key=lcl_key)
        sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
        lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
        self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
        # reset base
        m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    k = 2
    m_eq_n_s1_t2.local_set(key=k, value=1)
    lcl_key = m_eq_n_s1_t2.local_to_global(key=k, rank=m_eq_n_s1.comm.rank)
    st_sp = m_eq_n_s1_t2.get_start_stop(key=lcl_key)
    sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
    lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
    self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
    # reset base
    m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    # --------------------- global ---------- s0 ----------------
    m_eq_n_s0 = ht.zeros((25, 25), split=0)
    m_eq_n_s0_t2 = ht.core.tiling.SquareDiagTiles(m_eq_n_s0, tiles_per_proc=2)
    k = 2
    m_eq_n_s0_t2[k] = 1
    if m_eq_n_s0_t2[k] is not None:
        st_sp = m_eq_n_s0_t2.get_start_stop(key=k)
        sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
        lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
        self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
        # reset base
        m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    if ht.MPI_WORLD.size > 2:
        k = (5, 5)
        m_eq_n_s0_t2[k] = 1
        if m_eq_n_s0_t2[k] is not None:
            st_sp = m_eq_n_s0_t2.get_start_stop(key=k)
            sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
            lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
            self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
            # reset base
            m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    k = (slice(0, 2), slice(1, 5))
    m_eq_n_s0_t2[k] = 1
    if m_eq_n_s0_t2[k] is not None:
        st_sp = m_eq_n_s0_t2.get_start_stop(key=k)
        sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
        lcl_slice = m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
        self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
        # reset base
        m_eq_n_s0._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    # --------------------- global ---------- s1 ----------------
    m_eq_n_s1 = ht.zeros((25, 25), split=1)
    m_eq_n_s1_t2 = ht.core.tiling.SquareDiagTiles(m_eq_n_s1, tiles_per_proc=2)
    k = (slice(0, 3), slice(0, 2))
    m_eq_n_s1_t2[k] = 1
    if m_eq_n_s1_t2[k] is not None:
        st_sp = m_eq_n_s1_t2.get_start_stop(key=k)
        sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
        lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
        self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
        # reset base
        m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    if ht.MPI_WORLD.size > 2:
        k = (5, 5)
        m_eq_n_s1_t2[k] = 1
        if m_eq_n_s1_t2[k] is not None:
            st_sp = m_eq_n_s1_t2.get_start_stop(key=k)
            sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
            lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
            self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
            # reset base
            m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    k = (slice(0, 3), 3)
    m_eq_n_s1_t2[k] = 1
    if m_eq_n_s1_t2[k] is not None:
        st_sp = m_eq_n_s1_t2.get_start_stop(key=k)
        sz = st_sp[1] - st_sp[0], st_sp[3] - st_sp[2]
        lcl_slice = m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]]
        self.assertTrue(torch.all(lcl_slice - torch.ones(sz) == 0))
        # reset base
        m_eq_n_s1._DNDarray__array[st_sp[0]:st_sp[1], st_sp[2]:st_sp[3]] = 0

    with self.assertRaises(ValueError):
        m_eq_n_s1_t2[1, :]
    with self.assertRaises(TypeError):
        m_eq_n_s1_t2["asdf"]
    with self.assertRaises(TypeError):
        m_eq_n_s1_t2[1, "asdf"]
    with self.assertRaises(ValueError):
        m_eq_n_s1_t2[1, :] = 2
    with self.assertRaises(ValueError):
        m_eq_n_s1_t2.get_start_stop(key=(1, slice(None)))
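# Every assertion block above follows the same recipe: map a tile key to
# (row_start, row_stop, col_start, col_stop) offsets into the process-local
# torch tensor, then inspect that block. A minimal sketch of the underlying
# offset arithmetic along a single axis, using a hypothetical per-tile size
# list (heat's SquareDiagTiles tracks this per process and per axis):
def _sketch_tile_start_stop(tile_sizes, index):
    """Return the (start, stop) offsets of tile `index` along one axis."""
    start = sum(tile_sizes[:index])
    return start, start + tile_sizes[index]

# e.g. a 25-row matrix cut into tiles of 7, 6, 6 and 6 rows:
# _sketch_tile_start_stop([7, 6, 6, 6], 2) == (13, 19)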
def test_save(self):
    if ht.io.supports_hdf5():
        # local range
        local_range = ht.arange(100)
        local_range.save(self.HDF5_OUT_PATH, self.HDF5_DATASET, dtype=local_range.dtype.char())
        if local_range.comm.rank == 0:
            with ht.io.h5py.File(self.HDF5_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.HDF5_DATASET],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())

        # split range
        split_range = ht.arange(100, split=0)
        split_range.save(self.HDF5_OUT_PATH, self.HDF5_DATASET, dtype=split_range.dtype.char())
        if split_range.comm.rank == 0:
            with ht.io.h5py.File(self.HDF5_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.HDF5_DATASET],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())

    if ht.io.supports_netcdf():
        # local range
        local_range = ht.arange(100)
        local_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE)
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())

        # split range
        split_range = ht.arange(100, split=0)
        split_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((local_range.larray == comparison).all())

        # naming dimensions: string
        local_range = ht.arange(100, device=self.device)
        local_range.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, dimension_names=self.NETCDF_DIMENSION
        )
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = handle[self.NETCDF_VARIABLE].dimensions
            self.assertTrue(self.NETCDF_DIMENSION in comparison)

        # naming dimensions: tuple
        local_range = ht.arange(100, device=self.device)
        local_range.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, dimension_names=(self.NETCDF_DIMENSION,)
        )
        if local_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = handle[self.NETCDF_VARIABLE].dimensions
            self.assertTrue(self.NETCDF_DIMENSION in comparison)

        # appending unlimited variable
        split_range.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, is_unlimited=True)
        ht.MPI_WORLD.Barrier()
        split_range.save(
            self.NETCDF_OUT_PATH,
            self.NETCDF_VARIABLE,
            mode="r+",
            file_slices=slice(split_range.size, None, None),
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue(
                (ht.concatenate((local_range, local_range)).larray == comparison).all()
            )

        # indexing netcdf file: single index
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((20, 1, 20, 2), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones(20, device=self.device)
        indices = (-1, 0, slice(None), 1)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones.larray == comparison).all())

        # indexing netcdf file: multiple indices
        ht.MPI_WORLD.Barrier()
        small_range_split = ht.arange(10, split=0, device=self.device)
        small_range = ht.arange(10, device=self.device)
        indices = [[0, 9, 5, 2, 1, 3, 7, 4, 8, 6]]
        small_range_split.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w", file_slices=indices
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((small_range.larray == comparison).all())

        # slicing netcdf file
        sslice = slice(7, 2, -1)
        range_five_split = ht.arange(5, split=0, device=self.device)
        range_five = ht.arange(5, device=self.device)
        range_five_split.save(
            self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=sslice
        )
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][sslice],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((range_five.larray == comparison).all())

        # indexing netcdf file: broadcasting array
        zeros = ht.zeros((2, 1, 1, 4), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((4), split=0, device=self.device)
        ones_nosplit = ht.ones((4), split=None, device=self.device)
        indices = (0, slice(None), slice(None))
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones_nosplit.larray == comparison).all())

        # indexing netcdf file: broadcasting var
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((2, 2), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((1, 2, 1), split=0, device=self.device)
        ones_nosplit = ht.ones((1, 2, 1), device=self.device)
        indices = (0,)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+", file_slices=indices)
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones_nosplit.larray == comparison).all())

        # indexing netcdf file: broadcasting ones
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((1, 1, 1, 1), device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        ones = ht.ones((1, 1), device=self.device)
        ones.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="r+")
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][indices],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((ones.larray == comparison).all())

        # different split and dtype
        ht.MPI_WORLD.Barrier()
        zeros = ht.zeros((2, 2), split=1, dtype=ht.int32, device=self.device)
        zeros_nosplit = ht.zeros((2, 2), dtype=ht.int32, device=self.device)
        zeros.save(self.NETCDF_OUT_PATH, self.NETCDF_VARIABLE, mode="w")
        if split_range.comm.rank == 0:
            with ht.io.nc.Dataset(self.NETCDF_OUT_PATH, "r") as handle:
                comparison = torch.tensor(
                    handle[self.NETCDF_VARIABLE][:],
                    dtype=torch.int32,
                    device=self.device.torch_device,
                )
            self.assertTrue((zeros_nosplit.larray == comparison).all())
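# Each netCDF case above boils down to a write (optionally through
# file_slices) followed by a rank-0 read-back. The equivalent plain netCDF4
# round trip, written directly against the netCDF4 library (illustrative file
# and variable names):
def _sketch_netcdf_roundtrip(path="demo.nc"):
    import numpy as np
    from netCDF4 import Dataset

    data = np.arange(10, dtype=np.int32)
    with Dataset(path, "w") as handle:
        handle.createDimension("x", len(data))
        var = handle.createVariable("data", data.dtype, ("x",))
        var[:] = data
    with Dataset(path, "r") as handle:
        return np.array(handle["data"][:])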
def test_cdist(self):
    n = ht.communication.MPI_WORLD.size
    X = ht.ones((n * 2, 4), dtype=ht.float32, split=None)
    Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=None)
    res_XX_cdist = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=None)
    res_XX_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=None)
    res_XX_manhattan = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=None)
    res_XY_cdist = ht.ones((n * 2, n * 2), dtype=ht.float32, split=None) * 2
    res_XY_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=None) * math.exp(-1.0)
    res_XY_manhattan = ht.ones((n * 2, n * 2), dtype=ht.float32, split=None) * 4

    # Case 1a: X.split == None, Y == None
    d = ht.spatial.cdist(X, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XX_cdist))
    self.assertEqual(d.split, None)
    d = ht.spatial.cdist(X, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XX_cdist))
    self.assertEqual(d.split, None)
    d = ht.spatial.rbf(X, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XX_rbf))
    self.assertEqual(d.split, None)
    d = ht.spatial.rbf(X, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XX_rbf))
    self.assertEqual(d.split, None)
    d = ht.spatial.manhattan(X, expand=False)
    self.assertTrue(ht.equal(d, res_XX_manhattan))
    self.assertEqual(d.split, None)
    d = ht.spatial.manhattan(X, expand=True)
    self.assertTrue(ht.equal(d, res_XX_manhattan))
    self.assertEqual(d.split, None)

    # Case 1b: X.split == None, Y != None, Y.split == None
    d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, None)
    d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, None)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, None)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, None)
    d = ht.spatial.manhattan(X, Y, expand=False)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, None)
    d = ht.spatial.manhattan(X, Y, expand=True)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, None)

    # Case 1c: X.split == None, Y != None, Y.split == 0
    Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=0)
    res_XX_cdist = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=1)
    res_XX_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=1)
    res_XY_cdist = ht.ones((n * 2, n * 2), dtype=ht.float32, split=1) * 2
    res_XY_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=1) * math.exp(-1.0)

    d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 1)
    d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 1)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 1)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 1)
    d = ht.spatial.manhattan(X, Y, expand=False)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 1)
    d = ht.spatial.manhattan(X, Y, expand=True)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 1)

    # Case 2a: X.split == 0, Y == None
    X = ht.ones((n * 2, 4), dtype=ht.float32, split=0)
    Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=None)
    res_XX_cdist = ht.zeros((n * 2, n * 2), dtype=ht.float32, split=0)
    res_XX_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=0)
    res_XY_cdist = ht.ones((n * 2, n * 2), dtype=ht.float32, split=0) * 2
    res_XY_rbf = ht.ones((n * 2, n * 2), dtype=ht.float32, split=0) * math.exp(-1.0)

    d = ht.spatial.cdist(X, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XX_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.cdist(X, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XX_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XX_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XX_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, expand=False)
    self.assertTrue(ht.equal(d, res_XX_manhattan))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, expand=True)
    self.assertTrue(ht.equal(d, res_XX_manhattan))
    self.assertEqual(d.split, 0)

    # Case 2b: X.split == 0, Y != None, Y.split == None
    d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, Y, expand=False)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, Y, expand=True)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 0)

    # Case 2c: X.split == 0, Y != None, Y.split == 0
    Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=0)
    d = ht.spatial.cdist(X, Y, quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.cdist(X, Y, quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_cdist))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=False)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.rbf(X, Y, sigma=math.sqrt(2.0), quadratic_expansion=True)
    self.assertTrue(ht.equal(d, res_XY_rbf))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, Y, expand=False)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 0)
    d = ht.spatial.manhattan(X, Y, expand=True)
    self.assertTrue(ht.equal(d, res_XY_manhattan))
    self.assertEqual(d.split, 0)

    # Case 3: X.split == 1
    X = ht.ones((n * 2, 4), dtype=ht.float32, split=1)
    with self.assertRaises(NotImplementedError):
        ht.spatial.cdist(X)
    with self.assertRaises(NotImplementedError):
        ht.spatial.cdist(X, Y, quadratic_expansion=False)
    X = ht.ones((n * 2, 4), dtype=ht.float32, split=None)
    Y = ht.zeros((n * 2, 4), dtype=ht.float32, split=1)
    with self.assertRaises(NotImplementedError):
        ht.spatial.cdist(X, Y, quadratic_expansion=False)
    Z = ht.ones((n * 2, 6, 3), dtype=ht.float32, split=None)
    with self.assertRaises(NotImplementedError):
        ht.spatial.cdist(Z, quadratic_expansion=False)
    with self.assertRaises(NotImplementedError):
        ht.spatial.cdist(X, Z, quadratic_expansion=False)

    n = ht.communication.MPI_WORLD.size
    A = ht.ones((n * 2, 6), dtype=ht.float32, split=None)
    for i in range(n):
        A[2 * i, :] = A[2 * i, :] * (2 * i)
        A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
    res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
    A = ht.ones((n * 2, 6), dtype=ht.float32, split=0)
    for i in range(n):
        A[2 * i, :] = A[2 * i, :] * (2 * i)
        A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
    B = A.astype(ht.int32)
    d = ht.spatial.cdist(A, B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float32, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-5))

    n = ht.communication.MPI_WORLD.size
    A = ht.ones((n * 2, 6), dtype=ht.float32, split=None)
    for i in range(n):
        A[2 * i, :] = A[2 * i, :] * (2 * i)
        A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
    res = torch.cdist(A._DNDarray__array, A._DNDarray__array)
    A = ht.ones((n * 2, 6), dtype=ht.float32, split=0)
    for i in range(n):
        A[2 * i, :] = A[2 * i, :] * (2 * i)
        A[2 * i + 1, :] = A[2 * i + 1, :] * (2 * i + 1)
    B = A.astype(ht.int32)
    d = ht.spatial.cdist(A, B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float32, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))

    B = A.astype(ht.float64)
    d = ht.spatial.cdist(A, B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float64, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))

    B = A.astype(ht.int16)
    d = ht.spatial.cdist(A, B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float32, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))
    d = ht.spatial.cdist(B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float32, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))

    B = A.astype(ht.int32)
    d = ht.spatial.cdist(B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float32, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))

    B = A.astype(ht.float64)
    d = ht.spatial.cdist(B, quadratic_expansion=False)
    result = ht.array(res, dtype=ht.float64, split=0)
    self.assertTrue(ht.allclose(d, result, atol=1e-8))
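# The quadratic_expansion flag exercised above refers to computing pairwise
# squared distances via the identity ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y,
# which needs one matrix product instead of an explicit difference tensor.
# A torch sketch of that identity (not heat's distributed implementation):
def _sketch_cdist_quadratic(x, y):
    x_norm = (x ** 2).sum(dim=1, keepdim=True)    # (n, 1)
    y_norm = (y ** 2).sum(dim=1, keepdim=True).T  # (1, m)
    d2 = x_norm + y_norm - 2.0 * (x @ y.T)
    # clamp guards against small negative values from rounding noise
    return torch.clamp(d2, min=0.0).sqrt()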