def _process_observations(observations, comm):
    """Helper method to distribute provided observations if necessary.

    Parameters
    ----------
    observations : array_like or MPIArray
        Set of observation vectors; 1-Dimensional (single feature) or
        2-Dimensional (observations x features).
    comm : MPI Communicator
        Communicator the distributed arrays are created on.

    Returns
    -------
    observations : Block Distributed MPIArray
        Array of cluster centroids generated from provided set of
        observations.
    num_features : int
        Number of features in observation vector.
    labels : Block Distributed MPIArray
        Array of centroid indexes that classify a given observation to its
        closest cluster centroid.

    Raises
    ------
    ValueError
        If the provided observations have more than 2 dimensions.
    """
    if not isinstance(observations, Block):
        # Anything that isn't already block distributed gets redistributed.
        observations = mpi_np.array(observations, comm=comm, dist='b')
    if observations.globalndim > 2:
        # Bug fix: original concatenation lacked a separating space
        # ("observationvector/matrices").
        raise ValueError('only 1/2-Dimensional observation ' +
                         'vector/matrices supported.')
    num_observations = observations.globalshape[0]
    # A 1D observation array implies a single feature per observation.
    num_features = \
        observations.globalshape[1] if observations.globalndim == 2 else 1
    # One label slot per observation, distributed like the observations.
    labels = mpi_np.zeros(num_observations,
                          dtype=np.int64,
                          comm=comm,
                          dist=observations.dist)
    return observations, num_features, labels
def empty(size, iters=10000):
    """Benchmark mpi_np.empty; return mean seconds per allocation."""
    start = measure_time()
    for _iteration in range(iters):
        mpi_np.empty(size, dtype=np.float64)
    elapsed = measure_time() - start
    # Reclaim the throwaway arrays before the caller times anything else.
    gc.collect()
    return elapsed / iters
def creation(size, iters=10000):
    """Benchmark mpi_np.array construction from a Python list of floats."""
    source = np.arange(size, dtype=np.float64).tolist()
    start = measure_time()
    for _iteration in range(iters):
        mpi_np.array(source, dtype=np.float64)
    elapsed = measure_time() - start
    # Reclaim the throwaway arrays before the caller times anything else.
    gc.collect()
    return elapsed / iters
def empty(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark distributed mpi_np.empty; return local mean seconds per call.

    The per-rank total is MAX-reduced to root so root can observe the
    slowest rank; the return value itself is rank-local.
    """
    # Fix: synchronize ranks before timing starts, consistent with the
    # arange benchmark — otherwise ranks start at different times and the
    # reduced MAX is skewed.
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.empty(size, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters
def arange(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark block distributed mpi_np.arange; return local mean seconds."""
    # Start all ranks together so the MAX-reduced total is meaningful.
    comm.Barrier()
    start = measure_time()
    for _iteration in range(iters):
        mpi_np.arange(size, dtype=np.float64, comm=comm, dist='b')
    elapsed = measure_time() - start
    # Root receives the slowest rank's total; the return is rank-local.
    comm.reduce(elapsed, op=MPI.MAX, root=0)
    return elapsed / iters
def creation(size, iters=10000, comm=MPI.COMM_WORLD):
    """Benchmark mpi_np.array creation; return local mean seconds per call.

    The per-rank total is MAX-reduced to root; the return value itself is
    rank-local.
    """
    data = np.arange(size, dtype=np.float64).tolist()
    # Fix: synchronize ranks before timing starts, consistent with the
    # arange benchmark — avoids skewing the reduced MAX timing.
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters
def test_block_distribution_matmul(self):
    """Block distributed matmul matches numpy's result on this rank's rows."""
    rank = self.comm.Get_rank()
    mpi_array_a = mpi_np.array(self.np_array_a, dist='b')
    mpi_array_b = mpi_np.array(self.np_array_b, dist='b')
    #Check result consistent with numpy
    # Fix: np.alltrue was removed in NumPy 2.0; np.all is the supported form.
    self.assertTrue(np.all(
        np.matmul(self.np_array_a, self.np_array_b)[rank] == \
        mpi_np.matmul(mpi_array_a, mpi_array_b)))
def test_Replicated_matmul(self):
    """Replicated matmul returns MPIArrays and matches numpy for every
    numpy/MPIArray operand combination."""
    mpi_array_a = mpi_np.array(self.np_array_a, dist='r')
    mpi_array_b = mpi_np.array(self.np_array_b, dist='r')
    #Check return type
    self.assertTrue(
        isinstance(mpi_np.matmul(self.np_array_a, self.np_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(mpi_array_a, mpi_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(mpi_array_a, self.np_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(self.np_array_a, mpi_array_b),
                   mpi_np.MPIArray))
    #Check result consistent with numpy
    # Fix: np.alltrue was removed in NumPy 2.0; np.all is the supported form.
    # Compute the reference product once instead of three times.
    expected = np.matmul(self.np_array_a, self.np_array_b)
    self.assertTrue(np.all(
        expected == mpi_np.matmul(mpi_array_a, mpi_array_b)))
    self.assertTrue(np.all(
        expected == mpi_np.matmul(self.np_array_a, mpi_array_b)))
    self.assertTrue(np.all(
        expected == mpi_np.matmul(mpi_array_a, self.np_array_b)))
def test_validate_shape_called(self):
    """zeros() forwards both int and tuple shapes to _validate_shape."""
    patch_target = 'mpids.MPInumpy.array_creation._validate_shape'
    shape_int = 1
    with mock.patch(patch_target) as mock_obj_int:
        mpi_np.zeros(shape_int)
        mock_obj_int.assert_called_with(shape_int)
    shape_tuple = (1, 2)
    with mock.patch(patch_target) as mock_obj_tuple:
        mpi_np.zeros(shape_tuple)
        mock_obj_tuple.assert_called_with(shape_tuple)
def setUp(self): self.comm = MPI.COMM_WORLD #Number of clusters self.k = 2 self.seeded_centroids = np.arange(4).reshape(2, 2) self.seeded_num_centroids = self.seeded_centroids.shape[0] self.seeded_num_features = self.seeded_centroids.shape[-1] self.obs_1_feature = self.__create_1_feature_obs() self.obs_2_features = self.__create_2_feature_obs() self.obs_3_features = self.__create_3_feature_obs() self.dist_obs_1_feature = mpi_np.array(self.obs_1_feature, dist='b') self.dist_obs_2_features = mpi_np.array(self.obs_2_features, dist='b') self.dist_obs_3_features = mpi_np.array(self.obs_3_features, dist='b')
def test_under_partitioned_block_distribution_matmul(self):
    """Matmul stays correct when an operand is under partitioned.

    Current version of code will under partition a 2x8 matrix; make sure
    the logic is sound with petsc4py.
    """
    np_8x2_array = self.np_array_a.reshape(8, 2)
    np_2x8_array = self.np_array_b.reshape(2, 8)
    mpi_array_a = mpi_np.array(np_8x2_array, dist='b')
    mpi_array_b = mpi_np.array(np_2x8_array, dist='b')
    rank = self.comm.Get_rank()
    # Each rank owns two consecutive rows of the 8x8 result.
    local_row_start = rank * 2
    local_row_stop = local_row_start + 2
    #Check result consistent with numpy
    # Fix: np.alltrue was removed in NumPy 2.0; np.all is the supported form.
    self.assertTrue(np.all(
        np.matmul(np_8x2_array,
                  np_2x8_array)[local_row_start: local_row_stop] == \
        mpi_np.matmul(mpi_array_a, mpi_array_b)))
def test_process_observations_providing_mpi_np_array(self):
    """_process_observations handles MPIArray input of either distribution."""
    # Default block distribution first, then replicated.
    for distribution in ('b', 'r'):
        mpi_np_observations = mpi_np.arange(8, dist=distribution)
        processed_obs, num_features, labels = \
            _process_observations(mpi_np_observations, self.comm)
        # Output is always block distributed with one feature per observation.
        self.assertTrue(isinstance(processed_obs, Block))
        self.assertEqual(num_features, 1)
        self.assertTrue(isinstance(labels, Block))
def setUp(self):
    """Unpack the subclass-provided parameters and build the MPIArray."""
    parms = self.create_setUp_parms()
    # Copy each expected parameter straight onto the test case.
    for key in ('comm', 'dist', 'data'):
        setattr(self, key, parms.get(key))
    self.mpi_array = mpi_np.array(self.data, comm=self.comm, dist=self.dist)
def test_process_centroids_providing_Distributed_MPIArray(self):
    """_process_centroids replicates a block distributed centroid seed."""
    k = mpi_np.array(self.seeded_centroids, dist='b')
    num_features = self.seeded_num_features
    obs = self.dist_obs_2_features
    centroids, num_centroids, temp_centroids = \
        _process_centroids(k, num_features, obs, self.comm)
    self.assertTrue(isinstance(centroids, Replicated))
    self.assertTrue(isinstance(temp_centroids, Replicated))
    self.assertEqual(num_centroids, self.seeded_num_centroids)
    #Check seeded centroids returned
    # Fix: np.alltrue was removed in NumPy 2.0; np.all is the supported form.
    self.assertTrue(np.all(self.seeded_centroids == centroids))
def __centroids_from_int(k, num_features, observations, comm):
    """Seed k centroids by sampling evenly spaced observations."""
    centroids = mpi_np.zeros((k, num_features),
                             dtype=observations.dtype,
                             comm=comm,
                             dist='r')
    #Pick initial centroids
    # One observation every num_observations // k rows.
    stride = observations.globalshape[0] // k
    for index in range(k):
        centroids[index] = observations[index * stride]
    return centroids
def test_return_behavior_with_np_data_from_all_ranks(self):
    """array() behaves the same no matter which rank acts as root."""
    for root in range(self.size):
        # Non-root ranks pass None; only the root supplies real data.
        np_data = None
        self.assertTrue(np_data is None)
        if self.rank == root:
            np_data = self.np_data
        result = mpi_np.array(np_data,
                              comm=self.comm,
                              root=root,
                              dist=self.dist)
        for expected_type in (mpi_np.MPIArray, self.dist_class):
            self.assertTrue(isinstance(result, expected_type))
        self.assertEqual(result.comm, self.comm)
        self.assertEqual(result.dist, self.dist)
def test_return_behavior_from_all_ranks_with_tuple_shape(self):
    """empty() with a tuple shape behaves the same from every root rank."""
    for root in range(self.size):
        # Non-root ranks pass None; only the root supplies the shape.
        shape = None
        self.assertTrue(shape is None)
        if self.rank == root:
            shape = self.tuple_shape
        result = mpi_np.empty(shape,
                              comm=self.comm,
                              root=root,
                              dist=self.dist)
        for expected_type in (mpi_np.MPIArray, self.dist_class):
            self.assertTrue(isinstance(result, expected_type))
        self.assertEqual(result.comm, self.comm)
        self.assertEqual(result.dist, self.dist)
def test_return_behavior_from_all_ranks_with_int_shape(self):
    """zeros() with an int shape works from every root and is all zeros."""
    for root in range(self.size):
        shape = None
        self.assertTrue(shape is None)
        if self.rank == root:
            shape = self.int_shape
        mpi_np_zeros = mpi_np.zeros(shape,
                                    comm=self.comm,
                                    root=root,
                                    dist=self.dist)
        self.assertTrue(isinstance(mpi_np_zeros, mpi_np.MPIArray))
        self.assertTrue(isinstance(mpi_np_zeros, self.dist_class))
        self.assertEqual(mpi_np_zeros.comm, self.comm)
        self.assertEqual(mpi_np_zeros.dist, self.dist)
        # Fix: np.alltrue was removed in NumPy 2.0; np.all is the
        # supported form (redundant parentheses dropped as well).
        self.assertTrue(np.all(mpi_np_zeros == 0))
def test_unsupported_distribution(self):
    """Invalid or dimension-mismatched distributions raise errors."""
    data = np.arange(10)
    comm = MPI.COMM_WORLD
    # An unknown distribution name, then distributions whose
    # dimensionality does not match the 1D input data.
    for bad_dist in ('bananas', ('*', 'b'), ('b', 'b')):
        with self.assertRaises(InvalidDistributionError):
            mpi_np.array(data, comm=comm, dist=bad_dist)
def test_return_behavior_from_all_ranks_float_stop(self):
    """arange() with a float stop value works from every root rank."""
    np_arange = np.arange(20.0)
    for root in range(self.size):
        stop = None
        self.assertTrue(stop is None)
        if self.rank == root:
            stop = 20.0
        mpi_np_arange = mpi_np.arange(stop,
                                      comm=self.comm,
                                      root=root,
                                      dist=self.dist)
        self.assertTrue(isinstance(mpi_np_arange, mpi_np.MPIArray))
        self.assertTrue(isinstance(mpi_np_arange, self.dist_class))
        self.assertEqual(mpi_np_arange.comm, self.comm)
        self.assertEqual(mpi_np_arange.dist, self.dist)
        # Fix: np.alltrue was removed in NumPy 2.0; np.all is the
        # supported form.
        self.assertTrue(np.all(mpi_np_arange[:] == np_arange))
def setUp(self):
    """Unpack subclass parameters and materialize numpy/MPIArray fixtures."""
    parms = self.create_setUp_parms()
    # Copy each expected parameter straight onto the test case.
    for key in ('comm', 'rank', 'comm_size', 'dist', 'data', 'local_data',
                'comm_dims', 'comm_coord', 'local_to_global'):
        setattr(self, key, parms.get(key))
    self.np_array = np.array(self.data)
    self.np_local_array = np.array(self.local_data)
    self.mpi_array = mpi_np.array(self.data, comm=self.comm, dist=self.dist)
def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_with_Block_distributed_seed(
        self):
    """MPIscipy kmeans with a block distributed seed matches scipy kmeans2."""
    k = np.array([[-1, -1, -1], [1, 1, 1]])
    k_mpi_np = mpi_np.array(k, dist='b')
    # Local-variable typo fixed: 'centriods' -> 'centroids'.
    scipy_centroids, scipy_labels = \
        scipy_cluster.kmeans2(self.obs_3_features, k, iter=1000)
    mpids_centroids, mpids_labels = \
        mpi_scipy_cluster.kmeans(self.dist_obs_3_features, k_mpi_np)
    #Check results
    self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
    self.assertTrue(
        self.__compare_centroids(scipy_centroids, mpids_centroids))
    #Check returned data types
    self.assertTrue(isinstance(mpids_centroids, Replicated))
    self.assertTrue(isinstance(mpids_labels, Replicated))
    #Check number of returned elements
    self.assertTrue(mpids_centroids.globalshape[0] == len(k))
    self.assertTrue(
        mpids_labels.globalshape[0] == self.obs_3_features.shape[0])
def _process_centroids(k, num_features, observations, comm):
    """Helper method to distribute provided k if necessary and resolve
    whether or not the input is seeded.

    Returns
    -------
    centroids : Replicated MPIArray
        Array of cluster centroids generated from provided set of
        observations.
    num_centroids : int
        Number of centroids.
    temp_centroids : Replicated MPIArray
        Intermediate centroid locations prior to computing distributed
        result.
    """
    def __unsupported_type(*args):
        raise TypeError('only number of clusters(int) or ' + \
                        'centroid seeds(ndarray or MPIArray) should be k.')

    # Dispatch on the exact type of k to the matching seeding routine;
    # anything unrecognized raises via __unsupported_type.
    seed_handlers = {
        int: __centroids_from_int,
        np.ndarray: __centroids_from_ndarray,
        Block: __centroids_from_mpinp_block,
        Replicated: __centroids_from_mpinp_undist,
    }
    handler = seed_handlers.get(type(k), __unsupported_type)
    centroids = handler(k, num_features, observations, comm)

    num_centroids = centroids.shape[0]
    # Seeded 2D centroids must carry the same feature count as the
    # observations.
    if num_features != centroids.shape[-1] and centroids.ndim != 1:
        raise ValueError('expected {} '.format(num_features) + \
                         'number of features in seeded cluster centroids.')
    # Replicated scratch space for intermediate centroid updates.
    temp_centroids = mpi_np.zeros((num_centroids, num_features),
                                  dtype=observations.dtype,
                                  comm=comm,
                                  dist='r')
    return centroids, num_centroids, temp_centroids
def test_validate_shape_errors_propegated(self):
    """Exceptions raised by _validate_shape bubble up through zeros()."""
    patch_target = 'mpids.MPInumpy.array_creation._validate_shape'
    with mock.patch(patch_target,
                    side_effect=Exception('Mock Execption')) as mock_obj:
        with self.assertRaises(Exception):
            mpi_np.zeros(1)
from mpi4py import MPI
import numpy as np
import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Only the root rank prints the usage note.
    if rank == 0:
        note = "Note: creation routines are using their default MPI related kwargs."
        note += "\nDefault kwargs:"
        note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
        print(note)

    #Arange, evenly spaced values within specified interval
    if rank == 0:
        print('From arange(start, stop, step) Routine')
    mpi_arange = mpi_np.arange(size * 5)
    print('Local Arange Result Rank {}: {}'.format(rank, mpi_arange))
    if rank == 0:
        print()
def test_unsupported_functionality(self):
    """matmul rejects the numpy-style 'out' keyword argument."""
    #Use of 'out' field
    destination = np.zeros(())
    with self.assertRaises(NotSupportedError):
        mpi_np.matmul(self.np_array_a, self.np_array_b, out=destination)
import mpids.MPInumpy as mpi_np
import numpy as np
from mpi4py import MPI
from operations import add, sub, mul, div

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    local_size = 2**16
    size = n_procs * local_size
    iters = 1000

    mpi_np_arr = mpi_np.arange(size, dtype=np.float64)

    # Benchmark each elementwise operation and remember its mean runtime.
    timings = [
        ('add', add(mpi_np_arr, iters=iters)),
        ('sub', sub(mpi_np_arr, iters=iters)),
        ('mul', mul(mpi_np_arr, iters=iters)),
        ('div', div(mpi_np_arr, iters=iters)),
    ]

    # Root rank emits one CSV row per operation.
    if rank == 0:
        for op_name, op_time in timings:
            print("mpi_np,%s,%d,%d,%.9f" %
                  (op_name, n_procs, local_size, op_time))
# Fix: this script uses sys.argv and np.float64 below but never imported
# sys or numpy, which raised NameError at runtime.
import sys

from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster
from operations import gen_blobs, measure_time

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    # Command line: <runs> <obs_power>, where local size is 2**obs_power.
    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])
    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2

    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        # Synchronize before timing so all ranks start together.
        comm.Barrier()
        time = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        time = measure_time() - time
        comm.reduce(time, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f" %
                  (n_procs, local_size, features, k, time))
        del centroids, labels
from mpi4py import MPI
import numpy as np
import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Arrays elements (values 0-15)
    data_1D = np.arange(16)

    #Block data distribution
    block_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='b')
    if rank == 0:
        print('1D Global data:\n{}\n\n'.format(data_1D))
    comm.Barrier()
    print('1D Blocked Data Rank {}:\n{}'.format(rank, block_mpi_array_1D))

    #Replicated data distribution
    replicated_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='r')
    comm.Barrier()
    print('1D Replicated Data Rank {}:\n{}'.format(
        rank, replicated_mpi_array_1D))
from mpi4py import MPI
import numpy as np
import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Only the root rank prints the usage note.
    if rank == 0:
        note = "Note: creation routines are using their default MPI related kwargs."
        note += "\nDefault kwargs:"
        note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
        print(note)

    #Array, distributed array-like data
    if rank == 0:
        print('From array(array_like_data) Routine')
    array_like_data = list(range(size * 5))
    mpi_array = mpi_np.array(array_like_data)
    print('Local Array Result Rank {}: {}'.format(rank, mpi_array))
    if rank == 0:
        print()