def test_Replicated_matmul(self):
    mpi_array_a = mpi_np.array(self.np_array_a, dist='r')
    mpi_array_b = mpi_np.array(self.np_array_b, dist='r')
    #Check return type
    self.assertTrue(
        isinstance(mpi_np.matmul(self.np_array_a, self.np_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(mpi_array_a, mpi_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(mpi_array_a, self.np_array_b),
                   mpi_np.MPIArray))
    self.assertTrue(
        isinstance(mpi_np.matmul(self.np_array_a, mpi_array_b),
                   mpi_np.MPIArray))
    #Check result consistent with numpy
    self.assertTrue(np.alltrue(
        np.matmul(self.np_array_a, self.np_array_b) == \
        mpi_np.matmul(mpi_array_a, mpi_array_b)))
    self.assertTrue(np.alltrue(
        np.matmul(self.np_array_a, self.np_array_b) == \
        mpi_np.matmul(self.np_array_a, mpi_array_b)))
    self.assertTrue(np.alltrue(
        np.matmul(self.np_array_a, self.np_array_b) == \
        mpi_np.matmul(mpi_array_a, self.np_array_b)))
def creation(size, iters=10000):
    data = np.arange(size, dtype=np.float64).tolist()
    time = measure_time()
    #Repeatedly construct an MPIArray from the list data
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    #Average creation time per iteration
    return time / iters
def creation(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64)
    time = measure_time() - time
    gc.collect()
    #Reduce per-rank timings to the root process (MPI.MAX)
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters
def array(size, iters=10000, comm=MPI.COMM_WORLD):
    data = np.arange(size, dtype=np.float64).tolist()
    #Synchronize ranks before timing the distributed creations
    comm.Barrier()
    time = measure_time()
    for _ in range(iters):
        mpi_np.array(data, dtype=np.float64, comm=comm, dist='b')
    time = measure_time() - time
    #Reduce per-rank timings to the root process (MPI.MAX)
    comm.reduce(time, op=MPI.MAX, root=0)
    return time / iters
def test_block_distribution_matmul(self):
    rank = self.comm.Get_rank()
    mpi_array_a = mpi_np.array(self.np_array_a, dist='b')
    mpi_array_b = mpi_np.array(self.np_array_b, dist='b')
    #Check result consistent with numpy
    self.assertTrue(np.alltrue(
        np.matmul(self.np_array_a, self.np_array_b)[rank] == \
        mpi_np.matmul(mpi_array_a, mpi_array_b)))
def setUp(self):
    self.comm = MPI.COMM_WORLD
    #Number of clusters
    self.k = 2
    self.seeded_centroids = np.arange(4).reshape(2, 2)
    self.seeded_num_centroids = self.seeded_centroids.shape[0]
    self.seeded_num_features = self.seeded_centroids.shape[-1]
    self.obs_1_feature = self.__create_1_feature_obs()
    self.obs_2_features = self.__create_2_feature_obs()
    self.obs_3_features = self.__create_3_feature_obs()
    self.dist_obs_1_feature = mpi_np.array(self.obs_1_feature, dist='b')
    self.dist_obs_2_features = mpi_np.array(self.obs_2_features, dist='b')
    self.dist_obs_3_features = mpi_np.array(self.obs_3_features, dist='b')
def test_under_partitioned_block_distribution_matmul(self):
    #Current version of code will under partition a 2x8 matrix.
    #Want to make sure logic is sound with petsc4py.
    np_8x2_array = self.np_array_a.reshape(8, 2)
    np_2x8_array = self.np_array_b.reshape(2, 8)
    mpi_array_a = mpi_np.array(np_8x2_array, dist='b')
    mpi_array_b = mpi_np.array(np_2x8_array, dist='b')
    rank = self.comm.Get_rank()
    local_row_start = rank * 2
    local_row_stop = local_row_start + 2
    #Check result consistent with numpy
    self.assertTrue(np.alltrue(
        np.matmul(np_8x2_array, np_2x8_array)[local_row_start:local_row_stop] == \
        mpi_np.matmul(mpi_array_a, mpi_array_b)))
def _process_observations(observations, comm):
    """ Helper method to distribute provided observations if necessary.

    Returns
    -------
    observations : Block Distributed MPIArray
        Block distributed copy of the provided set of observations.
    num_features : int
        Number of features in observation vector.
    labels : Block Distributed MPIArray
        Array of centroid indexes that classify a given observation to
        its closest cluster centroid.
    """
    if not isinstance(observations, Block):
        observations = mpi_np.array(observations, comm=comm, dist='b')
    if observations.globalndim > 2:
        raise ValueError('only 1/2-Dimensional observation ' +
                         'vector/matrices supported.')
    num_observations = observations.globalshape[0]
    num_features = \
        observations.globalshape[1] if observations.globalndim == 2 else 1
    labels = mpi_np.zeros(num_observations,
                          dtype=np.int64,
                          comm=comm,
                          dist=observations.dist)
    return observations, num_features, labels
def setUp(self):
    parms = self.create_setUp_parms()
    self.comm = parms.get('comm')
    self.dist = parms.get('dist')
    self.data = parms.get('data')
    self.mpi_array = mpi_np.array(self.data, comm=self.comm, dist=self.dist)
def test_process_centroids_providing_Distributed_MPIArray(self):
    k = mpi_np.array(self.seeded_centroids, dist='b')
    num_features = self.seeded_num_features
    obs = self.dist_obs_2_features
    centroids, num_centroids, temp_centroids = \
        _process_centroids(k, num_features, obs, self.comm)
    self.assertTrue(isinstance(centroids, Replicated))
    self.assertTrue(isinstance(temp_centroids, Replicated))
    self.assertEqual(num_centroids, self.seeded_num_centroids)
    #Check seeded centroids returned
    self.assertTrue(np.alltrue(self.seeded_centroids == centroids))
def test_return_behavior_with_np_data_from_all_ranks(self):
    for root in range(self.size):
        np_data = None
        self.assertTrue(np_data is None)
        if self.rank == root:
            np_data = self.np_data
        mpi_np_array = mpi_np.array(np_data,
                                    comm=self.comm,
                                    root=root,
                                    dist=self.dist)
        self.assertTrue(isinstance(mpi_np_array, mpi_np.MPIArray))
        self.assertTrue(isinstance(mpi_np_array, self.dist_class))
        self.assertEqual(mpi_np_array.comm, self.comm)
        self.assertEqual(mpi_np_array.dist, self.dist)
def test_unsupported_distribution(self):
    data = np.arange(10)
    comm = MPI.COMM_WORLD
    with self.assertRaises(InvalidDistributionError):
        mpi_np.array(data, comm=comm, dist='bananas')
    # Test cases where dim input data != dim distribution
    with self.assertRaises(InvalidDistributionError):
        mpi_np.array(data, comm=comm, dist=('*', 'b'))
    with self.assertRaises(InvalidDistributionError):
        mpi_np.array(data, comm=comm, dist=('b', 'b'))
def setUp(self):
    parms = self.create_setUp_parms()
    self.comm = parms.get('comm')
    self.rank = parms.get('rank')
    self.comm_size = parms.get('comm_size')
    self.dist = parms.get('dist')
    self.data = parms.get('data')
    self.local_data = parms.get('local_data')
    self.comm_dims = parms.get('comm_dims')
    self.comm_coord = parms.get('comm_coord')
    self.local_to_global = parms.get('local_to_global')
    self.np_array = np.array(self.data)
    self.np_local_array = np.array(self.local_data)
    self.mpi_array = mpi_np.array(self.data, comm=self.comm, dist=self.dist)
def test_kmeans_produces_same_results_as_scipy_kmeans2_for_3_features_with_Block_distributed_seed(self):
    k = np.array([[-1, -1, -1], [1, 1, 1]])
    k_mpi_np = mpi_np.array(k, dist='b')
    scipy_centriods, scipy_labels = \
        scipy_cluster.kmeans2(self.obs_3_features, k, iter=1000)
    mpids_centriods, mpids_labels = \
        mpi_scipy_cluster.kmeans(self.dist_obs_3_features, k_mpi_np)
    #Check results
    self.assertTrue(self.__compare_labels(scipy_labels, mpids_labels))
    self.assertTrue(
        self.__compare_centroids(scipy_centriods, mpids_centriods))
    #Check returned data types
    self.assertTrue(isinstance(mpids_centriods, Replicated))
    self.assertTrue(isinstance(mpids_labels, Replicated))
    #Check number of returned elements
    self.assertTrue(mpids_centriods.globalshape[0] == len(k))
    self.assertTrue(
        mpids_labels.globalshape[0] == self.obs_3_features.shape[0])
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator, MPI process rank, and number of MPI processes
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    note = "Note: creation routines are using their default MPI related kwargs."
    note += "\nDefault kwargs:"
    note += " routine(..., comm=MPI.COMM_WORLD, root=0, dist='b')\n"
    print(note) if rank == 0 else None

    #Array, distributed array-like data
    print('From array(array_like_data) Routine') if rank == 0 else None
    array_like_data = list(range(size * 5))
    mpi_array = mpi_np.array(array_like_data)
    print('Local Array Result Rank {}: {}'.format(rank, mpi_array))
    print() if rank == 0 else None
def __centroids_from_ndarray(k, num_features, observations, comm):
    #Duplicate ndarray on all processes
    return mpi_np.array(k, dtype=observations.dtype, comm=comm, dist='r')
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_scipy_cluster

if __name__ == "__main__":
    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Create simulated 1D observation vector
    k, num_points, centers = 2, 10, [[-1, -0.75], [1, 1.25]]
    x0 = np.random.uniform(centers[0][0], centers[0][1], size=(num_points))
    x1 = np.random.uniform(centers[1][0], centers[1][1], size=(num_points))
    np_1D_obs_features = np.array(x0.tolist() + x1.tolist(), dtype=np.float64)

    #Distribute observations among MPI processes
    mpi_np_1D_obs_features = mpi_np.array(np_1D_obs_features, dist='b')

    #Compute K-Means Clustering Result
    centroids, labels = mpi_scipy_cluster.kmeans(mpi_np_1D_obs_features,
                                                 k,
                                                 #Below are the default kwargs
                                                 thresh=1e-5,
                                                 comm=MPI.COMM_WORLD)

    #Compute K-Means Clustering Result using Non-Distributed Input
    centroids_2, labels_2 = mpi_scipy_cluster.kmeans(np_1D_obs_features, k)

    #Check Distributed & Non-Distributed inputs generate the same result
    assert np.allclose(centroids, centroids_2)
    assert np.allclose(labels, labels_2)
import sys

from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np
import mpids.MPIscipy.cluster as mpi_cluster

from operations import gen_blobs, measure_time

if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    n_procs = comm.Get_size()

    runs = int(sys.argv[1])
    obs_power = int(sys.argv[2])
    local_size = 2**obs_power
    num_obs = n_procs * local_size
    k = 2
    features = 2

    observations, labels = gen_blobs(num_obs, features, k)
    mpi_obs = mpi_np.array(observations, dist='b', dtype=np.float64)

    for _ in range(runs):
        comm.Barrier()
        time = measure_time()
        centroids, labels = mpi_cluster.kmeans(mpi_obs, k)
        time = measure_time() - time
        #Reduce per-rank timings to the root process (MPI.MAX)
        comm.reduce(time, op=MPI.MAX, root=0)
        if rank == 0:
            print("mpi_scipy,%d,%d,%d,%d,%.9f" %
                  (n_procs, local_size, features, k, time))
        del centroids, labels
from mpi4py import MPI
import numpy as np

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Array elements (values 0-15)
    data_1D = np.arange(16)

    #Block data distribution
    block_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='b')
    print('1D Global data:\n{}\n\n'.format(data_1D)) if rank == 0 else None
    comm.Barrier()
    print('1D Blocked Data Rank {}:\n{}'.format(rank, block_mpi_array_1D))

    #Replicated data distribution
    replicated_mpi_array_1D = mpi_np.array(data_1D, comm=comm, dist='r')
    comm.Barrier()
    print('1D Replicated Data Rank {}:\n{}'\
          .format(rank, replicated_mpi_array_1D))
from mpi4py import MPI
import platform

import mpids.MPInumpy as mpi_np

if __name__ == "__main__":
    #Capture default communicator and MPI process rank
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()

    #Create array-like data with 8 elements, values 0-7
    data = list(range(8))

    #Create MPInumpy array, passing data and default communicator
    mpi_array = mpi_np.array(data, comm=comm)

    #Print mpi_array attributes for each MPI process
    output = '{} Rank {} MPIArray Attributes: \n'.format(platform.node(), rank)
    output += '\t mpi_array.base = {} \n'.format(mpi_array.base)
    output += '\t mpi_array.dtype = {} \n'.format(mpi_array.dtype)
    #Common distributed local and global properties
    output += '\t mpi_array.shape = {} \n'.format(mpi_array.shape)
    output += '\t mpi_array.globalshape = {} \n'.format(mpi_array.globalshape)
    output += '\t mpi_array.size = {} \n'.format(mpi_array.size)
    output += '\t mpi_array.globalsize = {} \n'.format(mpi_array.globalsize)
    output += '\t mpi_array.nbytes = {} \n'.format(mpi_array.nbytes)
    output += '\t mpi_array.globalnbytes = {} \n'.format(mpi_array.globalnbytes)
    output += '\t mpi_array.ndim = {} \n'.format(mpi_array.ndim)
    output += '\t mpi_array.globalndim = {} \n'.format(mpi_array.globalndim)
    #Unique properties to MPIArray
    output += '\t mpi_array.dist = {} \n'.format(mpi_array.dist)