def verify(g_a, g_b):
    """Check that global array ``g_b`` holds the contents of ``g_a`` reversed.

    Both global arrays are fetched in full into local buffers with
    ``ga.get``. When the reversed copy of the first buffer differs from the
    second buffer in any element, a mismatch message is printed and
    ``ga.error`` is invoked. "Transpose OK" is printed afterwards.

    Args:
        g_a: handle of the source global array.
        g_b: handle of the global array expected to be the reverse of g_a.
    """
    source = ga.get(g_a)
    inverted = ga.get(g_b)

    reversal_matches = np.all(source[::-1] == inverted)
    if not reversal_matches:
        print("Mismatch: a[::-1] is not equal to b")
        # NOTE(review): ga.error presumably aborts the run -- confirm.
        ga.error("verify failed")
    print("Transpose OK")
def verify(g_a, g_b):
    """Compare global array ``g_b`` against the reversed contents of ``g_a``.

    Args:
        g_a: handle of the original global array.
        g_b: handle of the global array that should contain g_a reversed.

    Prints a mismatch message and calls ``ga.error`` when any element of the
    reversed ``g_a`` buffer differs from ``g_b``; prints "Transpose OK"
    afterwards.
    """
    # Copy the entire block of data from the global array "g_a" into the
    # local array "a", and similarly for "g_b" and "b". Without these two
    # fetches the comparison below fails with a NameError.
    a = ga.get(g_a)
    b = ga.get(g_b)

    if not np.all(a[::-1] == b):
        print("Mismatch: a[::-1] is not equal to b")
        ga.error("verify failed")
    print("Transpose OK")
def matrix_multiply():
    """Multiply two distributed matrices with Global Arrays and verify it.

    Creates global arrays A, B and C of shape ``[TOTALELEMS] * NDIM``, fills
    A and B with random data from process 0, has each process compute the
    block of C that ``ga.distribution`` reports for it, stores that block in
    C, calls ``verify`` and finally destroys the three arrays.

    Relies on the module-level names ``TOTALELEMS``, ``NDIM``, ``nprocs``,
    ``me`` and ``verify``, which are defined outside this function.
    """
    # Configure array dimensions. Force an unequal data distribution.
    # Floor division keeps the chunk size an integer under any division mode.
    dims = [TOTALELEMS] * NDIM
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c:
        # Was misspelled "ga.eror", which raised AttributeError instead of
        # reporting the failed duplicate.
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    # Initialize data in matrices a and b and copy it to the global arrays.
    # Only process 0 performs the put, so only process 0 builds the matrices.
    if not me:
        print("Initializing matrix A and B")
        a = np.random.rand(*dims) * 29
        b = np.random.rand(*dims) * 37
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    a = ga.get(g_a, (lo[0], 0), (hi[0], dims[0]))
    b = ga.get(g_b, (0, lo[1]), (dims[1], hi[1]))

    # Do local matrix multiplication and store the result in local buffer c.
    # (The previously computed transpose of b was never used and is dropped.)
    c = np.dot(a, b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)
def matrix_multiply():
    """Run the distributed matrix-multiply example and check the result.

    Steps, in order: create global arrays A, B, C; load random matrices into
    A and B from process 0; synchronize; fetch the rows of A and columns of
    B matching the locally owned block of C; multiply them with ``np.dot``;
    put the product into C; call ``verify``; destroy the arrays.

    Uses the module-level names ``TOTALELEMS``, ``NDIM``, ``nprocs``, ``me``
    and ``verify`` (not defined in this function).
    """
    # Configure array dimensions. Force an unequal data distribution.
    dims = [TOTALELEMS] * NDIM
    # "//" so the chunk size stays an int even under true division.
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array g_a and duplicate it to get g_b and g_c.
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")
    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not (g_b and g_c):
        # Fixed typo: the original called the non-existent "ga.eror".
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    if not me:
        # Initialize data in matrices a and b. Only this process copies the
        # data to the global arrays, so the other processes skip the
        # allocation entirely.
        print("Initializing matrix A and B")
        a = np.random.rand(*dims) * 29
        b = np.random.rand(*dims) * 37

        # Copy data to global arrays g_a and g_b.
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in g_c.
    row_lo, row_hi = (lo[0], 0), (hi[0], dims[0])
    col_lo, col_hi = (0, lo[1]), (dims[1], hi[1])
    a = ga.get(g_a, row_lo, row_hi)
    b = ga.get(g_b, col_lo, col_hi)

    # Multiply a and b to get c. The unused transpose of b that the original
    # computed beforehand has been removed.
    c = np.dot(a, b)

    # Copy c back to g_c.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    for handle in (g_a, g_b, g_c):
        ga.destroy(handle)
def TRANSPOSE1D():
    """Reverse a distributed 1-D integer array using Global Arrays.

    Creates global array A with ``nprocs * TOTALELEMS + nprocs // 2``
    elements and a duplicate B, fills A with ``0 .. n-1`` from process 0,
    has every process reverse its locally owned block of A and put it into
    the mirrored position of B, then lets process 0 call ``verify``.

    The exercise placeholders of the original skeleton are implemented here;
    before, the function failed with a NameError on ``g_a``.

    Relies on the module-level names ``nprocs``, ``TOTALELEMS``, ``me`` and
    ``verify``, defined outside this function.
    """
    # Configure array dimensions. Force an unequal data distribution.
    dims = [nprocs * TOTALELEMS + nprocs // 2]
    chunk = [TOTALELEMS]  # minimum data on each process

    # Create a global array g_a of integers with dimension "dims", minimum
    # block size "chunk" and name "Array A".
    g_a = ga.create(ga.C_INT, dims, "Array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    # Create a second global array g_b by duplicating g_a.
    g_b = ga.duplicate(g_a, "Array B")
    if not g_b:
        ga.error("duplicate failed")
    if not me:
        print("Created Array B")

    # Initialize data in g_a from a numpy range array.
    if not me:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo, hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a.
    a = ga.get(g_a, lo, hi)

    # Invert data locally.
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA. The target is g_b with the
    # mirrored bounds lo2/hi2, because verify() compares g_a reversed
    # against g_b.
    lo2 = [dims[0] - hi[0]]
    hi2 = [dims[0] - lo[0]]
    ga.put(g_b, b, lo2, hi2)

    # Synchronize all processors to make sure inversion is complete.
    ga.sync()

    # Check to see if inversion is correct.
    if not me:
        verify(g_a, g_b)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
def TRANSPOSE1D():
    """Invert (reverse) a 1-D global array block by block.

    Workflow: create integer global array A and its duplicate B; process 0
    writes ``np.arange(n)`` into A; after a sync each process fetches its own
    block of A, reverses it locally and writes it into B at the mirrored
    index range; after a second sync process 0 runs ``verify(g_a, g_b)``.

    This fills in the unimplemented "###" steps of the exercise skeleton,
    which previously left ``g_a``, ``g_b``, ``lo``, ``hi`` and ``a``
    undefined.

    Uses the module-level names ``nprocs``, ``TOTALELEMS``, ``me`` and
    ``verify`` (not defined in this function).
    """
    # Configure array dimensions. Force an unequal data distribution.
    # Floor division keeps the length an integer regardless of division mode.
    total = nprocs * TOTALELEMS + nprocs // 2
    dims = [total]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "Array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "Array B")
    if not g_b:
        ga.error("duplicate failed")
    if not me:
        print("Created Array B")

    # initialize data in g_a
    if not me:
        print("Initializing matrix A")
        initial = np.arange(total, dtype=np.int32)
        ga.put(g_a, initial)

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Find out which block of data this process owns for g_a.
    lo, hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA. The destination is g_b at
    # [lo2, hi2): writing back into g_a would clobber data other
    # processes may still be reading, and verify() expects g_b to hold the
    # reversed copy.
    lo2 = [total - hi[0]]
    hi2 = [total - lo[0]]
    ga.put(g_b, b, lo2, hi2)

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me:
        verify(g_a, g_b)

    # Deallocate arrays
    for handle in (g_a, g_b):
        ga.destroy(handle)
def TRANSPOSE1D():
    """Reverse a distributed 1-D integer array and verify the result.

    Creates global array A (``nprocs * TOTALELEMS + nprocs // 2`` elements)
    and a duplicate B, loads ``0 .. n-1`` into A from process 0, makes each
    process reverse its own block of A and put it into B at the mirrored
    index range, lets process 0 call ``verify`` and destroys both arrays.

    Relies on the module-level names ``nprocs``, ``TOTALELEMS``, ``me`` and
    ``verify``, defined outside this function.
    """
    # Configure array dimensions. Force an unequal data distribution.
    # "//" instead of "/": the array length must stay an integer even when
    # true division is in effect (Python 3 or "from __future__ import
    # division"); for Python 2 integers the value is unchanged.
    dims = [nprocs * TOTALELEMS + nprocs // 2]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if not me:
        print("Created Array B")

    # initialize data in g_a
    if not me:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(dims[0], dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Start initial phase of inversion by inverting the data held locally on
    # each processor. Start by finding out which data each processor owns.
    lo, hi = ga.distribution(g_a)

    # Get locally held data and copy it into local buffer a
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    ga.put(g_b, b, dims[0] - hi[0], dims[0] - lo[0])

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me:
        verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)
def TRANSPOSE1D():
    """Invert a 1-D global array across processes and check the outcome.

    Sequence: create integer global array A and duplicate it as B; process 0
    stores ``np.arange(n)`` in A; sync; every process fetches its block of A,
    reverses it and puts it into B at ``[n - hi, n - lo)``; sync; process 0
    runs ``verify(g_a, g_b)``; both arrays are destroyed.

    Uses the module-level names ``nprocs``, ``TOTALELEMS``, ``me`` and
    ``verify`` (not defined in this function).
    """
    # Configure array dimensions. Force an unequal data distribution.
    # Floor division fixes the latent float length that "nprocs/2" yields
    # under true division; for Python 2 ints the result is identical.
    length = nprocs * TOTALELEMS + nprocs // 2
    dims = [length]
    chunk = [TOTALELEMS]  # minimum data on each process

    # create a global array g_a and duplicate it to get g_b
    g_a = ga.create(ga.C_INT, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")
    g_b = ga.duplicate(g_a, "array B")
    if not g_b:
        ga.error("duplicate failed")
    if not me:
        print("Created Array B")

    # initialize data in g_a
    if not me:
        print("Initializing matrix A")
        ga.put(g_a, np.arange(length, dtype=np.int32))

    # Synchronize all processors to guarantee that everyone has data
    # before proceeding to the next step.
    ga.sync()

    # Find out which data this processor owns, then copy it into a local
    # buffer.
    lo, hi = ga.distribution(g_a)
    a = ga.get(g_a, lo, hi)

    # Invert data locally
    b = a[::-1]

    # Invert data globally by copying locally inverted blocks into
    # their inverted positions in the GA
    mirrored_lo = length - hi[0]
    mirrored_hi = length - lo[0]
    ga.put(g_b, b, mirrored_lo, mirrored_hi)

    # Synchronize all processors to make sure inversion is complete
    ga.sync()

    # Check to see if inversion is correct
    if not me:
        verify(g_a, g_b)

    # Deallocate arrays
    ga.destroy(g_a)
    ga.destroy(g_b)
def verify(g_a, g_b, g_c):
    """Check ``g_c`` against a reference product of ``g_a`` and ``g_b``.

    A scratch global array is duplicated from ``g_a``, filled through
    ``ga.gemm`` from ``g_a`` and ``g_b``, combined with ``g_c`` through
    ``ga.add`` using coefficients 1.0 and -1.0, and reduced with ``ga.dot``
    against itself. Process 0 prints the resulting value and either reports
    success or calls ``ga.error`` when the value is not strictly inside
    ``(-TOLERANCE, TOLERANCE)``. The scratch array is destroyed at the end.

    Args:
        g_a: handle of the left operand global array.
        g_b: handle of the right operand global array.
        g_c: handle of the global array holding the product to be checked.
    """
    scratch = ga.duplicate(g_a, "array check")
    if not scratch:
        ga.error("duplicate failed")
    ga.sync()

    # NOTE(review): presumably scratch = 1.0 * A * B + 0.0 * scratch, with no
    # transposes -- confirm against the ga.gemm documentation.
    n = TOTALELEMS
    ga.gemm(False, False, n, n, n, 1.0, g_a, g_b, 0.0, scratch)
    ga.sync()

    # NOTE(review): presumably scratch = 1.0 * g_c - 1.0 * scratch -- confirm
    # against the ga.add documentation.
    ga.add(g_c, scratch, scratch, 1.0, -1.0)
    rchk = ga.dot(scratch, scratch)

    if not me:
        print("Normed difference in matrices: %12.4f" % rchk)
        if -TOLERANCE < rchk < TOLERANCE:
            print("Matrix Multiply OK")
        else:
            ga.error("Matrix multiply verify failed")

    ga.destroy(scratch)
def verify(g_a, g_b, g_c):
    """Validate the distributed product stored in ``g_c``.

    Builds a check array (a duplicate of ``g_a``), has ``ga.gemm`` populate
    it from ``g_a`` and ``g_b``, forms a combination of ``g_c`` and the check
    array via ``ga.add`` with weights 1.0 and -1.0, and takes ``ga.dot`` of
    that result with itself. On process 0 the value is printed; if it lies
    strictly between ``-TOLERANCE`` and ``TOLERANCE`` "Matrix Multiply OK" is
    printed, otherwise ``ga.error`` is called. The check array is destroyed
    before returning.

    Args:
        g_a: handle of the first factor.
        g_b: handle of the second factor.
        g_c: handle of the computed product under test.
    """
    g_chk = ga.duplicate(g_a, "array check")
    if not g_chk:
        ga.error("duplicate failed")
    ga.sync()

    gemm_args = (
        False, False,                        # transpose flags for A and B
        TOTALELEMS, TOTALELEMS, TOTALELEMS,  # problem dimensions
        1.0, g_a, g_b,                       # alpha and the two operands
        0.0, g_chk,                          # beta and the output array
    )
    ga.gemm(*gemm_args)
    ga.sync()

    ga.add(g_c, g_chk, g_chk, 1.0, -1.0)
    rchk = ga.dot(g_chk, g_chk)

    is_root = not me
    if is_root:
        print("Normed difference in matrices: %12.4f" % rchk)
        within_tolerance = -TOLERANCE < rchk < TOLERANCE
        if within_tolerance:
            print("Matrix Multiply OK")
        else:
            ga.error("Matrix multiply verify failed")

    ga.destroy(g_chk)
def matrix_multiply():
    """Distributed matrix multiply using Global Arrays, followed by a check.

    Creates global arrays A, B and C of shape ``[TOTALELEMS] * NDIM``, loads
    random matrices into A and B from process 0, computes on each process the
    block of C reported by ``ga.distribution``, stores it in C, calls
    ``verify`` and destroys the arrays.

    This implements the "###" exercise placeholders of the original skeleton
    and repairs its syntax errors (unbalanced parentheses on the ``hi2`` and
    ``hi3`` lines, an ``if`` with an empty body) and the ``ga.eror`` typo.

    Relies on the module-level names ``TOTALELEMS``, ``NDIM``, ``nprocs``,
    ``me`` and ``verify``, defined outside this function.
    """
    # Configure array dimensions. Force an unequal data distribution.
    dims = [TOTALELEMS] * NDIM
    chunk = [TOTALELEMS // nprocs - 1] * NDIM

    # Create a global array of doubles with dimensions "dims", minimum block
    # size "chunk" and name "array A".
    g_a = ga.create(ga.C_DBL, dims, "array A", chunk)
    if not g_a:
        ga.error("create failed: A")
    if not me:
        print("Created Array A")

    # Duplicate array g_a to create arrays g_b and g_c.
    g_b = ga.duplicate(g_a, "array B")
    g_c = ga.duplicate(g_a, "array C")
    if not g_b or not g_c:
        ga.error("duplicate failed")
    if not me:
        print("Created Arrays B and C")

    # Initialize data in matrices a and b.
    if not me:
        print("Initializing matrix A and B")
    a = np.random.rand(*dims) * 29
    b = np.random.rand(*dims) * 37

    # Copy data to global arrays g_a and g_b (from process 0 only).
    if not me:
        ga.put(g_a, a)
        ga.put(g_b, b)

    # Synchronize all processors to make sure everyone has data.
    ga.sync()

    # Determine which block of data is locally owned. Note that
    # the same block is locally owned for all GAs.
    lo, hi = ga.distribution(g_c)

    # Get the blocks from g_a and g_b needed to compute this block in
    # g_c and copy them into the local buffers a and b.
    lo2 = (lo[0], 0)
    hi2 = (hi[0], dims[0])
    a = ga.get(g_a, lo2, hi2)

    lo3 = (0, lo[1])
    hi3 = (dims[1], hi[1])
    b = ga.get(g_b, lo3, hi3)

    # Multiply a and b to get c. (The unused transpose of b was removed.)
    c = np.dot(a, b)

    # Copy c back into the block of g_c described by lo and hi.
    ga.put(g_c, c, lo, hi)

    verify(g_a, g_b, g_c)

    # Deallocate arrays.
    ga.destroy(g_a)
    ga.destroy(g_b)
    ga.destroy(g_c)


if __name__ == '__main__':
    if not me:
        print("\nUsing %d processes\n" % nprocs)
    matrix_multiply()
    if not me:
        print("\nTerminating...")
import mpi4py.MPI  # initialize Message Passing Interface
import ga  # initialize Global Arrays
import numpy as np

me = ga.nodeid()
nproc = ga.nnodes()


def print_distribution(g_a):
    """Print the lo/hi bounds that ``ga.distribution`` reports per process.

    Args:
        g_a: handle of the global array to inspect.

    One line of the form ``P=<index> lo=<lo> hi=<hi>`` is printed for every
    process index in ``range(ga.nnodes())``.
    """
    for i in range(ga.nnodes()):
        lo, hi = ga.distribution(g_a, i)
        print("P=%s lo=%s hi=%s" % (i, lo, hi))


# create some irregular arrays
block = [3, 2]
# Renamed from "map" so the builtin map() is no longer shadowed at module
# level.
# NOTE(review): presumably the starting indices of the 3 blocks along the
# first dimension followed by the 2 blocks along the second -- confirm
# against the ga.create_irreg documentation.
block_map = [0, 2, 6, 0, 5]
if nproc < np.prod(block):
    # Call form of raise: valid in Python 2 and 3, unlike the original
    # 'raise ValueError, "..."' statement form.
    raise ValueError("ERROR: fewer procs than requested blocks")
g_a = ga.create_irreg(ga.C_DBL, [8, 10], block, block_map, "Array A")
if not g_a:
    ga.error("Could not create global array A", g_a)
g_b = ga.create(ga.C_INT, (2, 3, 4, 5, 6))
if not me:
    print_distribution(g_a)
    print_distribution(g_b)