def test2d(self):
    """Check the 2D stencil Laplacian of 0.5*x*y**2 against a known check sum.

    A stencil with weight 1.0 on the four axis neighbours and -2*ndims on
    the centre is applied, with non-periodic boundaries, to the function
    sampled at the cell centres of an 8 x 8 grid on [0, 1]^2.  The result is
    divided by the squared cell size, summed over all ranks and compared
    with -198.0 on rank 0.
    """
    n = 8  # global number of cells along each axis
    ns = (n, n)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0] * ndims)
    xmaxs = numpy.array([1.0] * ndims)

    # local cell centered coordinates and cell size along each axis
    axes = []
    hs = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        hs.append(h)
        axes.append(xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5))

    # stencil: +1 on each of the 2*ndims axis neighbours, -2*ndims on the centre
    op = StencilOperator(dc, periodic=(False, False))
    for i in range(ndims):
        for pm in (-1, 1):
            disp = [0] * ndims
            disp[i] = pm
            op.addStencilBranch(tuple(disp), 1.0)
    op.addStencilBranch(tuple([0] * ndims), -2 * ndims)

    # set the input function f(x, y) = 0.5 * x * y**2 on the local cells
    # (x varies along axis 0, y along axis 1)
    inp = 0.5 * axes[0][:, None] * axes[1][None, :] ** 2

    # NOTE: dividing by hs[0]**2 is only right while the cell size is the
    # same along x and y; this needs adjusting if the sizes ever differ.
    out = op.apply(inp) / hs[0] ** 2

    # check sum: every rank contributes, but only the root receives the
    # gathered values, so the reduction and the check happen there only
    localChkSum = numpy.sum(out.flat)
    gathered = MPI.COMM_WORLD.gather(localChkSum, 0)
    if self.rk == 0:
        chksum = numpy.sum(gathered)
        print('test2d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - -198.0), 1.e-10)
def test1d(self):
    """Check the 1D stencil second difference of 0.5*x**2 against a known check sum.

    A three-point stencil (1.0, -2.0, 1.0) is applied, with a non-periodic
    boundary, to the function sampled at the cell centres of an 8-cell grid
    on [0, 1].  The result is divided by the squared cell size, summed over
    all ranks and compared with -28.25 on rank 0.
    """
    n = 8  # global number of cells
    ns = (n,)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0] * ndims)
    xmaxs = numpy.array([1.0] * ndims)

    # local cell centered coordinates and cell size along each axis
    axes = []
    hs = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        hs.append(h)
        axes.append(xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5))

    # three-point stencil
    op = StencilOperator(dc, periodic=(False,))
    op.addStencilBranch((1,), 1.0)
    op.addStencilBranch((-1,), 1.0)
    op.addStencilBranch((0,), -2.0)

    # set the input function f(x) = 0.5 * x**2 on the local cells
    inp = 0.5 * axes[0] ** 2

    out = op.apply(inp) / hs[0] ** 2

    # check sum: every rank contributes, but only the root receives the
    # gathered values, so the reduction and the check happen there only
    localChkSum = numpy.sum(out.flat)
    gathered = MPI.COMM_WORLD.gather(localChkSum, 0)
    if self.rk == 0:
        chksum = numpy.sum(gathered)
        print('test1d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - -28.25), 1.e-10)
def test2d(disp, dtyp):
    """Apply a one-branch periodic stencil to rank-valued data and print it.

    disp -- displacement handed to ``addStencilBranch`` (with weight 2)
    dtyp -- numpy dtype of the local input array

    Each rank fills a 3 x 3 array with ``rank + 1``, applies the operator,
    prints its input and output, then waits at a barrier.
    """
    comm = MPI.COMM_WORLD
    rk = comm.Get_rank()
    sz = comm.Get_size()

    # local array shape and the global grid handed to the decomposition
    localShape = (3, 3)
    decomp = CubeDecomp(nprocs=sz, dims=(3 * sz, sz))

    # periodic operator with a single branch
    so = StencilOperator(decomp, periodic=[True, True])
    so.addStencilBranch(disp, 2)

    inputData = numpy.ones(localShape, dtyp) * (rk + 1)
    outputData = so.apply(inputData)

    print('[{0}] inputData = {1}'.format(rk, inputData))
    print('[{0}] outputData = {1}'.format(rk, outputData))

    comm.Barrier()
def test3d(self):
    """Check the 3D stencil applied to 0.5*x*y**2 against a known check sum.

    A stencil with weight 1.0 on the six axis neighbours and -2*ndims on the
    centre is applied to the function sampled at the cell centres of an
    8 x 8 x 8 grid on [0, 1]^3; the operator is periodic along the last axis
    only.  The stencil output is NOT divided by the cell size here; its sum
    over all ranks is compared with -24.75 on rank 0.
    """
    n = 8  # global number of cells along each axis
    ns = (n, n, n)

    # domain decomposition
    dc = CubeDecomp(self.sz, ns)
    if not dc.getDecomp():
        print('*** ERROR Invalid domain decomposition -- rerun with different sizes/number of procs')
        sys.exit(1)
    ndims = dc.getNumDims()

    # local start/end grid indices
    slab = dc.getSlab(self.rk)

    # global domain boundaries
    xmins = numpy.array([0.0] * ndims)
    xmaxs = numpy.array([1.0] * ndims)

    # local cell centered coordinates and local number of cells per axis
    axes = []
    nsLocal = []
    for i in range(ndims):
        ibeg, iend = slab[i].start, slab[i].stop
        nsLocal.append(iend - ibeg)
        h = (xmaxs[i] - xmins[i]) / float(ns[i])
        axes.append(xmins[i] + h * (numpy.arange(ibeg, iend) + 0.5))

    # stencil: +1 on each of the 2*ndims axis neighbours, -2*ndims on the centre
    op = StencilOperator(dc, periodic=(False, False, True))
    for i in range(ndims):
        for pm in (-1, 1):
            disp = [0] * ndims
            disp[i] = pm
            op.addStencilBranch(tuple(disp), 1.0)
    op.addStencilBranch(tuple([0] * ndims), -2 * ndims)

    # set the input function f(x, y, z) = 0.5 * x * y**2; it does not depend
    # on z, so the (nx, ny, 1) values are broadcast along the last axis
    inp = numpy.zeros((nsLocal[0], nsLocal[1], nsLocal[2]), numpy.float64)
    inp[...] = 0.5 * axes[0][:, None, None] * axes[1][None, :, None] ** 2

    # check sum of input; gather is called on every rank, but only the root
    # receives the values, so the reduction happens there only
    gathered = MPI.COMM_WORLD.gather(numpy.sum(inp.flat), 0)
    if self.rk == 0:
        print('test3d check sum of input = {}'.format(numpy.sum(gathered)))

    out = op.apply(inp)

    # check sum
    gathered = MPI.COMM_WORLD.gather(numpy.sum(out.flat), 0)
    if self.rk == 0:
        chksum = numpy.sum(gathered)
        print('test3d check sum = {}'.format(chksum))
        self.assertLessEqual(abs(chksum - -24.75), 1.e-10)
# global number of cells along x and y
nx, ny = 12, 36

# physical extent of the domain
xMin, xMax = 0.0, 1.0
yMin, yMax = 0.0, 1.0

# cell sizes
dx = (xMax - xMin) / float(nx)
dy = (yMax - yMin) / float(ny)

# cell centered axes
xs = numpy.array([xMin + (i + 0.5) * dx for i in range(nx)])
ys = numpy.array([yMin + (j + 0.5) * dy for j in range(ny)])

# domain decomposition (rk and sz are set earlier in the script)
dc = CubeDecomp(sz, (nx, ny))

# the decomp must be regular; otherwise rank 0 reports the problem and
# the script shuts MPI down and exits with status 1
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# number of procs in each direction
npx, npy = dc.getDecomp()

# list of slices, one per axis, for this rank
slab = dc.getSlab(rk)

# starting/ending indices for local domain
# NOTE(review): this loop is the tail of a loop nest whose enclosing code
# (which provides i, di, nxGHalf and nyGHalf) is not visible here; it is
# reproduced unchanged.
for j in range(data.shape[1]):
    jG = jBeg + j
    dj = jG - 0.8 * nyG
    data[i, j] = numpy.floor(
        1.9 * numpy.exp(-di**2 / nxGHalf**2 - dj**2 / nyGHalf**2))

# local rank and number of procs
rk = MPI.COMM_WORLD.Get_rank()
sz = MPI.COMM_WORLD.Get_size()

# global domain sizes
nxG, nyG = 128, 256

# domain decomposition
dc = CubeDecomp(sz, (nxG, nyG))

# the decomp must be regular; this is checked BEFORE the slab is requested
# so that an unusable number of procs produces the message below instead of
# a failure while indexing the slab
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# starting/ending global indices for local domain
slab = dc.getSlab(rk)
iBeg, iEnd = slab[0].start, slab[0].stop
jBeg, jEnd = slab[1].start, slab[1].stop

# local domain sizes
nx, ny = iEnd - iBeg, jEnd - jBeg
# rank 0 reports the run parameters (sz, nx, ny, nz and nTimes are set
# earlier in the script)
if rk == 0:
    for message in (
            'Number of procs: {}'.format(sz),
            'Number of cells nx, ny, nz = {0}, {1}, {2}'.format(nx, ny, nz),
            'Number of times Laplacian operator is applied = {0}'.format(nTimes)):
        print(message)

# physical extent of the domain
xMin, xMax = 0.0, 1.0
yMin, yMax = 0.0, 1.0
zMin, zMax = 0.0, 1.0

# cell sizes
dx = (xMax - xMin) / float(nx)
dy = (yMax - yMin) / float(ny)
dz = (zMax - zMin) / float(nz)

# domain decomposition
dc = CubeDecomp(sz, (nx, ny, nz))

# the decomp must be regular; otherwise rank 0 reports the problem and
# the script shuts MPI down and exits with status 1
if not dc.getDecomp():
    if rk == 0:
        print('no decomp could be found, adjust the number of procs')
    MPI.Finalize()
    sys.exit(1)

# number of procs in each direction
npx, npy, npz = dc.getDecomp()
if rk == 0:
    print('Number of procs in x, y, z = {0}, {1}, {2}'.format(npx, npy, npz))

# list of slices, one per axis, for this rank
slab = dc.getSlab(rk)
def test2d_laplacian_periodic(self):
    """
    2d array, apply Laplacian, periodic along the two axes

    The field sin(2*pi*x) * sin(2*pi*y) is sampled at the cell centres of a
    12 x 12 grid on [0, 1]^2 and stored in a ghosted distributed array.  The
    operator computed is 2*ndims*f minus the sum of the axis neighbours (no
    division by the cell size); neighbour values owned by other ranks are
    fetched through the ghost windows.  The sum of absolute values of the
    result is reduced onto rank 0 and compared with 32.0.

    Returns early, without testing anything, when no uniform domain
    decomposition exists for the current number of procs.
    """
    # reduce is not a builtin in Python 3
    from functools import reduce
    import operator
    from mpinum import CubeDecomp
    from mpinum import MultiArrayIter

    # global sizes
    ndims = 2
    ns = numpy.array([3*4] * ndims)

    # local rank and number of procs
    rk = MPI.COMM_WORLD.Get_rank()
    sz = MPI.COMM_WORLD.Get_size()

    # find a domain decomposition
    dc = CubeDecomp(sz, ns)

    # not all numbers of procs will give a uniform domain decomposition,
    # exit if none can be found
    if not dc.getDecomp():
        if rk == 0:
            print('no decomp could be found, adjust the number of procs')
        return

    # get the local start/stop indices along each axis as a list of
    # 1d slices
    localSlices = dc.getSlab(rk)
    iBeg = numpy.array([s.start for s in localSlices])
    nsLocal = numpy.array([s.stop - s.start for s in localSlices])

    # create the dist arrays
    da = mpinum.gdaZeros(nsLocal, numpy.float32, numGhosts=1)
    laplacian = mpinum.gdaZeros(nsLocal, numpy.float32, numGhosts=1)

    try:
        # set the data
        for it in MultiArrayIter(nsLocal):
            localInds = it.getIndices()
            globalInds = iBeg + localInds
            # positions are cell centered, domain is [0, 1]^ndims
            position = (globalInds + 0.5) / numpy.array(ns, numpy.float32)
            # sin(2*pi*x) * sin(2*pi*y) ...
            da[tuple(localInds)] = reduce(
                operator.mul,
                [numpy.sin(2*numpy.pi*position[i]) for i in range(ndims)])

        # apply the Laplacian finite difference operator.
        # Start by performing all the operations that do
        # not require any communication.
        laplacian[:] = 2 * ndims * da

        # now subtract the neighbor values which are local to this process
        for idim in range(ndims):
            # indices shifted in the + direction along axis idim
            slabP = [slice(None, None, None)] * ndims
            slabP[idim] = slice(1, None, None)
            # indices shifted in the - direction along axis idim
            slabM = [slice(None, None, None)] * ndims
            slabM[idim] = slice(0, -1, None)
            # multidimensional indices must be tuples: NumPy no longer
            # accepts a list of slices as one index per axis
            slabP = tuple(slabP)
            slabM = tuple(slabM)
            laplacian[slabP] -= da[slabM]  # subtract left neighbor
            laplacian[slabM] -= da[slabP]  # subtract right neighbor

        # fetch the data located on other procs
        periodic = [True] * ndims
        for idim in range(ndims):
            # define the positive and negative directions
            directionP = tuple(1 if j == idim else 0 for j in range(ndims))
            directionM = tuple(-1 if j == idim else 0 for j in range(ndims))
            procP = dc.getNeighborProc(rk, directionP, periodic=periodic)
            procM = dc.getNeighborProc(rk, directionM, periodic=periodic)

            # this is where communication takes place... Note that when
            # accessing the data on the low-end side on rank procM we
            # access the slice on the positive side on procM (directionP).
            # And inversely for the high-end side data...
            dataM = da.getData(procM, winID=directionP)
            dataP = da.getData(procP, winID=directionM)

            # finish off the operator
            laplacian[da.getEllipsis(winID=directionM)] -= dataM
            laplacian[da.getEllipsis(winID=directionP)] -= dataP

        # compute a checksum and send the result to rank 0
        checksum = laplacian.reduce(lambda x, y: abs(x) + abs(y), 0.0, rootPe=0)
        if rk == 0:
            print('checksum = ', checksum)
            # float32 calculation has higher error
            assert abs(checksum - 32.0) < 1.e-4
    finally:
        # free the windows, also when the check above fails
        da.free()
        laplacian.free()