def main():
    """Drive ``run_verification`` over a fixed grid of small MPI test cases.

    Every case uses a 16 x 32 x 64 grid per process; the global grid passed
    to ``run_verification`` is that grid scaled by the process topology.
    Four sweeps are executed in order, each for ``ts`` values 0 and 1:

    1. kernels 0, 2, 3, 4, 5 on four topologies with default options;
    2. kernels 1, 2, 3 on two topologies with ``dp=1, concat=1``;
    3. kernel 1 on the same two topologies with ``concat=1``;
    4. kernel 2 on the same two topologies with ``dp=1``.
    """
    from verification_utils import run_verification

    sub_x, sub_y, sub_z = 16, 32, 64

    # Selected MPI topologies
    wide_topologies = ((1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2))
    narrow_topologies = ((1, 1, 1), (1, 1, 2))

    # (topologies, kernels, extra keyword arguments), in execution order.
    sweeps = (
        (wide_topologies, (0, 2, 3, 4, 5), {}),
        (narrow_topologies, (1, 2, 3), {"dp": 1, "concat": 1}),
        (narrow_topologies, (1,), {"concat": 1}),
        (narrow_topologies, (2,), {"dp": 1}),
    )

    for topologies, kernels, options in sweeps:
        for step_kind in (0, 1):
            for px, py, pz in topologies:
                for kern in kernels:
                    run_verification(
                        nx=sub_x * px,
                        ny=sub_y * py,
                        nz=sub_z * pz,
                        ts=step_kind,
                        npx=px,
                        npy=py,
                        npz=pz,
                        kernel=kern,
                        **options
                    )
def verification_idiam(k):
    """Run the multi-threaded wavefront verification sweeps with ``ts=k``.

    The function calls ``run_verification`` for nine groups of test cases,
    each announced by a ``print`` banner:

    * four single-process sweeps that vary ``mwd_type``, ``num_threads``
      and ``tgs``/``nwf``;
    * five MPI sweeps (general, increasing temporal block size, double
      precision + halo concatenation, double precision, halo
      concatenation) where ``tgs`` is derived from the process count.

    The two double-precision sweeps previously evaluated
    ``min(10/20/np)``, i.e. ``min()`` of a single integer, which raises
    ``TypeError`` and aborted the function before its last three sweeps.
    They now use ``min(10, 20 // np)`` like the neighbouring sweeps.

    Args:
        k: value forwarded unchanged to ``run_verification`` as ``ts``.

    Returns:
        None.  All results are produced by ``run_verification``.
    """
    from verification_utils import run_verification

    single_process = (1, 1, 1)

    def launch(local, topo, t_dim, kernel, **opts):
        # Global grid = per-process grid scaled by the process topology.
        (nlx, nly, nlz), (npx, npy, npz) = local, topo
        run_verification(
            nx=nlx * npx,
            ny=nly * npy,
            nz=nlz * npz,
            ts=k,
            npx=npx,
            npy=npy,
            npz=npz,
            t_dim=t_dim,
            kernel=kernel,
            **opts
        )

    def enough_concurrency(th, nly, t_dim, radius=1):
        # Enough concurrency in the wavefront for the provided threads.
        # Floor division keeps the Python 2 integer semantics on Python 3.
        return th <= nly // ((t_dim + 1) * 2 * radius)

    def mpi_sweep(title, dims, tops, kernels, cap, budget, **opts):
        # One MPI sweep: tgs (also used as nwf) shrinks as the process
        # count grows, clamped to the range [1, cap].
        print(title)
        for topo in tops:
            nprocs = topo[0] * topo[1] * topo[2]
            tgs = max(1, min(cap, budget // nprocs))
            for nlx, nly, nlz, t_dim in dims:
                for kernel in kernels:
                    launch((nlx, nly, nlz), topo, t_dim, kernel,
                           tgs=tgs, nwf=tgs, **opts)

    # ---- single-process threading sweeps --------------------------------
    dim_list = [[32, 480, 128, 5, 2], [64, 480, 128, 5, 2]]
    print("Verifying different wavefront paralellization stratigies")
    th = 10
    for mwdt in [0, 1, 2, 3]:
        for nlx, nly, nlz, t, tgs in dim_list:
            for kernel, R in [(1, 1), (0, 4)]:
                if enough_concurrency(th, nly, t, R):
                    print("MWD type:%d" % mwdt)
                    launch((nlx, nly, nlz), single_process, t, kernel,
                           num_threads=th, tgs=tgs, nwf=tgs * R,
                           mwd_type=mwdt)

    dim_list = [[16, 128, 128, 1, 1], [32, 480, 128, 5, 2], [64, 480, 128, 5, 2]]
    print("Verifying different values of cache blocking in X")
    for th in range(2, 21, 2):
        for nlx, nly, nlz, t, tgs in dim_list:
            for kernel, R in [(1, 1), (4, 4)]:
                if enough_concurrency(th, nly, t, R):
                    launch((nlx, nly, nlz), single_process, t, kernel,
                           num_threads=th, tgs=tgs, nwf=tgs)

    print("Verifying different multi-core wavefront threads/tile_size combinations")
    dim_list = [[16, 128, 128, 1], [16, 480, 128, 5], [16, 640, 128, 7]]
    tgs = 1
    for th in range(1, 20):
        for nlx, nly, nlz, t in dim_list:
            if enough_concurrency(th, nly, t):
                launch((nlx, nly, nlz), single_process, t, 1,
                       num_threads=th, tgs=tgs, nwf=tgs)

    # These thread/group pairs are run without a concurrency check.
    for th, tgs in [(16, 16), (16, 8), (16, 4), (16, 2), (8, 8), (8, 4), (8, 2)]:
        for nlx, nly, nlz, t in dim_list:
            launch((nlx, nly, nlz), single_process, t, 1,
                   num_threads=th, tgs=tgs, nwf=tgs)

    # ---- MPI sweeps -----------------------------------------------------
    mpi_sweep(
        "Verifying general domain size and network topology combinations",
        dims=[[16, 32, 128, 1], [16, 64, 128, 3], [16, 64, 128, 7], [32, 128, 128, 7]],
        tops=[[1, 1, 1], [1, 2, 1], [1, 4, 1], [1, 8, 1]],
        kernels=[0, 1, 2, 3, 4, 5],
        cap=8,
        budget=16,
    )

    mpi_sweep(
        "Verifying temporal blocks with increasing size",
        dims=[[16, 96, 128, 5], [16, 128, 128, 7], [16, 160, 256, 9],
              [16, 224, 512, 13], [16, 480, 512, 29]],
        tops=[[1, 1, 1], [1, 4, 1]],
        kernels=[0, 1, 2, 3, 4, 5],
        cap=10,
        budget=20,
    )

    # The remaining three sweeps share one set of grids and topologies.
    dim_list = [[16, 32, 128, 3], [32, 128, 128, 7]]
    top_list = [[1, 2, 1], [1, 4, 1]]

    mpi_sweep(
        "Verifying double precision + halo concatenation combinations",
        dims=dim_list, tops=top_list, kernels=[1, 2, 3, 4, 5],
        cap=10, budget=20, dp=1, concat=1,
    )
    mpi_sweep(
        "Verifying double precision combinations",
        dims=dim_list, tops=top_list, kernels=[1, 2, 3, 4, 5],
        cap=10, budget=20, dp=1,
    )
    mpi_sweep(
        "Verifying halo concatenation combinations",
        dims=dim_list, tops=top_list, kernels=[0, 1, 2, 3, 4, 5],
        cap=10, budget=20, concat=1,
    )
def main():
    """Drive ``run_verification`` over a fixed grid of small MPI test cases.

    Every case uses a 16 x 32 x 64 grid per process; the global grid passed
    to ``run_verification`` is that grid scaled by the process topology.
    Four sweeps are executed in order, each for ``ts`` values 0 and 1:

    1. kernels 0, 2, 3, 4, 5 on four topologies with default options;
    2. kernels 1, 2, 3 on two topologies with ``dp=1, concat=1``;
    3. kernel 1 on the same two topologies with ``concat=1``;
    4. kernel 2 on the same two topologies with ``dp=1``.
    """
    from verification_utils import run_verification

    sub_x, sub_y, sub_z = 16, 32, 64

    # Selected MPI topologies
    wide_topologies = ((1, 1, 1), (1, 1, 2), (2, 2, 1), (2, 2, 2))
    narrow_topologies = ((1, 1, 1), (1, 1, 2))

    # (topologies, kernels, extra keyword arguments), in execution order.
    sweeps = (
        (wide_topologies, (0, 2, 3, 4, 5), {}),
        (narrow_topologies, (1, 2, 3), {"dp": 1, "concat": 1}),
        (narrow_topologies, (1,), {"concat": 1}),
        (narrow_topologies, (2,), {"dp": 1}),
    )

    for topologies, kernels, options in sweeps:
        for step_kind in (0, 1):
            for px, py, pz in topologies:
                for kern in kernels:
                    run_verification(
                        nx=sub_x * px,
                        ny=sub_y * py,
                        nz=sub_z * pz,
                        ts=step_kind,
                        npx=px,
                        npy=py,
                        npz=pz,
                        kernel=kern,
                        **options
                    )
def main():
    """Run the ``ts=2`` verification sweeps through ``run_verification``.

    Sweeps, in execution order:

    1. kernels 0 and 1, ``mwd_type`` 0-2, over every intra-tile layout
       ``(tgs, nwf, thz, thy, thx)`` with 8 threads;
    2. kernels 0, 1, 5, 6, ``mwd_type`` 0-1, ``tgs`` in 1, 2, 5, 10 with
       10 threads (``tgs == 1`` is only run for ``mwd_type == 1``);
    3. MPI topologies 1x1x1 and 1x2x1 for kernels 0-5;
    4. MPI topologies 1x2x1 and 1x4x1 on smaller grids with
       ``dp=1, concat=1``, then ``dp=1``, then ``concat=1``.
    """
    import sys  # retained from the original body; not referenced here
    from verification_utils import run_verification

    def mpi_sweep(grids, topologies, kernels, **options):
        # Global grid = per-process grid scaled by the process topology.
        for px, py, pz in topologies:
            for lx, ly, lz, tdim in grids:
                for kern in kernels:
                    run_verification(
                        nx=lx * px,
                        ny=ly * py,
                        nz=lz * pz,
                        ts=2,
                        npx=px,
                        npy=py,
                        npz=pz,
                        kernel=kern,
                        t_dim=tdim,
                        **options
                    )

    grids = [(32, 160, 128, 1), (32, 256, 64, 3)]

    # intra tile parallelizm: (tgs, nwf, thz, thy, thx)
    layouts = [
        (2, 2, 2, 1, 1),
        (2, 2, 1, 2, 1),
        (2, 1, 1, 1, 2),
        (4, 4, 4, 1, 1),
        (4, 4, 1, 1, 4),
        (4, 2, 1, 2, 2),
        (4, 2, 2, 1, 2),
        (4, 1, 1, 1, 4),
    ]

    print("Verifying different wavefront paralellization stratigies")
    for kern in (0, 1):
        for gx, gy, gz, tdim in grids:
            for mwd in (0, 1, 2):
                for group, fronts, tz, ty, tx in layouts:
                    shown = (mwd, group, fronts, tz, ty, tx)
                    # Same text the comma-separated print statement emitted.
                    print(" ".join(["mwd_type:(tgs, nwf, thz, thy, thx) "]
                                   + [str(v) for v in shown]))
                    run_verification(
                        nx=gx,
                        ny=gy,
                        nz=gz,
                        ts=2,
                        kernel=kern,
                        t_dim=tdim,
                        mwd_type=mwd,
                        tgs=group,
                        num_threads=8,
                        nwf=fronts,
                        thx=tx,
                        thy=ty,
                        thz=tz,
                    )

    print("Verifying different wavefront paralellization stratigies")
    for kern in (0, 1, 5, 6):
        for gx, gy, gz, tdim in grids:
            for mwd in (0, 1):
                print(" ".join(["mwd_type: ", str(mwd)]))
                for group in (1, 2, 5, 10):
                    if group == 1 and mwd != 1:
                        continue
                    run_verification(
                        nx=gx,
                        ny=gy,
                        nz=gz,
                        ts=2,
                        kernel=kern,
                        t_dim=tdim,
                        mwd_type=mwd,
                        tgs=group,
                        num_threads=10,
                    )

    print("Verifying MPI configurations")
    # Selected MPI topologies
    mpi_sweep(grids, [(1, 1, 1), (1, 2, 1)], (0, 1, 2, 3, 4, 5))

    small_grids = [(16, 32, 128, 1), (16, 64, 32, 3)]
    # Selected MPI topologies
    split_y = [(1, 2, 1), (1, 4, 1)]
    mpi_sweep(small_grids, split_y, (1, 2), dp=1, concat=1)
    mpi_sweep(small_grids, split_y, (1, 2), dp=1)
    mpi_sweep(small_grids, split_y, (0, 2), concat=1)
def verification_idiam(k):
    """Run the multi-threaded wavefront verification sweeps with ``ts=k``.

    The function calls ``run_verification`` for nine groups of test cases,
    each announced by a ``print`` banner:

    * four single-process sweeps that vary ``mwd_type``, ``num_threads``
      and ``tgs``/``nwf``;
    * five MPI sweeps (general, increasing temporal block size, double
      precision + halo concatenation, double precision, halo
      concatenation) where ``tgs`` is derived from the process count.

    The two double-precision sweeps previously evaluated
    ``min(10 / 20 / np)``, i.e. ``min()`` of a single integer, which raises
    ``TypeError`` and aborted the function before its last three sweeps.
    They now use ``min(10, 20 // np)`` like the neighbouring sweeps.

    Args:
        k: value forwarded unchanged to ``run_verification`` as ``ts``.

    Returns:
        None.  All results are produced by ``run_verification``.
    """
    from verification_utils import run_verification

    single_process = (1, 1, 1)

    def launch(local, topo, t_dim, kernel, **opts):
        # Global grid = per-process grid scaled by the process topology.
        (nlx, nly, nlz), (npx, npy, npz) = local, topo
        run_verification(
            nx=nlx * npx,
            ny=nly * npy,
            nz=nlz * npz,
            ts=k,
            npx=npx,
            npy=npy,
            npz=npz,
            t_dim=t_dim,
            kernel=kernel,
            **opts
        )

    def enough_concurrency(th, nly, t_dim, radius=1):
        # Enough concurrency in the wavefront for the provided threads.
        # Floor division keeps the Python 2 integer semantics on Python 3.
        return th <= nly // ((t_dim + 1) * 2 * radius)

    def mpi_sweep(title, dims, tops, kernels, cap, budget, **opts):
        # One MPI sweep: tgs (also used as nwf) shrinks as the process
        # count grows, clamped to the range [1, cap].
        print(title)
        for topo in tops:
            nprocs = topo[0] * topo[1] * topo[2]
            tgs = max(1, min(cap, budget // nprocs))
            for nlx, nly, nlz, t_dim in dims:
                for kernel in kernels:
                    launch((nlx, nly, nlz), topo, t_dim, kernel,
                           tgs=tgs, nwf=tgs, **opts)

    # ---- single-process threading sweeps --------------------------------
    dim_list = [[32, 480, 128, 5, 2], [64, 480, 128, 5, 2]]
    print("Verifying different wavefront paralellization stratigies")
    th = 10
    for mwdt in [0, 1, 2, 3]:
        for nlx, nly, nlz, t, tgs in dim_list:
            for kernel, R in [(1, 1), (0, 4)]:
                if enough_concurrency(th, nly, t, R):
                    print("MWD type:%d" % mwdt)
                    launch((nlx, nly, nlz), single_process, t, kernel,
                           num_threads=th, tgs=tgs, nwf=tgs * R,
                           mwd_type=mwdt)

    dim_list = [[16, 128, 128, 1, 1], [32, 480, 128, 5, 2], [64, 480, 128, 5, 2]]
    print("Verifying different values of cache blocking in X")
    for th in range(2, 21, 2):
        for nlx, nly, nlz, t, tgs in dim_list:
            for kernel, R in [(1, 1), (4, 4)]:
                if enough_concurrency(th, nly, t, R):
                    launch((nlx, nly, nlz), single_process, t, kernel,
                           num_threads=th, tgs=tgs, nwf=tgs)

    print("Verifying different multi-core wavefront threads/tile_size combinations")
    dim_list = [[16, 128, 128, 1], [16, 480, 128, 5], [16, 640, 128, 7]]
    tgs = 1
    for th in range(1, 20):
        for nlx, nly, nlz, t in dim_list:
            if enough_concurrency(th, nly, t):
                launch((nlx, nly, nlz), single_process, t, 1,
                       num_threads=th, tgs=tgs, nwf=tgs)

    # These thread/group pairs are run without a concurrency check.
    for th, tgs in [(16, 16), (16, 8), (16, 4), (16, 2), (8, 8), (8, 4), (8, 2)]:
        for nlx, nly, nlz, t in dim_list:
            launch((nlx, nly, nlz), single_process, t, 1,
                   num_threads=th, tgs=tgs, nwf=tgs)

    # ---- MPI sweeps -----------------------------------------------------
    mpi_sweep(
        "Verifying general domain size and network topology combinations",
        dims=[[16, 32, 128, 1], [16, 64, 128, 3], [16, 64, 128, 7], [32, 128, 128, 7]],
        tops=[[1, 1, 1], [1, 2, 1], [1, 4, 1], [1, 8, 1]],
        kernels=[0, 1, 2, 3, 4, 5],
        cap=8,
        budget=16,
    )

    mpi_sweep(
        "Verifying temporal blocks with increasing size",
        dims=[[16, 96, 128, 5], [16, 128, 128, 7], [16, 160, 256, 9],
              [16, 224, 512, 13], [16, 480, 512, 29]],
        tops=[[1, 1, 1], [1, 4, 1]],
        kernels=[0, 1, 2, 3, 4, 5],
        cap=10,
        budget=20,
    )

    # The remaining three sweeps share one set of grids and topologies.
    dim_list = [[16, 32, 128, 3], [32, 128, 128, 7]]
    top_list = [[1, 2, 1], [1, 4, 1]]

    mpi_sweep(
        "Verifying double precision + halo concatenation combinations",
        dims=dim_list, tops=top_list, kernels=[1, 2, 3, 4, 5],
        cap=10, budget=20, dp=1, concat=1,
    )
    mpi_sweep(
        "Verifying double precision combinations",
        dims=dim_list, tops=top_list, kernels=[1, 2, 3, 4, 5],
        cap=10, budget=20, dp=1,
    )
    mpi_sweep(
        "Verifying halo concatenation combinations",
        dims=dim_list, tops=top_list, kernels=[0, 1, 2, 3, 4, 5],
        cap=10, budget=20, concat=1,
    )
def main():
    """Run the ``ts=2`` verification sweeps through ``run_verification``.

    Sweeps, in execution order:

    1. kernels 0 and 1, ``mwd_type`` 0-2, over every intra-tile layout
       ``(tgs, nwf, thz, thy, thx)`` with 8 threads;
    2. kernels 0, 1, 5, 6, ``mwd_type`` 0-1, ``tgs`` in 1, 2, 5, 10 with
       10 threads (``tgs == 1`` is only run for ``mwd_type == 1``);
    3. MPI topologies 1x1x1 and 1x2x1 for kernels 0-5;
    4. MPI topologies 1x2x1 and 1x4x1 on smaller grids with
       ``dp=1, concat=1``, then ``dp=1``, then ``concat=1``.
    """
    import sys  # retained from the original body; not referenced here
    from verification_utils import run_verification

    def mpi_sweep(grids, topologies, kernels, **options):
        # Global grid = per-process grid scaled by the process topology.
        for px, py, pz in topologies:
            for lx, ly, lz, tdim in grids:
                for kern in kernels:
                    run_verification(
                        nx=lx * px,
                        ny=ly * py,
                        nz=lz * pz,
                        ts=2,
                        npx=px,
                        npy=py,
                        npz=pz,
                        kernel=kern,
                        t_dim=tdim,
                        **options
                    )

    grids = [(32, 160, 128, 1), (32, 256, 64, 3)]

    # intra tile parallelizm: (tgs, nwf, thz, thy, thx)
    layouts = [
        (2, 2, 2, 1, 1),
        (2, 2, 1, 2, 1),
        (2, 1, 1, 1, 2),
        (4, 4, 4, 1, 1),
        (4, 4, 1, 1, 4),
        (4, 2, 1, 2, 2),
        (4, 2, 2, 1, 2),
        (4, 1, 1, 1, 4),
    ]

    print("Verifying different wavefront paralellization stratigies")
    for kern in (0, 1):
        for gx, gy, gz, tdim in grids:
            for mwd in (0, 1, 2):
                for group, fronts, tz, ty, tx in layouts:
                    shown = (mwd, group, fronts, tz, ty, tx)
                    # Same text the comma-separated print statement emitted.
                    print(" ".join(["mwd_type:(tgs, nwf, thz, thy, thx) "]
                                   + [str(v) for v in shown]))
                    run_verification(
                        nx=gx,
                        ny=gy,
                        nz=gz,
                        ts=2,
                        kernel=kern,
                        t_dim=tdim,
                        mwd_type=mwd,
                        tgs=group,
                        num_threads=8,
                        nwf=fronts,
                        thx=tx,
                        thy=ty,
                        thz=tz,
                    )

    print("Verifying different wavefront paralellization stratigies")
    for kern in (0, 1, 5, 6):
        for gx, gy, gz, tdim in grids:
            for mwd in (0, 1):
                print(" ".join(["mwd_type: ", str(mwd)]))
                for group in (1, 2, 5, 10):
                    if group == 1 and mwd != 1:
                        continue
                    run_verification(
                        nx=gx,
                        ny=gy,
                        nz=gz,
                        ts=2,
                        kernel=kern,
                        t_dim=tdim,
                        mwd_type=mwd,
                        tgs=group,
                        num_threads=10,
                    )

    print("Verifying MPI configurations")
    # Selected MPI topologies
    mpi_sweep(grids, [(1, 1, 1), (1, 2, 1)], (0, 1, 2, 3, 4, 5))

    small_grids = [(16, 32, 128, 1), (16, 64, 32, 3)]
    # Selected MPI topologies
    split_y = [(1, 2, 1), (1, 4, 1)]
    mpi_sweep(small_grids, split_y, (1, 2), dp=1, concat=1)
    mpi_sweep(small_grids, split_y, (1, 2), dp=1)
    mpi_sweep(small_grids, split_y, (0, 2), concat=1)
def verification_std(k):
    """Run the standard verification sweeps with ``ts=k``.

    Sweeps, in execution order:

    1. kernels 0-5 over three per-process grids and nine MPI topologies;
    2. only when ``k == 1``: kernels 0-5 with ``concat=1`` on the short
       grid/topology lists;
    3. only when ``k == 1``: kernels 1-5 with ``dp=1, concat=1`` on the
       same short lists;
    4. kernels 1-3 with ``dp=1`` on the same short lists.

    NOTE(review): sweep 4 is taken to run for every ``k`` because it
    re-defines its own grid and topology lists instead of relying on the
    ``k == 1`` branch -- confirm against the intended behaviour.
    """
    from verification_utils import run_verification

    def sweep(topologies, grids, kernels, **options):
        # Global grid = per-process grid scaled by the process topology.
        for px, py, pz in topologies:
            for lx, ly, lz in grids:
                for kern in kernels:
                    run_verification(
                        nx=lx * px,
                        ny=ly * py,
                        nz=lz * pz,
                        ts=k,
                        npx=px,
                        npy=py,
                        npz=pz,
                        kernel=kern,
                        **options
                    )

    # general test
    print("Verifying general domain size and network topology combinations")
    # Selected MPI topologies.  [2,1,1], [2,1,2], [1,1,4], [8,1,1],
    # [1,8,1] and [1,1,8] are currently disabled.
    all_topologies = [
        (1, 1, 1),
        (1, 2, 1),
        (1, 1, 2),
        (2, 2, 1),
        (1, 2, 2),
        (2, 2, 2),
        (4, 1, 1),
        (1, 4, 1),
        (4, 2, 1),
    ]
    sweep(all_topologies, [(16, 32, 64), (32, 64, 16), (64, 16, 32)],
          (0, 1, 2, 3, 4, 5))

    short_grids = [(16, 32, 64), (64, 16, 32)]
    # Selected MPI topologies
    short_topologies = [(1, 2, 1), (2, 2, 1), (2, 2, 2), (4, 1, 1)]

    if k == 1:
        # testing halo concatenation
        print("Verifying halo concatenation combinations")
        sweep(short_topologies, short_grids, (0, 1, 2, 3, 4, 5), concat=1)

        # Testing double precision + halo concatenation
        print("Verifying double precision + halo concatenation combinations")
        sweep(short_topologies, short_grids, (1, 2, 3, 4, 5), dp=1, concat=1)

    # testing double precision
    print("Verifying double precision combinations")
    sweep(short_topologies, short_grids, (1, 2, 3), dp=1)