# Problem sizes: each (K, C, N) triple fixes the operand shapes built below.
for K, C, N in ((32, 4096, 1512), ):
    # NOTE(review): alpha/beta are not consumed in the visible part of the
    # loop; presumably GEMM scaling factors used further down -- confirm.
    for alpha, beta in ((1.0, 0.0), (0.5, 0.5)):
        # One row per operation: (op code, shape of A, shape of B, shape of C).
        gemm_layouts = (
            ("nn", (K, C), (C, N), (K, N)),  # fprop
            ("tn", (K, C), (K, N), (C, N)),  # bprop
            ("nt", (K, N), (C, N), (K, C)),  # update
        )
        for op, dimA, dimB, dimC in gemm_layouts:
            # Operands allocated in the dtype under test.
            devA1 = ng.empty(dimA, dtype=dtype)
            devB1 = ng.empty(dimB, dtype=dtype)
            devC1 = ng.empty(dimC, dtype=dtype)

            # fill with uniform randoms from -1 to 1
            devA1[:] = 2 * (.5 - ng.rand())
            devB1[:] = 2 * (.5 - ng.rand())
            devC1[:] = 2 * (.5 - ng.rand())

            if dtype is not np.float32:
                # different dtype: build float32 copies of the inputs
                devA2 = ng.empty(dimA, dtype=np.float32)
                devB2 = ng.empty(dimB, dtype=np.float32)
                devA2[:] = devA1
                devB2[:] = devB1
            else:
                # same dtype: the float32 inputs are just aliases
                devA2, devB2 = devA1, devB1

            # float32 output buffer, allocated regardless of dtype
            devC2 = ng.empty(dimC, dtype=np.float32)
# NOTE(review): the three lines below look like disabled problem sizes left
# over from the enclosing loop header, which is outside this excerpt.
#(3072,3072,32+128*1),(3072,3072,64+128*1),(3072,3072,96+128*1),(3072,3072,128+128*1),
#(3072,3072,32+128*2),(3072,3072,64+128*2),(3072,3072,96+128*2),(3072,3072,128+128*2),
#(3072,3072,32+128*3),(3072,3072,64+128*3),(3072,3072,96+128*3),(3072,3072,128+128*3),):
# One row per operation: (op code, shape of A, shape of B, shape of C).
for op, dimA, dimB, dimC in (
        ("nn", (K, C), (C, N), (K, N)),    # fprop
        ("tn", (K, C), (K, N), (C, N)),    # bprop
        ("nt", (K, N), (C, N), (K, C))):   # update
    # Larger C gets a smaller repeat count.
    if C <= 3072:
        repeat = 5000
    else:
        repeat = 500

    # Operands allocated in the dtype under test.
    devA1 = ng.empty(dimA, dtype=dtype)
    devB1 = ng.empty(dimB, dtype=dtype)
    devC1 = ng.empty(dimC, dtype=dtype)

    # fill with uniform randoms from -1 to 1 (devC1 is left unfilled here)
    devA1[:] = 2 * (.5 - ng.rand())
    devB1[:] = 2 * (.5 - ng.rand())

    if dtype is not np.float32:
        # different dtype: build float32 copies of the inputs
        devA2 = ng.empty(dimA, dtype=np.float32)
        devB2 = ng.empty(dimB, dtype=np.float32)
        devA2[:] = devA1
        devB2[:] = devB1
    else:
        # same dtype: the float32 inputs are just aliases
        devA2, devB2 = devA1, devB1

    # float32 output buffer, allocated regardless of dtype
    devC2 = ng.empty(dimC, dtype=np.float32)
# Accumulate the value returned by cublas_dot over the three layouts for each
# (K, C, N) problem size. `repeat` comes from outside this excerpt.
for K, C, N in ((3072, 3072, 32),):

    total = 0
    # One row per operation: (op code, shape of A, shape of B, shape of C).
    for op, dimA, dimB, dimC in (
            ("nn", (K, C), (C, N), (K, N)),     # fprop
            ("tn", (K, C), (K, N), (C, N)),     # bprop
            ("nt", (K, N), (C, N), (K, C)),):   # update

        devA = ng.empty(dimA, dtype=np.float32)
        devB = ng.empty(dimB, dtype=np.float32)
        devC = ng.empty(dimC, dtype=np.float32)

        # fill with uniform randoms from -1 to 1
        devA[:] = 2 * (.5 - ng.rand())
        devB[:] = 2 * (.5 - ng.rand())

        total += cublas_dot(op, devA, devB, devC, repeat=repeat, warmup=True)

    # Single-argument print call: valid on Python 2 and 3, and emits the same
    # text as the old statement form `print "N2 Total: ", total` (the statement
    # inserted a separator space after the label, hence the two spaces).
    print("N2 Total:  %s" % (total,))

    # Second pass: same three operations with the N dimension leading.
    total = 0
    for op, dimA, dimB, dimC in (
            ("nt", (N, C), (K, C), (N, K)),     # fprop
            ("nn", (N, K), (K, C), (N, C)),     # bprop
            ("tn", (N, K), (N, C), (K, C)),):   # update

        devA = ng.empty(dimA, dtype=np.float32)
        devB = ng.empty(dimB, dtype=np.float32)
        devC = ng.empty(dimC, dtype=np.float32)
# Set up the buffers and descriptors for the cuDNN convolution calls.
# Strides and dimension tuples are read from the `conv` object.
str_d, str_h, str_w = conv.strides
alpha, beta = (1.0, 0.0)

dimI = conv.dimI2
dimF = conv.dimF2
dimO = conv.dimO2

# Parenthesised single-argument form prints identically on Python 2 and 3.
print("cudnn:")

# Device buffers are allocated with each dimension tuple reversed.
# NOTE(review): presumably to match the NCHW layout given to cuDNN -- confirm.
cuI = ng.empty(dimI[::-1], dtype=np.float32)
cuF = ng.empty(dimF[::-1], dtype=np.float32)
cuE = ng.empty(dimO[::-1], dtype=np.float32)
cuB = ng.empty(dimI[::-1], dtype=np.float32)
cuU = ng.empty(dimF[::-1], dtype=np.float32)
cuO = ng.empty(dimO[::-1], dtype=np.float32)

# Only cuI, cuF and cuE are filled: uniform randoms from -1 to 1.
cuI[:] = 2 * (.5 - ng.rand())
cuF[:] = 2 * (.5 - ng.rand())
cuE[:] = 2 * (.5 - ng.rand())

# debug aid: print(drv.mem_get_info())

# Raw device addresses wrapped as void pointers for the ctypes-level calls.
I_data = ctypes.c_void_p(int(cuI.gpudata))
F_data = ctypes.c_void_p(int(cuF.gpudata))
O_data = ctypes.c_void_p(int(cuO.gpudata))
E_data = ctypes.c_void_p(int(cuE.gpudata))
B_data = ctypes.c_void_p(int(cuB.gpudata))
U_data = ctypes.c_void_p(int(cuU.gpudata))

# NOTE(review): the two literal 1s are upscale or dilation factors depending
# on the cuDNN version -- confirm against the bound library.
libcudnn.cudnnSetConvolution2dDescriptor(C_desc, pad_h, pad_w, str_h, str_w, 1, 1, conv_mode)

libcudnn.cudnnSetTensor4dDescriptor(I_desc, NCHW_fmt, cu_dtype, N, C, H, W)
# Accumulate the value returned by cublas_dot over the three layouts for each
# (K, C, N) problem size. `repeat` comes from outside this excerpt.
for K, C, N in ((3072, 3072, 32),):

    # First pass; each row is (op code, shape of A, shape of B, shape of C).
    gemm_layouts = (
        ("nn", (K, C), (C, N), (K, N)),  # fprop
        ("tn", (K, C), (K, N), (C, N)),  # bprop
        ("nt", (K, N), (C, N), (K, C)),  # update
    )
    total = 0
    for op, dimA, dimB, dimC in gemm_layouts:
        devA = ng.empty(dimA, dtype=np.float32)
        devB = ng.empty(dimB, dtype=np.float32)
        devC = ng.empty(dimC, dtype=np.float32)

        # fill with uniform randoms from -1 to 1
        devA[:] = 2 * (.5 - ng.rand())
        devB[:] = 2 * (.5 - ng.rand())

        total += cublas_dot(op, devA, devB, devC, repeat=repeat, warmup=True)

    print("N2 Total: ", total)

    # Second pass: same three operations with the N dimension leading.
    gemm_layouts = (
        ("nt", (N, C), (K, C), (N, K)),  # fprop
        ("nn", (N, K), (K, C), (N, C)),  # bprop
        ("tn", (N, K), (N, C), (K, C)),  # update
    )
    total = 0
    for op, dimA, dimB, dimC in gemm_layouts:
        devA = ng.empty(dimA, dtype=np.float32)
        devB = ng.empty(dimB, dtype=np.float32)
        devC = ng.empty(dimC, dtype=np.float32)