(D, H, W)       - depth, height and width of input image
(T, R, S)       - depth, height and width of filter kernels
padding_{x,y,z} - zero padding
strides_{x,y,z} - filter striding
upscale_{x,y,z} - upscaling

[*] Chetlur et al. 'cuDNN: Efficient primitives for deep learning.'
    arXiv:1410.0759
"""
import numpy as np
import struct
import pycuda.driver as drv
from flexpt_array import Flexpt
import pycuda.autoinit

# select kernel set (just one in this release, more later)
fp = Flexpt(kernel_set="fgemm_float32_wide64", bench=True)

# set dims for layer 5 of Alexnet
N, C, K = (128, 192, 384)
D, H, W = (1, 13, 13)
T, R, S = (1, 3, 3)

# set padding, stride and upscale
padding_z, padding_y, padding_x = (0, 0, 0)
strides_z, strides_y, strides_x = (1, 1, 1)
upscale_z, upscale_y, upscale_x = (1, 1, 1)

# set input integer word length
iwl = 15

# input dimensions
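# --- Added illustration (not part of the original script) --------------------
# With upscale of 1, the output spatial size follows the standard convolution
# output-size formula described in the cuDNN paper [*].  The names M, P, Q for
# the output depth/height/width are an assumption made here for illustration.
M = (D - T + 2 * padding_z) // strides_z + 1   # output depth  -> 1
P = (H - R + 2 * padding_y) // strides_y + 1   # output height -> 11
Q = (W - S + 2 * padding_x) // strides_x + 1   # output width  -> 11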
# ----------------------------------------------------------------------------
# Copyright 2014 Nervana Systems Inc. All rights reserved.
# ----------------------------------------------------------------------------
import numpy as np
import pycuda.driver as drv
from flexpt_array import Flexpt
from pycuda.autoinit import context
import struct

fp = Flexpt(kernel_set="fgemm_float32_wide64", calc_partials=False)

op = "nt"  # n == not transpose, t == transpose
m = 4096
n = 4096
k = 4096
repeat = 50
iwlA = 15
iwlB = 15

if op == "nt":
    dim1 = (k, m)
    dim2 = (k, n)
elif op == "nn":
    dim1 = (m, k)
    dim2 = (k, n)
elif op == "tn":
    dim1 = (m, k)
    dim2 = (n, k)

A1 = np.random.randint(0x0, 0x7fff, size=dim1).astype(np.int64)
B1 = np.random.randint(0x0, 0x7fff, size=dim2).astype(np.int64)
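# --- Added CPU reference sketch (not part of the original benchmark) ---------
# Given the dimension layouts chosen above, a plain NumPy product producing an
# (m, n) result would look like the following.  How fgemm itself maps the
# "n"/"t" flags onto its operands is an assumption here, not taken from the
# kernel source.
if op == "nt":
    C_ref = np.dot(A1.T, B1)   # (m, k) x (k, n)
elif op == "nn":
    C_ref = np.dot(A1, B1)     # (m, k) x (k, n)
elif op == "tn":
    C_ref = np.dot(A1, B1.T)   # (m, k) x (k, n)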
# ----------------------------------------------------------------------------
# Copyright 2014 Nervana Systems Inc. All rights reserved.
# ----------------------------------------------------------------------------
"""
A benchmark script for Soumith and his page:
https://github.com/soumith/convnet-benchmarks
"""
import numpy as np
import struct
import pycuda.driver as drv
from flexpt_array import Flexpt
import pycuda.autoinit

fp = Flexpt(kernel_set="fgemm_float32_wide64", bench=True)


def scale(n, q):
    # Read the exponent field of the float32 representation of
    # 0x7fff**2 * n / q (shift past the 23 mantissa bits, subtract 126 so the
    # result is the unbiased exponent plus one): roughly floor(log2(value)) + 1,
    # the integer word length needed to hold an n-term accumulation of
    # full-range inputs, reduced by a divisor q.
    return ((struct.unpack('I', struct.pack('f', float(0x7fff**2 * n) / q)))[0] >> 23) - 126


def go(N, C, K, D, H, W, T, R, S):

    # input dimensions
    dimI = (C, D, H, W, N)
    dimF = (C, T, R, S, K)

    # set padding, stride and upscale
    padding_z, padding_y, padding_x = (0, 0, 0)
    strides_z, strides_y, strides_x = (1, 1, 1)