(D, H, W) - depth, height and width of input image (T, R, S) - depth, height and width of filter kernels padding_{x,y,z} - zero padding strides_{x,y,z} - filter striding upscale_{x,y,z} - upscaling [*] Chetlur et al. 'cuDNN: Efficient primitives for deep learning.' arXiv:1410.0759 """ import numpy as np import struct import pycuda.driver as drv from flexpt_array import Flexpt import pycuda.autoinit # select kernel set (just one in this release, more later) fp = Flexpt(kernel_set="fgemm_float32_wide64", bench=True) # set dims for layer 5 of Alexnet N,C,K = (128,192,384) D,H,W = (1,13,13) T,R,S = (1,3,3) # set padding, stride and upscale padding_z, padding_y, padding_x = (0,0,0) strides_z, strides_y, strides_x = (1,1,1) upscale_z, upscale_y, upscale_x = (1,1,1) # set input integer word length iwl = 15 # input dimensions
# ----------------------------------------------------------------------------
# Copyright 2014 Nervana Systems Inc. All rights reserved.
# ----------------------------------------------------------------------------
import numpy as np
import pycuda.driver as drv
from flexpt_array import Flexpt
from pycuda.autoinit import context
import struct

fp = Flexpt(kernel_set="fgemm_float32_wide64", calc_partials=False)

op = "nt"  # n == not transpose, t == transpose

# Problem sizes used to shape the two operands below.
m = 4096
n = 4096
k = 4096

repeat = 50

# NOTE(review): presumably the integer word lengths of operands A and B --
# confirm against the Flexpt API.
iwlA = 15
iwlB = 15

# Pick the shapes of the two operands from the requested op.
if op == "nt":
    dim1 = (k, m)
    dim2 = (k, n)
elif op == "nn":
    dim1 = (m, k)
    dim2 = (k, n)
elif op == "tn":
    dim1 = (m, k)
    dim2 = (n, k)
else:
    # Without this branch an unsupported op leaves dim1/dim2 unbound and the
    # script only fails later with a confusing NameError.
    raise ValueError(
        "unsupported op %r; expected one of 'nt', 'nn', 'tn'" % (op,))

# Random operands drawn from [0, 0x7fff), stored as 64-bit integers.
A1 = np.random.randint(0x0, 0x7fff, size=dim1).astype(np.int64)
B1 = np.random.randint(0x0, 0x7fff, size=dim2).astype(np.int64)