Exemplo n.º 1
0
  (D, H, W) - depth, height and width of input image
  (T, R, S) - depth, height and width of filter kernels
  padding_{x,y,z} - zero padding
  strides_{x,y,z} - filter striding
  upscale_{x,y,z} - upscaling

[*] Chetlur et al. 'cuDNN: Efficient primitives for deep learning.' arXiv:1410.0759
"""
import numpy as np
import struct
import pycuda.driver as drv
from flexpt_array import Flexpt
import pycuda.autoinit 

# Pick the kernel set (this release ships only one; more are planned) and
# construct the Flexpt instance with the bench flag switched on.
fp = Flexpt(
    kernel_set="fgemm_float32_wide64",
    bench=True,
)

# Dimensions for layer 5 of Alexnet.
# (D, H, W) are the depth/height/width of the input image and
# (T, R, S) are the depth/height/width of the filter kernels.
N, C, K = 128, 192, 384
D, H, W = 1, 13, 13
T, R, S = 1, 3, 3

# No zero padding, unit filter stride and unit upscaling on every axis.
padding_z = padding_y = padding_x = 0
strides_z = strides_y = strides_x = 1
upscale_z = upscale_y = upscale_x = 1

# Integer word length of the input.
iwl = 15

# input dimensions
# ----------------------------------------------------------------------------
# Copyright 2014 Nervana Systems Inc.  All rights reserved.
# ----------------------------------------------------------------------------
import numpy as np
import pycuda.driver as drv
from flexpt_array import Flexpt
from pycuda.autoinit import context
import struct

# Construct the Flexpt instance for the single available kernel set, with
# calc_partials switched off.
fp = Flexpt(
    kernel_set="fgemm_float32_wide64",
    calc_partials=False,
)

# Operation layout and problem sizes.
op = "nt"  # n == not transpose, t == transpose
m = 4096
n = 4096
k = 4096
repeat = 50
# NOTE(review): presumably the integer word lengths of the two operands
# (cf. "iwl" naming) -- confirm against the code that consumes them.
iwlA = 15
iwlB = 15


def dims_for_op(op, m, n, k):
    """Return the ``(dim1, dim2)`` shape pair selected by ``op``.

    Args:
        op: one of "nt", "nn" or "tn" (n == not transpose, t == transpose).
        m, n, k: the sizes combined into the two shapes.

    Returns:
        A tuple ``(dim1, dim2)``; each element is a 2-tuple built from
        ``m``, ``n`` and ``k`` according to ``op``.

    Raises:
        ValueError: if ``op`` is not one of the supported values.
    """
    dims_by_op = {
        "nt": ((k, m), (k, n)),
        "nn": ((m, k), (k, n)),
        "tn": ((m, k), (n, k)),
    }
    if op not in dims_by_op:
        # Fail right here with a clear message; previously an unknown op fell
        # through every branch and left dim1/dim2 undefined, which only
        # surfaced later as a NameError at their first use.
        raise ValueError(
            "unsupported op %r; expected one of %s" % (op, sorted(dims_by_op))
        )
    return dims_by_op[op]


dim1, dim2 = dims_for_op(op, m, n, k)

# Fill both operands with random integers drawn from [0, 0x7fff) -- the upper
# bound is exclusive -- stored as int64. A1 is generated first, then B1.
A1, B1 = (
    np.random.randint(0x0, 0x7fff, size=shape).astype(np.int64)
    for shape in (dim1, dim2)
)