示例#1
0
def shutdown():
    """Destroy every cached cuDNN handle and mark the module uninitialized.

    The call is a no-op when the module-level ``_pid`` does not match the
    current process id, which this module treats as "not initialized".
    Otherwise each value stored in ``_handles`` is passed to
    ``libcudnn.cudnnDestroy``; afterwards ``_handles`` is reset to an empty
    dict and ``_pid`` to ``None``.

    Any exception raised by ``libcudnn.cudnnDestroy`` propagates to the
    caller, in which case the module state is left untouched.
    """
    global _handles, _pid

    if _pid != os.getpid():  # not initialized
        return

    # ``values()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``itervalues()`` only exists on Python 2 and raises AttributeError on 3.
    for handle in _handles.values():
        libcudnn.cudnnDestroy(handle)

    _handles = {}
    _pid = None  # mark as uninitialized
示例#2
0
def shutdown():
    """Destroy every cached cuDNN handle and mark the module uninitialized.

    The call is a no-op when the module-level ``_pid`` does not match the
    current process id, which this module treats as "not initialized".
    Otherwise each value stored in ``_handles`` is passed to
    ``libcudnn.cudnnDestroy``; afterwards ``_handles`` is reset to an empty
    dict and ``_pid`` to ``None``.

    Any exception raised by ``libcudnn.cudnnDestroy`` propagates to the
    caller, in which case the module state is left untouched.
    """
    global _handles, _pid

    if _pid != os.getpid():  # not initialized
        return

    # ``values()`` is available on both Python 2 and Python 3 dicts, whereas
    # ``itervalues()`` only exists on Python 2 and raises AttributeError on 3.
    for handle in _handles.values():
        libcudnn.cudnnDestroy(handle)

    _handles = {}
    _pid = None  # mark as uninitialized
示例#3
0
    libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data,
        filters_desc, filters_data, conv_desc, algo, ws_data, ws_size.value, beta,
        Y_desc, Y_data)
    start_bench()

    for i in range(10):
        libcudnn.cudnnConvolutionForward(cudnn_context, alpha, X_desc, X_data,
            filters_desc, filters_data, conv_desc, algo, ws_data, ws_size.value, beta,
            Y_desc, Y_data)

    ms = end_bench()

    ws_ptr = None
    libcudnn.cudnnDestroyTensorDescriptor(X_desc)
    libcudnn.cudnnDestroyTensorDescriptor(Y_desc)
    libcudnn.cudnnDestroyFilterDescriptor(filters_desc)
    libcudnn.cudnnDestroyConvolutionDescriptor(conv_desc)

    return ms / 10

# for kw in range(1, 11):
    # for kh in range(1, 11):
        # ms = benchmark_conv(kw, kh)
        # print("%dx%d : %fms" % (kw, kh, ms))
# Sweep the batch size from 1 to 31 with an 11x11 kernel and report the
# time returned by benchmark_conv together with the resulting throughput.
for bsz in range(1, 32):
    ms = benchmark_conv(11, 11, bsz)
    # ``ms`` is reported in milliseconds, so ``bsz / ms`` alone would be
    # images per *millisecond*; scale by 1000 to match the "img/sec" label.
    print("%d : %.2fms => %f img/sec" % (bsz, ms, 1000.0 * bsz / ms))
# Clean up
libcudnn.cudnnDestroy(cudnn_context)
示例#4
0
    maxU = parU[0:1,0:1]

    maxo  = ng.max(abs(cuO - nlO.T), partial=parO, out=maxO).get()[0,0]
    maxb  = ng.max(abs(cuB - nlB.T), partial=parB, out=maxB).get()[0,0]
    maxu  = ng.max(abs(cuU - nlU.T), partial=parU, out=maxU).get()[0,0]

    meano = ng.mean(abs(cuO), partial=parO, out=maxO).get()[0,0]
    meanb = ng.mean(abs(cuB), partial=parB, out=maxB).get()[0,0]
    meanu = ng.mean(abs(cuU), partial=parU, out=maxU).get()[0,0]

    print "        maxerr   mean   pct"
    print "fprop: %7.5f %6.2f %5.3f" % (maxo, meano, 100*maxo/meano)
    print "bprop: %7.5f %6.2f %5.3f" % (maxb, meanb, 100*maxb/meanb)
    print "updat: %7.5f %6.2f %5.3f" % (maxu, meanu, 100*maxu/meanu)

    # free up memory from this layer before proceeding
    cuB  = cuU  = cuO  = None
    nlB  = nlU  = nlO  = None
    parO = parB = parU = maxO = maxB = maxU = None


# Release every cuDNN descriptor with the destroy call matching its kind,
# then release the library handle itself.
for _destroy, _descriptor in (
    (libcudnn.cudnnDestroyTensorDescriptor, I_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, O_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, F_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, E_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, B_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, U_desc),
    (libcudnn.cudnnDestroyConvolutionDescriptor, C_desc),
):
    _destroy(_descriptor)

libcudnn.cudnnDestroy(cudnn)
# Wrap the device addresses of the input, filter and output arrays in
# ctypes void pointers so they can be handed to the cuDNN bindings.
X_data, filters_data, Y_data = (
    ctypes.c_void_p(int(array.gpudata)) for array in (X, filters, Y)
)

# Perform convolution
# First ask cuDNN which forward algorithm to use for these descriptors.
algo = libcudnn.cudnnGetConvolutionForwardAlgorithm(
    cudnn_context,
    X_desc,
    filters_desc,
    conv_desc,
    Y_desc,
    convolution_fwd_pref,
    0)

print("Cudnn algorithm = %d" % algo.value)

# Query the workspace size for the chosen algorithm; device memory is only
# allocated when a non-zero size is reported, otherwise a null pointer is used.
ws_size = libcudnn.cudnnGetConvolutionForwardWorkspaceSize(
    cudnn_context, X_desc, filters_desc, conv_desc, Y_desc, algo)
if ws_size.value > 0:
    ws_ptr = drv.mem_alloc(ws_size.value)
else:
    ws_ptr = 0
ws_data = ctypes.c_void_p(int(ws_ptr))

# start_bench()/end_bench() bracket the single forward pass below
# (presumably timing it under the "fprop" label -- confirm in the helpers).
start_bench()

libcudnn.cudnnConvolutionForward(
    cudnn_context, alpha,
    X_desc, X_data,
    filters_desc, filters_data,
    conv_desc, algo,
    ws_data, ws_size.value,
    beta,
    Y_desc, Y_data)

end_bench("fprop")

# Drop the reference to the workspace allocation now that it is unused.
ws_ptr = None

# Clean up
for _destroy, _descriptor in (
    (libcudnn.cudnnDestroyTensorDescriptor, X_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, Y_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, filters_desc),
    (libcudnn.cudnnDestroyConvolutionDescriptor, conv_desc),
):
    _destroy(_descriptor)
libcudnn.cudnnDestroy(cudnn_context)
示例#6
0
    maxB = parB[0:1, 0:1]
    maxU = parU[0:1, 0:1]

    maxo = ng.max(abs(cuO - nlO.T), partial=parO, out=maxO).get()[0, 0]
    maxb = ng.max(abs(cuB - nlB.T), partial=parB, out=maxB).get()[0, 0]
    maxu = ng.max(abs(cuU - nlU.T), partial=parU, out=maxU).get()[0, 0]

    meano = ng.mean(abs(cuO), partial=parO, out=maxO).get()[0, 0]
    meanb = ng.mean(abs(cuB), partial=parB, out=maxB).get()[0, 0]
    meanu = ng.mean(abs(cuU), partial=parU, out=maxU).get()[0, 0]

    print "        maxerr   mean   pct"
    print "fprop: %7.5f %6.2f %5.3f" % (maxo, meano, 100 * maxo / meano)
    print "bprop: %7.5f %6.2f %5.3f" % (maxb, meanb, 100 * maxb / meanb)
    print "updat: %7.5f %6.2f %5.3f" % (maxu, meanu, 100 * maxu / meanu)

    # free up memory from this layer before proceeding
    cuB = cuU = cuO = None
    nlB = nlU = nlO = None
    parO = parB = parU = maxO = maxB = maxU = None

# Tear down the cuDNN descriptors one by one, pairing each with the destroy
# routine for its kind, and finally release the cuDNN handle.
_teardown_steps = (
    (libcudnn.cudnnDestroyTensorDescriptor, I_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, O_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, F_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, E_desc),
    (libcudnn.cudnnDestroyTensorDescriptor, B_desc),
    (libcudnn.cudnnDestroyFilterDescriptor, U_desc),
    (libcudnn.cudnnDestroyConvolutionDescriptor, C_desc),
)
for _release, _target in _teardown_steps:
    _release(_target)

libcudnn.cudnnDestroy(cudnn)