# Test-module imports (reconstructed; the original header was not included.
# Module paths assume the sandbox.cuda-era Theano layout, and mode_with_gpu
# uses the usual definition from those tests).
import copy

import numpy

import theano
import theano.sandbox.cuda.fftconv
import theano.tensor.nnet
from theano import shared
from theano.sandbox.cuda import dnn
from theano.sandbox.cuda.basic_ops import gpu_contiguous
from theano.sandbox.cuda.blas import (GpuCorr3dMM, GpuCorr3dMM_gradWeights,
                                      GpuCorr3dMM_gradInputs)
from theano.tensor.nnet.corr3d import (Corr3dMM, Corr3dMM_gradWeights,
                                       Corr3dMM_gradInputs)
from theano.tests import unittest_tools as utt

mode_with_gpu = theano.compile.mode.get_default_mode().including('gpu')


def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                   subsample=(1, 1, 1)):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
    inputs = shared(inputs_val)
    dCdH = shared(dCdH_val)

    # Reference: the legacy convGrad3D op.
    conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                         WShape=filters_shape,
                                         d=subsample)
    img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))
    if subsample == (1, 1, 1):
        conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img, topgrad)
    else:
        # With strides, the filter shape cannot be inferred; it must be given.
        conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
            img, topgrad, shape=filters_shape[1:4])
    conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], conv)
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
def run_gradinput(self, inputs_shape, filters_shape,
                  subsample=(1, 1, 1)):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    bottom_height = (inputs_shape[1] - 1) * subsample[0] + filters_shape[1]
    bottom_width = (inputs_shape[2] - 1) * subsample[1] + filters_shape[2]
    bottom_depth = (inputs_shape[3] - 1) * subsample[2] + filters_shape[3]
    bottom_shape = theano.shared(
        numpy.array([bottom_height, bottom_width, bottom_depth]))

    weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
    top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    if subsample == (1, 1, 1):
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top)
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top)
    else:
        # With strides, the bottom shape is ambiguous; it must be given.
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top, shape=bottom_shape)
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top, shape=bottom_shape)

    f_ref = theano.function([], conv_ref, mode='FAST_RUN')
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
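# Hypothetical driver for the helper above (not in the original file). The
# shapes follow the layout used throughout these tests: (batch, height,
# width, depth, channels) for the top gradient and (nb filters, height,
# width, depth, channels) for the filters, with inputs_shape[4] equal to
# filters_shape[0].
def test_gradinput(self):
    self.run_gradinput(inputs_shape=(16, 5, 1, 13, 10),
                       filters_shape=(10, 6, 12, 4, 1))
    self.run_gradinput(inputs_shape=(16, 5, 1, 13, 10),
                       filters_shape=(10, 6, 12, 4, 1),
                       subsample=(2, 2, 2))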
def test_opt_convtransp3d_gemm(self):
    inputs_shape = (16, 15, 12, 12, 10)
    filters_shape = (10, 6, 12, 4, 1)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                           d=(1, 1, 1), H=inputs)
    mode = mode_with_gpu.including('convtransp3d_gemm')

    f_ref = theano.function([], conv)
    f_gemm = theano.function([], conv, mode=mode)

    # make sure we inserted the gemm trickery
    topo = f_gemm.maker.fgraph.toposort()
    assert sum(isinstance(n.op, GpuCorr3dMM_gradInputs) for n in topo) > 0

    res_ref = f_ref()
    res_gemm = f_gemm()
    utt.assert_allclose(res_ref, res_gemm)
def test_opt_conv3d_fft(self):
    inputs_shape = (16, 20, 32, 16, 1)
    filters_shape = (10, 6, 12, 4, 1)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    conv = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                     b=bias, d=(1, 1, 1))
    ref_mode = copy.copy(theano.compile.get_default_mode())
    ref_mode.check_py_code = False
    mode = mode_with_gpu.including('conv3d_fft')
    mode.check_py_code = False

    f_ref = theano.function([], conv, mode=ref_mode)
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft)
def test_opt_nofft_full(self):
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full',
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_full')

    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=mode)

    # make sure that no CuFFTOp has been inserted
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 0
def run_conv_full(self, inputs_shape, filters_shape, pad=False):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

    conv_ref = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                               d=(1, 1, 1), H=inputs)
    filters = filters.dimshuffle(4, 0, 1, 2, 3)
    inputs = inputs.dimshuffle(0, 4, 1, 2, 3)
    conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(inputs, filters,
                                                      border_mode="full",
                                                      pad_last_dim=pad)
    conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], conv_ref)
    f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
def test_opt_convgrad3d_gemm(self):
    inputs_shape = (16, 10, 12, 16, 1)
    filters_shape = (10, 6, 12, 4, 1)
    dCdH_shape = (16, 5, 1, 13, 10)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
    inputs = shared(inputs_val)
    dCdH = shared(dCdH_val)

    conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                         WShape=filters_shape,
                                         d=(1, 1, 1))
    mode = mode_with_gpu.including('convgrad3d_gemm')

    f_ref = theano.function([], conv)
    f_gemm = theano.function([], conv, mode=mode)

    # make sure we inserted the gemm trickery
    topo = f_gemm.maker.fgraph.toposort()
    assert sum(isinstance(n.op, GpuCorr3dMM_gradWeights) for n in topo) > 0

    res_ref = f_ref()
    res_gemm = f_gemm()
    utt.assert_allclose(res_ref, res_gemm)
def test_opt_full(self):
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          border_mode='full')
    mode = mode_with_gpu.including('conv_fft_full')

    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2, topo

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft)
def test_opt_conv3d_gemm(self):
    inputs_shape = (16, 20, 32, 16, 1)
    filters_shape = (10, 6, 12, 4, 1)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    conv = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                     b=bias, d=(1, 1, 1))
    mode = mode_with_gpu.including('conv3d_gemm')
    mode.check_py_code = False

    f_ref = theano.function([], conv, mode="FAST_RUN")
    f_gemm = theano.function([], conv, mode=mode)

    # make sure we inserted the gemm trickery
    topo = f_gemm.maker.fgraph.toposort()
    assert sum(isinstance(n.op, GpuCorr3dMM) for n in topo) > 0

    res_ref = f_ref()
    res_gemm = f_gemm()
    utt.assert_allclose(res_ref, res_gemm)
def test_opt_convgrad3d_fft(self):
    inputs_shape = (2, 17, 15, 16, 1)
    filters_shape = (10, 6, 7, 4, 1)
    # Valid-mode output shape implied by the input and filter shapes.
    dCdH_shape = (inputs_shape[0],
                  inputs_shape[1] - filters_shape[1] + 1,
                  inputs_shape[2] - filters_shape[2] + 1,
                  inputs_shape[3] - filters_shape[3] + 1,
                  filters_shape[0])
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
    inputs = shared(inputs_val)
    dCdH = shared(dCdH_val)

    conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                         WShape=filters_shape,
                                         d=(1, 1, 1))
    mode = mode_with_gpu.including('convgrad3d_fft')
    mode.check_py_code = False

    f_ref = theano.function([], conv, mode="FAST_RUN")
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
def test_opt_convtransp3d_fft(self):
    inputs_shape = (2, 9, 16, 12, 10)
    filters_shape = (10, 3, 8, 4, 1)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                           d=(1, 1, 1), H=inputs)
    mode = mode_with_gpu.including('convtransp3d_fft')

    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
def run_conv_valid(self, inputs_shape, filters_shape, pad=False):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # Flip the filters, as conv3D computes a correlation.
    filters_flip = filters[:, ::-1, ::-1, ::-1, :]
    conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters_flip,
                                         b=bias, d=(1, 1, 1))
    conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(
        inputs.dimshuffle(0, 4, 1, 2, 3),
        filters.dimshuffle(0, 4, 1, 2, 3),
        border_mode="valid",
        pad_last_dim=pad)
    conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], conv_ref, mode="FAST_RUN")
    mode = mode_with_gpu
    mode.check_py_code = False
    f_fft = theano.function([], conv_fft, mode=mode)

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-05, atol=1e-05)
def run_gradinput(self, inputs_shape, filters_shape,
                  subsample=(1, 1, 1)):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

    conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                           d=subsample, H=inputs)
    f_ref = theano.function([], conv)
    res_ref = f_ref()

    # Get the bottom shape using convTransp3D
    bottom_shape = res_ref.shape
    bottom_val = numpy.random.random(bottom_shape).astype('float32')
    bottom = shared(bottom_val)

    weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
    top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    if subsample == (1, 1, 1):
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top)
    else:
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top, shape=bottom.shape[1:4])
    conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)

    f = theano.function([], conv_gemm, mode=mode_with_gpu)
    res = f()
    utt.assert_allclose(res_ref, res)
def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                   subsample=(1, 1, 1)):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
    inputs = shared(inputs_val)
    dCdH = shared(dCdH_val)

    img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))
    if subsample == (1, 1, 1):
        conv_ref = Corr3dMM_gradWeights(subsample=subsample)(img, topgrad)
        conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img,
                                                                 topgrad)
    else:
        # The reference stays on the CPU op in both branches.
        conv_ref = Corr3dMM_gradWeights(subsample=subsample)(
            img, topgrad, shape=filters_shape[1:4])
        conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
            img, topgrad, shape=filters_shape[1:4])

    f_ref = theano.function([], conv_ref)
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
def test_opt_convtransp3d_fft(self):
    inputs_shape = (16, 15, 21, 12, 10)
    filters_shape = (10, 6, 12, 4, 1)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias,
                                           d=(1, 1, 1), H=inputs)
    mode = mode_with_gpu.including('convtransp3d_fft')

    f_ref = theano.function([], conv)
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
def test_opt_convgrad3d_fft(self):
    inputs_shape = (16, 20, 32, 16, 1)
    filters_shape = (10, 6, 12, 4, 1)
    dCdH_shape = (16, 15, 21, 13, 10)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
    inputs = shared(inputs_val)
    dCdH = shared(dCdH_val)

    conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                         WShape=filters_shape,
                                         d=(1, 1, 1))
    mode = mode_with_gpu.including('convgrad3d_fft')
    mode.check_py_code = False

    f_ref = theano.function([], conv, mode="FAST_RUN")
    f_fft = theano.function([], conv, mode=mode)

    # make sure we inserted the fft trickery
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 2

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
                   border_mode, conv_mode):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')

    # Scale down the input values to prevent very large absolute errors
    # due to float rounding
    inputs_val /= 10
    filters_val /= 10

    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # Compile a theano function for the CuDNN implementation
    conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                          border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)
    f = theano.function([], conv, mode=mode_with_gpu)

    # If conv_mode is 'conv' the reference implementation should use
    # filters flipped according to the width, height and time axis
    if conv_mode == 'conv':
        flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
    else:
        flipped_filters = filters

    # If border mode is anything but 'valid', the reference implementation
    # should operate on padded inputs
    if border_mode == 'valid':
        padded_inputs = inputs
    else:
        if border_mode == 'full':
            pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
        elif isinstance(border_mode, int):
            pad_per_dim = [border_mode] * 3
        else:
            pad_per_dim = border_mode

        pad_before_after = ([(0, 0), (0, 0)] +
                            [(p, p) for p in pad_per_dim])
        padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                      'constant')
        padded_inputs = shared(padded_inputs_val)

    # Compile a theano function for the reference implementation
    conv_ref = theano.tensor.nnet.conv3D(
        V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
        W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
        b=bias, d=subsample)
    f_ref = theano.function([], conv_ref.dimshuffle(0, 4, 1, 2, 3))

    # Compare the results of the two implementations
    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
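# Hypothetical driver for the helper above (not in the original file). The
# layouts follow cuDNN's (batch, channels, height, width, depth) convention,
# here reusing the volumes from the gemm tests transposed into that order.
def test_conv3d_fwd():
    run_conv3d_fwd(inputs_shape=(16, 1, 20, 32, 16),
                   filters_shape=(10, 1, 6, 12, 4),
                   subsample=(1, 1, 1),
                   border_mode='valid',
                   conv_mode='conv')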
def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
    inputs_val = numpy.random.random(inputs_shape).astype("float32")
    filters_val = numpy.random.random(filters_shape).astype("float32")
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv_ref = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                              **other_args)
    conv_fft = theano.sandbox.cuda.fftconv.conv2d_fft(inputs, filters,
                                                      pad_last_dim=pad,
                                                      **other_args)

    f_ref = theano.function([], conv_ref)
    f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

    res_ref = f_ref()
    res_fft = f_fft()
    utt.assert_allclose(res_ref, res_fft)
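# Hypothetical driver for the 2D helper above (not in the original file),
# reusing the (batch, channels, rows, cols) shapes from the opt tests; the
# border_mode value is forwarded to both conv2d and conv2d_fft.
def test_conv2d_fft(self):
    self.run_conv(inputs_shape=(5, 3, 7, 6),
                  filters_shape=(2, 3, 3, 3),
                  border_mode='valid')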
def run_conv_valid(self, inputs_shape, filters_shape,
                   border_mode='valid', filter_dilation=(1, 1, 1),
                   subsample=(1, 1, 1), verify_grad=False):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv_ref = Corr3dMM(border_mode=border_mode,
                        filter_dilation=filter_dilation,
                        subsample=subsample)(
        inputs.dimshuffle(0, 4, 1, 2, 3),
        filters.dimshuffle(0, 4, 1, 2, 3))
    conv_ref = conv_ref.dimshuffle(0, 2, 3, 4, 1)
    f_ref = theano.function([], conv_ref, mode='FAST_RUN')

    conv = GpuCorr3dMM(border_mode=border_mode,
                       filter_dilation=filter_dilation,
                       subsample=subsample)(
        inputs.dimshuffle(0, 4, 1, 2, 3),
        filters.dimshuffle(0, 4, 1, 2, 3))
    conv = conv.dimshuffle(0, 2, 3, 4, 1)
    f = theano.function([], conv, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)

    if verify_grad:
        utt.verify_grad(GpuCorr3dMM(border_mode=border_mode,
                                    filter_dilation=filter_dilation,
                                    subsample=subsample),
                        [inputs_val.transpose(0, 4, 1, 2, 3),
                         filters_val.transpose(0, 4, 1, 2, 3)],
                        mode=mode_with_gpu)
def test_opt_nofft_valid(self):
    inputs_shape = (5, 3, 7, 6)
    filters_shape = (2, 3, 3, 3)
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                          version='no_fft')
    mode = mode_with_gpu.including('conv_fft_valid')
    f_fft = theano.function([], conv, mode=mode)

    # make sure that no CuFFTOp has been inserted
    topo = f_fft.maker.fgraph.toposort()
    assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
               for n in topo) == 0
def run_conv_valid(self, inputs_shape, filters_shape,
                   subsample=(1, 1, 1)):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                         b=bias, d=subsample)
    conv = GpuCorr3dMM(border_mode="valid", subsample=subsample)(
        inputs.dimshuffle(0, 4, 1, 2, 3),
        filters.dimshuffle(0, 4, 1, 2, 3))
    conv = conv.dimshuffle(0, 2, 3, 4, 1)

    f_ref = theano.function([], conv_ref)
    f = theano.function([], conv, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
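# Hypothetical driver for the helper above (not in the original file),
# reusing the (batch, height, width, depth, channels) volumes from the
# conv3d gemm test; the strided case divides evenly in every dimension.
def test_conv_valid(self):
    self.run_conv_valid(inputs_shape=(16, 20, 32, 16, 1),
                        filters_shape=(10, 6, 12, 4, 1))
    self.run_conv_valid(inputs_shape=(16, 20, 32, 16, 1),
                        filters_shape=(10, 6, 12, 4, 1),
                        subsample=(2, 2, 2))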
def run_conv3d_bwd(inputs_shape, filters_shape, subsample,
                   border_mode, conv_mode):
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')

    inputs = shared(inputs_val)
    filters = shared(filters_val)
    bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

    # Compile a theano function for the CuDNN implementation
    conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                          border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)
    grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])
    f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

    # If conv_mode is 'conv' the reference implementation should use
    # filters flipped according to the width, height and time axis
    if conv_mode == 'conv':
        flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
    else:
        flipped_filters = filters

    # If border mode is anything but 'valid', the reference implementation
    # should operate on padded inputs
    if border_mode == 'valid':
        padded_inputs = inputs
    else:
        if border_mode == 'full':
            pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
        elif isinstance(border_mode, int):
            pad_per_dim = [border_mode] * 3
        else:
            pad_per_dim = border_mode

        pad_before_after = ([(0, 0), (0, 0)] +
                            [(p, p) for p in pad_per_dim])
        padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                      'constant')
        padded_inputs = shared(padded_inputs_val)

    # Compile a theano function for the reference implementation
    conv_ref = theano.tensor.nnet.conv3D(
        V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
        W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
        b=bias, d=subsample)
    (grad_padded_i_ref,
     grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
                                      [padded_inputs, filters])

    # Recover grad_i_ref from grad_padded_i_ref by cropping the padding
    if border_mode == 'valid':
        grad_i_ref = grad_padded_i_ref
    else:
        shp = grad_padded_i_ref.shape
        grad_i_ref = grad_padded_i_ref[
            :, :,
            pad_per_dim[0]:shp[2] - pad_per_dim[0],
            pad_per_dim[1]:shp[3] - pad_per_dim[1],
            pad_per_dim[2]:shp[4] - pad_per_dim[2]]
    f_ref = theano.function([], [grad_i_ref, grad_w_ref])

    # Compare the results of the two implementations
    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref[0], res[0])
    utt.assert_allclose(res_ref[1], res[1])