Example #1
    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                        subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                              WShape=filters_shape,
                                              d=subsample)
        img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img,
                                                                     topgrad)
        else:
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img,
                                                                     topgrad,
                                                                     shape=filters_shape[1:4])
        conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
        f_ref = theano.function([], conv)
        f = theano.function([], conv_gemm)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
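A hypothetical invocation of this helper (the shapes are borrowed from the convGrad3D tests later in this collection; along each spatial axis the gradient shape must satisfy dCdH_dim = (input_dim - filter_dim) // stride + 1):

    # Illustrative call only: with unit strides, (10 - 6) + 1 = 5,
    # (12 - 12) + 1 = 1 and (16 - 4) + 1 = 13 for the three spatial axes.
    self.run_gradweight(inputs_shape=(16, 10, 12, 16, 1),
                        filters_shape=(10, 6, 12, 4, 1),
                        dCdH_shape=(16, 5, 1, 13, 10),
                        subsample=(1, 1, 1))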
Example #2
    def run_gradinput(self, inputs_shape, filters_shape,
                      subsample=(1, 1, 1)):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        bottom_height = (inputs_shape[1] - 1) * subsample[0] + filters_shape[1]
        bottom_width = (inputs_shape[2] - 1) * subsample[1] + filters_shape[2]
        bottom_depth = (inputs_shape[3] - 1) * subsample[2] + filters_shape[3]
        bottom_shape = theano.shared(
            numpy.array([bottom_height, bottom_width, bottom_depth]))

        weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
        top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top)
        else:
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top,
                shape=bottom_shape)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top,
                shape=bottom_shape)

        f_ref = theano.function([], conv_ref, mode='FAST_RUN')
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
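The bottom-shape arithmetic above generalizes to a one-line helper; this is an illustrative sketch under the same valid-correlation assumptions, not part of the test suite:

    # Sketch (assumed helper): gradInputs recovers a "bottom" of size
    # (top - 1) * stride + filter along each spatial axis.
    def expected_bottom_shape(top_shape, filter_shape, subsample):
        return tuple((t - 1) * d + f
                     for t, f, d in zip(top_shape, filter_shape, subsample))

For example, expected_bottom_shape((5, 1, 13), (6, 12, 4), (1, 1, 1)) gives (10, 12, 16), matching the convGrad3D shapes used elsewhere in this collection.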
Example #3
    def test_opt_convtransp3d_gemm(self):
        inputs_shape = (16, 15, 12, 12, 10)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.convTransp3D(W=filters,
                                               b=bias,
                                               d=(1, 1, 1),
                                               H=inputs)
        mode = mode_with_gpu.including('convtransp3d_gemm')

        f_ref = theano.function([], conv)
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM_gradInputs) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #4
    def test_opt_conv3d_fft(self):
        inputs_shape = (16, 20, 32, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        conv = theano.tensor.nnet.conv3D(V=inputs,
                                         W=filters,
                                         b=bias,
                                         d=(1, 1, 1))
        ref_mode = copy.copy(theano.compile.get_default_mode())
        ref_mode.check_py_code = False
        mode = mode_with_gpu.including('conv3d_fft')
        mode.check_py_code = False

        f_ref = theano.function([], conv, mode=ref_mode)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(
            isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
            for n in topo) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)
Example #5
    def test_opt_nofft_full(self):
        inputs_shape = (5, 3, 7, 6)
        filters_shape = (2, 3, 3, 3)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.conv.conv2d(inputs,
                                              filters,
                                              border_mode='full',
                                              version='no_fft')

        mode = mode_with_gpu.including('conv_fft_full')

        f_fft = theano.function([], conv, mode=mode)

        # make sure that no CuFFTOp has been inserted
        topo = f_fft.maker.fgraph.toposort()
        assert sum(
            isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
            for n in topo) == 0
Example #6
    def run_conv_full(self, inputs_shape, filters_shape, pad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        conv_ref = theano.tensor.nnet.convTransp3D(W=filters,
                                                   b=bias,
                                                   d=(1, 1, 1),
                                                   H=inputs)

        filters = filters.dimshuffle(4, 0, 1, 2, 3)
        inputs = inputs.dimshuffle(0, 4, 1, 2, 3)
        conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(inputs,
                                                          filters,
                                                          border_mode="full",
                                                          pad_last_dim=pad)
        conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref)
        f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

        res_ref = f_ref()
        res_fft = f_fft()
        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
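In "full" border mode each spatial output dimension grows to input + filter - 1, which is also what convTransp3D produces with unit strides; a minimal sketch of that bookkeeping (illustrative, not from the tests):

    # Sketch: per-axis output size of a full-mode convolution.
    def full_output_shape(in_shape, filter_shape):
        return tuple(i + f - 1 for i, f in zip(in_shape, filter_shape))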
Example #7
    def test_opt_convgrad3d_gemm(self):
        inputs_shape = (16, 10, 12, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)
        dCdH_shape = (16, 5, 1, 13, 10)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs,
                                             dCdH=dCdH,
                                             WShape=filters_shape,
                                             d=(1, 1, 1))
        mode = mode_with_gpu.including('convgrad3d_gemm')

        f_ref = theano.function([], conv)
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM_gradWeights) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #8
    def test_opt_full(self):
        inputs_shape = (5, 3, 7, 6)
        filters_shape = (2, 3, 3, 3)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                              border_mode='full')

        mode = mode_with_gpu.including('conv_fft_full')

        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 2, topo

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)
Example #9
    def test_opt_conv3d_gemm(self):
        inputs_shape = (16, 20, 32, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        conv = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                         b=bias, d=(1, 1, 1))
        mode = mode_with_gpu.including('conv3d_gemm')
        mode.check_py_code = False

        f_ref = theano.function([], conv, mode="FAST_RUN")
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #10
    def test_opt_convgrad3d_fft(self):
        inputs_shape = (2, 17, 15, 16, 1)
        filters_shape = (10, 6, 7, 4, 1)
        dCdH_shape = (inputs_shape[0],
                      inputs_shape[1] - filters_shape[1] + 1,
                      inputs_shape[2] - filters_shape[2] + 1,
                      inputs_shape[3] - filters_shape[3] + 1,
                      filters_shape[0])

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                             WShape=filters_shape,
                                             d=(1, 1, 1))
        mode = mode_with_gpu.including('convgrad3d_fft')
        mode.check_py_code = False

        f_ref = theano.function([], conv, mode="FAST_RUN")
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
Example #11
    def test_opt_convtransp3d_fft(self):
        inputs_shape = (2, 9, 16, 12, 10)
        filters_shape = (10, 3, 8, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias, d=(1, 1, 1),
                                               H=inputs)
        mode = mode_with_gpu.including('convtransp3d_fft')

        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
Example #12
    def run_conv_valid(self, inputs_shape, filters_shape, pad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Flip filters as conv3D computes correlation
        filters_flip = filters[:, ::-1, ::-1, ::-1, :]
        conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters_flip,
                                             b=bias, d=(1, 1, 1))

        conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(
            inputs.dimshuffle(0, 4, 1, 2, 3),
            filters.dimshuffle(0, 4, 1, 2, 3),
            border_mode="valid",
            pad_last_dim=pad)
        conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref, mode="FAST_RUN")
        mode = mode_with_gpu
        mode.check_py_code = False
        f_fft = theano.function([], conv_fft, mode=mode)

        res_ref = f_ref()
        res_fft = f_fft()
        utt.assert_allclose(res_ref, res_fft, rtol=1e-05, atol=1e-05)
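The flip on the reference side is needed because conv3D computes a correlation while conv3d_fft performs a true convolution. A one-dimensional numpy sketch of the identity being relied on (illustrative):

    # Correlating with a flipped kernel equals true convolution.
    import numpy
    x = numpy.random.random(7).astype('float32')
    w = numpy.random.random(3).astype('float32')
    assert numpy.allclose(numpy.correlate(x, w[::-1], mode='valid'),
                          numpy.convolve(x, w, mode='valid'))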
Example #13
    def run_conv_full(self, inputs_shape, filters_shape, pad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        conv_ref = theano.tensor.nnet.convTransp3D(
            W=filters, b=bias, d=(1, 1, 1),
            H=inputs)

        filters = filters.dimshuffle(4, 0, 1, 2, 3)
        inputs = inputs.dimshuffle(0, 4, 1, 2, 3)
        conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(inputs, filters,
                                                          border_mode="full",
                                                          pad_last_dim=pad)
        conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref)
        f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

        res_ref = f_ref()
        res_fft = f_fft()
        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
Example #14
    def test_opt_conv3d_gemm(self):
        inputs_shape = (16, 20, 32, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        conv = theano.tensor.nnet.conv3D(V=inputs,
                                         W=filters,
                                         b=bias,
                                         d=(1, 1, 1))
        mode = mode_with_gpu.including('conv3d_gemm')
        mode.check_py_code = False

        f_ref = theano.function([], conv, mode="FAST_RUN")
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #15
    def test_opt_convtransp3d_gemm(self):
        inputs_shape = (16, 15, 12, 12, 10)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias, d=(1, 1, 1),
                                               H=inputs)
        mode = mode_with_gpu.including('convtransp3d_gemm')

        f_ref = theano.function([], conv)
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM_gradInputs) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #16
    def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
        conv = theano.tensor.nnet.convTransp3D(W=filters,
                                               b=bias,
                                               d=subsample,
                                               H=inputs)
        f_ref = theano.function([], conv)
        res_ref = f_ref()

        # Get bottom shape using convTransp3D
        bottom_shape = res_ref.shape
        bottom_val = numpy.random.random(bottom_shape).astype('float32')
        bottom = shared(bottom_val)

        weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
        top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top)
        else:
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top, shape=bottom.shape[1:4])
        conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res = f()
        utt.assert_allclose(res_ref, res)
Example #17
    def run_gradweight(self,
                       inputs_shape,
                       filters_shape,
                       dCdH_shape,
                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_ref = Corr3dMM_gradWeights(subsample=subsample)(img, topgrad)
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img,
                                                                     topgrad)
        else:
            conv_ref = Corr3dMM_gradWeights(subsample=subsample)(
                img, topgrad, shape=filters_shape[1:4])
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
                img, topgrad, shape=filters_shape[1:4])

        f_ref = theano.function([], conv_ref)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example #18
    def test_opt_convtransp3d_fft(self):
        inputs_shape = (16, 15, 21, 12, 10)
        filters_shape = (10, 6, 12, 4, 1)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.convTransp3D(W=filters, b=bias, d=(1, 1, 1),
                                               H=inputs)
        mode = mode_with_gpu.including('convtransp3d_fft')

        f_ref = theano.function([], conv)
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
Example #19
    def test_opt_convgrad3d_gemm(self):
        inputs_shape = (16, 10, 12, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)
        dCdH_shape = (16, 5, 1, 13, 10)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                             WShape=filters_shape,
                                             d=(1, 1, 1))
        mode = mode_with_gpu.including('convgrad3d_gemm')

        f_ref = theano.function([], conv)
        f_gemm = theano.function([], conv, mode=mode)

        # make sure we inserted the gemm trickery
        topo = f_gemm.maker.fgraph.toposort()
        assert sum(isinstance(n.op, GpuCorr3dMM_gradWeights) for n in topo) > 0

        res_ref = f_ref()
        res_gemm = f_gemm()
        utt.assert_allclose(res_ref, res_gemm)
Example #20
    def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        bottom_height = (inputs_shape[1] - 1) * subsample[0] + filters_shape[1]
        bottom_width = (inputs_shape[2] - 1) * subsample[1] + filters_shape[2]
        bottom_depth = (inputs_shape[3] - 1) * subsample[2] + filters_shape[3]
        bottom_shape = theano.shared(
            numpy.array([bottom_height, bottom_width, bottom_depth]))

        weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
        top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(kern=weight,
                                                                topgrad=top)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top)
        else:
            conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top, shape=bottom_shape)
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top, shape=bottom_shape)

        f_ref = theano.function([], conv_ref)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example #21
    def test_opt_convgrad3d_fft(self):
        inputs_shape = (16, 20, 32, 16, 1)
        filters_shape = (10, 6, 12, 4, 1)
        dCdH_shape = (16, 15, 21, 13, 10)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')

        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                             WShape=filters_shape,
                                             d=(1, 1, 1))
        mode = mode_with_gpu.including('convgrad3d_fft')
        mode.check_py_code = False

        f_ref = theano.function([], conv, mode="FAST_RUN")
        f_fft = theano.function([], conv, mode=mode)

        # make sure we inserted the fft trickery
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 2

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft, rtol=1e-04, atol=1e-04)
Example #22
    def run_conv_valid(self, inputs_shape, filters_shape, pad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Flip filters as conv3D computes correlation
        filters_flip = filters[:, ::-1, ::-1, ::-1, :]
        conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters_flip,
                                             b=bias, d=(1, 1, 1))

        conv_fft = theano.sandbox.cuda.fftconv.conv3d_fft(
            inputs.dimshuffle(0, 4, 1, 2, 3),
            filters.dimshuffle(0, 4, 1, 2, 3),
            border_mode="valid",
            pad_last_dim=pad)
        conv_fft = conv_fft.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref, mode="FAST_RUN")
        mode = mode_with_gpu
        mode.check_py_code = False
        f_fft = theano.function([], conv_fft, mode=mode)

        res_ref = f_ref()
        res_fft = f_fft()
        utt.assert_allclose(res_ref, res_fft, rtol=1e-05, atol=1e-05)
Example #23
    def run_gradinput(self, inputs_shape, filters_shape,
                      subsample=(1, 1, 1)):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[4]).astype('float32'))
        conv = theano.tensor.nnet.convTransp3D(W=filters,
                                               b=bias,
                                               d=subsample,
                                               H=inputs)
        f_ref = theano.function([], conv)
        res_ref = f_ref()

        # Get bottom shape using convTransp3D
        bottom_shape = res_ref.shape
        bottom_val = numpy.random.random(bottom_shape).astype('float32')
        bottom = shared(bottom_val)

        weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
        top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top)
        else:
            conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
                kern=weight, topgrad=top,
                shape=bottom.shape[1:4])
        conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res = f()
        utt.assert_allclose(res_ref, res)
Example #24
    def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape,
                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        dCdH_val = numpy.random.random(dCdH_shape).astype('float32')
        inputs = shared(inputs_val)
        dCdH = shared(dCdH_val)

        conv = theano.tensor.nnet.convGrad3D(V=inputs, dCdH=dCdH,
                                             WShape=filters_shape,
                                             d=subsample)
        img = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
        topgrad = gpu_contiguous(dCdH.dimshuffle(0, 4, 1, 2, 3))
        if subsample == (1, 1, 1):
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(img,
                                                                     topgrad)
        else:
            conv_gemm = GpuCorr3dMM_gradWeights(subsample=subsample)(
                img, topgrad, shape=filters_shape[1:4])
        conv_gemm = conv_gemm.dimshuffle(0, 2, 3, 4, 1)
        f_ref = theano.function([], conv)
        f = theano.function([], conv_gemm, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
Example #25
    def run_conv3d_fwd(inputs_shape, filters_shape, subsample,
                       border_mode, conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        # Scale down the input values to prevent very large absolute errors
        # due to float rounding
        inputs_val /= 10
        filters_val /= 10

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs, kerns=filters,
                              border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)
        f = theano.function([], conv, mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            elif isinstance(border_mode, int):
                pad_per_dim = [border_mode] * 3
            else:
                pad_per_dim = border_mode

            pad_before_after = ([(0, 0), (0, 0)] +
                                [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias, d=subsample)
        f_ref = theano.function([], conv_ref.dimshuffle(0, 4, 1, 2, 3))

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
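A hypothetical call to this helper (the shapes are illustrative assumptions); dnn_conv3d takes channels-first (batch, channels, d0, d1, d2) tensors, which is why the reference conv3D call above dimshuffles to channels-last:

    run_conv3d_fwd(inputs_shape=(8, 3, 10, 12, 14),
                   filters_shape=(4, 3, 3, 3, 3),
                   subsample=(1, 1, 1),
                   border_mode='valid',
                   conv_mode='conv')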
Example #26
    def run_conv(self, inputs_shape, filters_shape, pad=False, **other_args):
        inputs_val = numpy.random.random(inputs_shape).astype("float32")
        filters_val = numpy.random.random(filters_shape).astype("float32")

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv_ref = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                                  **other_args)
        conv_fft = theano.sandbox.cuda.fftconv.conv2d_fft(inputs, filters,
                                                          pad_last_dim=pad,
                                                          **other_args)

        f_ref = theano.function([], conv_ref)
        f_fft = theano.function([], conv_fft, mode=mode_with_gpu)

        res_ref = f_ref()
        res_fft = f_fft()

        utt.assert_allclose(res_ref, res_fft)
Example #27
    def test_opt_nofft_valid(self):
        inputs_shape = (5, 3, 7, 6)
        filters_shape = (2, 3, 3, 3)

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv = theano.tensor.nnet.conv.conv2d(inputs, filters,
                                              version='no_fft')

        mode = mode_with_gpu.including('conv_fft_valid')

        f_fft = theano.function([], conv, mode=mode)

        # make sure that no CuFFTOp has been inserted
        topo = f_fft.maker.fgraph.toposort()
        assert sum(isinstance(n.op, theano.sandbox.cuda.fftconv.CuFFTOp)
                   for n in topo) == 0
Example #28
    def run_conv_valid(self, inputs_shape, filters_shape,
                       subsample=(1, 1, 1)):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))
        conv_ref = theano.tensor.nnet.conv3D(V=inputs, W=filters,
                                             b=bias, d=subsample)
        conv = GpuCorr3dMM(border_mode = "valid",
                           subsample=subsample)(inputs.dimshuffle(0, 4, 1, 2, 3),
                                                filters.dimshuffle(0, 4, 1, 2, 3))
        conv = conv.dimshuffle(0, 2, 3, 4, 1)

        f_ref = theano.function([], conv_ref)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)
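conv3D expects channels-last (batch, d0, d1, d2, channels) tensors while the Corr3dMM family expects channels-first, hence the dimshuffle pairs above; a small numpy sketch of the layout round trip (illustrative):

    import numpy
    x = numpy.empty((16, 10, 12, 16, 1), dtype='float32')  # channels last
    y = x.transpose(0, 4, 1, 2, 3)                          # channels first
    assert y.shape == (16, 1, 10, 12, 16)
    assert y.transpose(0, 2, 3, 4, 1).shape == x.shape      # round trip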
Example #29
    def run_conv_valid(self, inputs_shape, filters_shape,
                       border_mode='valid',
                       filter_dilation=(1, 1, 1),
                       subsample=(1, 1, 1),
                       verify_grad=False):
        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)

        conv_ref = Corr3dMM(border_mode=border_mode,
                            filter_dilation=filter_dilation,
                            subsample=subsample)(
                                inputs.dimshuffle(0, 4, 1, 2, 3),
                                filters.dimshuffle(0, 4, 1, 2, 3))
        conv_ref = conv_ref.dimshuffle(0, 2, 3, 4, 1)
        f_ref = theano.function([], conv_ref, mode='FAST_RUN')

        conv = GpuCorr3dMM(border_mode=border_mode,
                           filter_dilation=filter_dilation,
                           subsample=subsample)(
                               inputs.dimshuffle(0, 4, 1, 2, 3),
                               filters.dimshuffle(0, 4, 1, 2, 3))
        conv = conv.dimshuffle(0, 2, 3, 4, 1)
        f = theano.function([], conv, mode=mode_with_gpu)

        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref, res)

        if verify_grad:
            utt.verify_grad(GpuCorr3dMM(border_mode=border_mode,
                                        filter_dilation=filter_dilation,
                                        subsample=subsample),
                            [inputs_val.transpose(0, 4, 1, 2, 3),
                             filters_val.transpose(0, 4, 1, 2, 3)],
                            mode=mode_with_gpu)
Example #30
    def run_conv3d_bwd(inputs_shape, filters_shape, subsample, border_mode,
                       conv_mode):

        inputs_val = numpy.random.random(inputs_shape).astype('float32')
        filters_val = numpy.random.random(filters_shape).astype('float32')

        inputs = shared(inputs_val)
        filters = shared(filters_val)
        bias = shared(numpy.zeros(filters_shape[0]).astype('float32'))

        # Compile a theano function for the CuDNN implementation
        conv = dnn.dnn_conv3d(img=inputs,
                              kerns=filters,
                              border_mode=border_mode,
                              subsample=subsample,
                              conv_mode=conv_mode)

        grad_i, grad_w = theano.tensor.grad(conv.sum(), [inputs, filters])

        f = theano.function([], [grad_i, grad_w], mode=mode_with_gpu)

        # If conv_mode is 'conv' the reference implementation should use
        # filters flipped along the width, height and time axes
        if conv_mode == 'conv':
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters

        # If border mode is anything but 'valid', the reference implementation
        # should operate on padded inputs
        if border_mode == 'valid':
            padded_inputs = inputs
        else:
            if border_mode == 'full':
                pad_per_dim = [filters_shape[i] - 1 for i in range(2, 5)]
            elif isinstance(border_mode, int):
                pad_per_dim = [border_mode] * 3
            else:
                pad_per_dim = border_mode

            pad_before_after = ([(0, 0),
                                 (0, 0)] + [(p, p) for p in pad_per_dim])
            padded_inputs_val = numpy.pad(inputs_val, pad_before_after,
                                          'constant')
            padded_inputs = shared(padded_inputs_val)

        # Compile a theano function for the reference implementation
        conv_ref = theano.tensor.nnet.conv3D(
            V=padded_inputs.dimshuffle(0, 2, 3, 4, 1),
            W=flipped_filters.dimshuffle(0, 2, 3, 4, 1),
            b=bias,
            d=subsample)
        (grad_padded_i_ref,
         grad_w_ref) = theano.tensor.grad(conv_ref.sum(),
                                          [padded_inputs, filters])

        # Recover grad_i_ref from grad_padded_i_ref
        if border_mode == 'valid':
            grad_i_ref = grad_padded_i_ref
        else:
            shp = grad_padded_i_ref.shape
            grad_i_ref = grad_padded_i_ref[
                :, :,
                pad_per_dim[0]:shp[2] - pad_per_dim[0],
                pad_per_dim[1]:shp[3] - pad_per_dim[1],
                pad_per_dim[2]:shp[4] - pad_per_dim[2]]

        f_ref = theano.function([], [grad_i_ref, grad_w_ref])

        # Compare the results of the two implementations
        res_ref = f_ref()
        res = f()
        utt.assert_allclose(res_ref[0], res[0])
        utt.assert_allclose(res_ref[1], res[1])
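The slicing that recovers grad_i_ref simply crops the padding back off the padded gradient; a numpy sketch of the same cropping (pad widths here are illustrative assumptions):

    import numpy
    p = (1, 2, 3)  # assumed pad per spatial axis
    g_pad = numpy.ones((2, 3, 10 + 2 * p[0], 12 + 2 * p[1], 14 + 2 * p[2]))
    g = g_pad[:, :, p[0]:-p[0], p[1]:-p[1], p[2]:-p[2]]
    assert g.shape == (2, 3, 10, 12, 14)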