def test_weight_acts_strided():
    """
    Check WeightActs against the pure-Python reference for every stride.

    For each ``partial_sum`` in ``[0, 1, 4]`` and each entry of
    ``shape_list``, strides ``1 .. filter_rows`` are tried. Random hidden
    activation gradients with the shape of the ``FilterActs_python`` output
    are fed to both ``WeightActs_python`` and the GPU ``WeightActs`` op, and
    the two weight gradients must agree to within ``3.4e-5`` (max absolute
    difference).

    Raises
    ------
    AssertionError
        If the GPU result differs from the reference in type, dtype, shape,
        or by more than the tolerance.
    """

    rng = np.random.RandomState([2012, 10, 9])

    # Each entry is [img_shape, filter_shape] with
    #   img_shape    = (channels, rows, cols, batch_size)
    #   filter_shape = (channels, filter_rows, filter_cols, num_filters)
    shape_list = [[(1, 7, 8, 5),     (1, 2, 2, 16)],
                  [(3, 7, 8, 5),     (3, 3, 3, 16)],
                  [(16, 11, 11, 4),  (16, 4, 4, 16)],
                  [(3, 20, 20, 3),   (3, 5, 5, 16)],
                  [(3, 21, 21, 3),   (3, 6, 6, 16)],
                  ]
    tolerance = 3.4e-5

    for partial_sum in [0, 1, 4]:
        print("partial_sum: %d"%(partial_sum))
        for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
            images = rng.uniform(-1., 1., img_shape).astype('float32')
            filters = rng.uniform(-1., 1., filter_shape).astype('float32')
            gpu_images = float32_shared_constructor(images, name='images')
            print("test case %d..."%(test_idx+1))

            filter_rows, filter_cols = filters.shape[1], filters.shape[2]
            for stride in range(1, filter_rows + 1):
                # The reference forward pass is only used to learn the shape
                # of the hidden activations for this stride.
                output_python = FilterActs_python(images, filters, stride)
                _, h_rows, h_cols, _ = output_python.shape
                if partial_sum == 4:
                    if (h_rows*h_cols)%partial_sum != 0:
                        print("skip test case %d, stride %d when partial_sum is equal to %d"%(test_idx+1,stride,partial_sum))
                        # NOTE(review): `break` also drops every larger
                        # stride of this test case, not just this one --
                        # confirm that `continue` was not intended.
                        break
                hidacts = rng.uniform(-1., 1., output_python.shape).astype('float32')
                gpu_hidacts = float32_shared_constructor(hidacts, name='hidacts')

                weights_grad_python = WeightActs_python(
                    images, hidacts, filter_rows, filter_cols, stride)

                weights_grad = WeightActs(partial_sum=partial_sum, stride=stride)(
                    gpu_images,
                    gpu_hidacts,
                    as_tensor_variable((filter_rows, filter_cols))
                )[0]
                weights_grad = host_from_gpu(weights_grad)
                f = function([], weights_grad)
                weights_grad_val = f()

                warnings.warn("""test_weight_acts_strided success criterion is not very strict.""")

                if np.abs(weights_grad_val - weights_grad_python).max() > tolerance:
                    assert type(weights_grad_val) == type(weights_grad_python)
                    assert weights_grad_val.dtype == weights_grad_python.dtype
                    if weights_grad_val.shape != weights_grad_python.shape:
                        print('cuda-convnet shape: ',weights_grad_val.shape)
                        print('python conv shape: ',weights_grad_python.shape)
                        assert False
                    err = np.abs(weights_grad_val - weights_grad_python)
                    print('stride %d'%stride)
                    print('absolute error range: ', (err.min(), err.max()))
                    print('mean absolute error: ', err.mean())
                    print('cuda-convnet value range: ', (weights_grad_val.min(), weights_grad_val.max()))
                    print('python conv value range: ', (weights_grad_python.min(), weights_grad_python.max()))
                    # Without this the diagnostics above were printed but the
                    # test still passed on a numerical mismatch.
                    raise AssertionError(
                        "WeightActs differs from the Python reference by more "
                        "than %g (test case %d, stride %d, partial_sum %d)"
                        % (tolerance, test_idx + 1, stride, partial_sum))
# Example #2
0
    def grad(self, inputs, dout):
        """
        Build the gradient expressions with respect to both inputs.

        Parameters
        ----------
        inputs : sequence of two variables
            ``(images, filters)``. The name of each variable's type must
            contain ``'Cuda'``.
        dout : sequence of one variable
            Gradient with respect to this op's output. It is passed through
            ``gpu_contiguous`` before use.

        Returns
        -------
        tuple
            ``(d_images, d_filters)``: the result of ``ImageActs`` applied
            to the output gradient and the filters, and the first output of
            ``WeightActs`` applied to the images and the output gradient.

        Raises
        ------
        TypeError
            If the images, the filters, or the contiguous output gradient
            do not look like Cuda variables.
        """
        images, filters = inputs

        # The check is name-based: it only looks for 'Cuda' in the type's
        # string representation.
        for candidate, complaint in ((images, "inputs must be cuda"),
                                     (filters, "filters must be cuda")):
            if 'Cuda' not in str(type(candidate)):
                raise TypeError(complaint)

        (out_grad,) = dout
        out_grad = gpu_contiguous(out_grad)
        if 'Cuda' not in str(type(out_grad)):
            raise TypeError("output gradients must be cuda")

        # Axes 1 and 2 of each input are handed to the gradient ops as the
        # target shapes.
        image_shape = images.shape[1:3]
        filter_shape = filters.shape[1:3]

        image_grad_op = ImageActs(self.pad, self.partial_sum, self.stride)
        d_images = image_grad_op(out_grad, filters, image_shape)

        weight_grad_op = WeightActs(self.pad, self.partial_sum, self.stride)
        d_filters = weight_grad_op(images, out_grad, filter_shape)[0]

        return d_images, d_filters
# Example #3
0
    def grad(self, inputs, g_outputs):
        """
        Build the gradient expressions for the three inputs of this op.

        Parameters
        ----------
        inputs : sequence of three variables
            ``(hid_acts, filters, output_shape)``.
        g_outputs : sequence of one variable
            Gradient with respect to this op's output. It is converted with
            ``as_cuda_ndarray_variable`` before use.

        Returns
        -------
        list
            ``[g_hid_acts, g_filters, disconnected]`` where ``g_hid_acts``
            comes from ``FilterActs``, ``g_filters`` is the first output of
            ``WeightActs``, and the entry for ``output_shape`` is a
            ``DisconnectedType`` variable.
        """
        hid_acts, filters, output_shape = inputs
        (g_images,) = g_outputs
        g_images = as_cuda_ndarray_variable(g_images)
        assert not isinstance(g_images, list)

        # The two ops are module-level globals that are filled in on first
        # use (presumably to avoid a circular import -- TODO confirm).
        global FilterActs, WeightActs
        if FilterActs is None:
            from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
            from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

        # Both gradient ops are configured exactly like this op.
        op_config = dict(stride=self.stride,
                         pad=self.pad,
                         partial_sum=self.partial_sum)

        weight_grad_op = WeightActs(**op_config)
        g_filters = weight_grad_op(g_images, hid_acts, filters.shape[1:3])[0]
        assert not isinstance(g_filters, list)

        hid_grad_op = FilterActs(**op_config)
        g_hid_acts = hid_grad_op(g_images, filters)

        return [g_hid_acts, g_filters, DisconnectedType()()]
# Example #4
0
    def grad(self, inputs, dout):
        """
        Build the gradient expressions with respect to both inputs.

        Parameters
        ----------
        inputs : sequence of two variables
            ``(images, filters)``. The name of each variable's type must
            contain ``'Cuda'``.
        dout : sequence of one variable
            Gradient with respect to this op's output. It is passed through
            ``gpu_contiguous`` before use.

        Returns
        -------
        tuple
            ``(d_images, d_filters)``: the result of ``ImageActs`` applied
            to the output gradient and the filters, and the first output of
            ``WeightActs`` applied to the images and the output gradient.

        Raises
        ------
        TypeError
            If the images, the filters, or the contiguous output gradient
            do not look like Cuda variables.
        """
        images, filters = inputs

        # The check is name-based: it only looks for 'Cuda' in the type's
        # string representation.
        for candidate, complaint in ((images, "inputs must be cuda"),
                                     (filters, "filters must be cuda")):
            if 'Cuda' not in str(type(candidate)):
                raise TypeError(complaint)

        (out_grad,) = dout
        out_grad = gpu_contiguous(out_grad)
        if 'Cuda' not in str(type(out_grad)):
            raise TypeError("output gradients must be cuda")

        image_grad_op = ImageActs(self.pad, self.partial_sum)
        d_images = image_grad_op(out_grad, filters)

        weight_grad_op = WeightActs(self.pad, self.partial_sum)
        d_filters = weight_grad_op(images, out_grad)[0]

        return d_images, d_filters
# Example #5
0
def test_match_grad_valid_conv():
    """
    Check that WeightActs is the gradient of FilterActs w.r.t. the weights.

    For each ``partial_sum`` in ``[0, 1, 4]`` the test:

    1. compares the ``FilterActs`` output with theano's ``conv2d``
       (``border_mode='valid'``, spatially flipped filters), tolerance
       ``8e-6`` on the max absolute difference;
    2. compares the ``WeightActs`` result with ``T.grad`` of a random
       linear cost on the ``conv2d`` output taken with respect to the
       filters, tolerance ``8.6e-6``.

    Raises
    ------
    AssertionError
        If either comparison fails; diagnostics are printed first.
    """

    # Problem size; identical for every partial_sum.
    batch_size = 3
    rows = 7
    cols = 9
    channels = 8
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    for partial_sum in [0, 1, 4]:
        # Re-seeded on every iteration so each partial_sum sees the same data.
        rng = np.random.RandomState([2012, 10, 9])

        images = shared(rng.uniform(-1., 1., (channels, rows, cols,
                                              batch_size)).astype('float32'),
                        name='images')
        filters = rng.uniform(-1., 1.,
                              (channels, filter_rows,
                               filter_cols, num_filters)).astype('float32')
        filters = shared(filters, name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        output = FilterActs(partial_sum=partial_sum)(gpu_images, gpu_filters)
        output = host_from_gpu(output)

        # Move the last axis (batch / num_filters) to the front for conv2d.
        images_bc01 = images.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
        # Reverse both spatial axes of the filters (presumably because conv2d
        # flips its kernels while FilterActs does not -- TODO confirm).
        filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

        output_conv2d = conv2d(images_bc01, filters_bc01,
                               border_mode='valid')

        # Back to the axis order used by the FilterActs output.
        output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

        theano_rng = MRG_RandomStreams(2013 + 1 + 31)

        coeffs = theano_rng.normal(avg=0., std=1.,
                                   size=output_conv2d.shape, dtype='float32')

        cost_conv2d = (coeffs * output_conv2d).sum()

        weights_grad_conv2d = T.grad(cost_conv2d, filters)

        cost = (coeffs * output).sum()
        hid_acts_grad = T.grad(cost, output)

        # Use the declared filter size rather than a hard-coded (4, 4) so the
        # test stays correct if filter_rows / filter_cols are changed.
        weights_grad = WeightActs(partial_sum=partial_sum)(
            gpu_images,
            gpu_from_host(hid_acts_grad),
            as_tensor_variable((filter_rows, filter_cols))
        )[0]
        weights_grad = host_from_gpu(weights_grad)

        f = function([], [output, output_conv2d, weights_grad,
                          weights_grad_conv2d])

        output, output_conv2d, weights_grad, weights_grad_conv2d = f()

        if np.abs(output - output_conv2d).max() > 8e-6:
            assert type(output) is type(output_conv2d)
            assert output.dtype == output_conv2d.dtype
            if output.shape != output_conv2d.shape:
                print('cuda-convnet shape: ', output.shape)
                print('theano shape: ', output_conv2d.shape)
                assert False
            err = np.abs(output - output_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ', (output.min(), output.max()))
            print('theano value range: ', (output_conv2d.min(),
                                           output_conv2d.max()))
            assert False

        warnings.warn(
            "test_match_grad_valid_conv success criterion is not very strict."
            " Can we verify that this is OK? One possibility is that theano"
            " is numerically unstable and Alex's code is better. Probably"
            " theano CPU 64 bit is OK but it's worth checking the others.")

        if np.abs(weights_grad - weights_grad_conv2d).max() > 8.6e-6:
            if type(weights_grad) is not type(weights_grad_conv2d):
                # Report the type, as the message says (the value itself was
                # being printed before).
                raise AssertionError("weights_grad is of type " +
                                     str(type(weights_grad)))
            assert weights_grad.dtype == weights_grad_conv2d.dtype
            if weights_grad.shape != weights_grad_conv2d.shape:
                print('cuda-convnet shape: ', weights_grad.shape)
                print('theano shape: ', weights_grad_conv2d.shape)
                assert False
            err = np.abs(weights_grad - weights_grad_conv2d)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ', (weights_grad.min(),
                                                 weights_grad.max()))
            print('theano value range: ', (weights_grad_conv2d.min(),
                                           weights_grad_conv2d.max()))
            assert False
# Example #6
0
def benchmark(n_imgs, n_channels, img_shape, n_filters, filter_shape, pad):
    """
    Compare the cuda-convnet Theano ops with the ``ca`` convolution routines.

    Three passes are run on random data with stride 1: forward propagation
    (``FilterActs`` vs ``ca.nnet.conv_bc01``), back-propagation to the
    images (``ImageActs`` vs ``ca.nnet.conv_bc01_bprop_imgs``) and
    back-propagation to the filters (``WeightActs`` vs
    ``ca.nnet.conv_bc01_bprop_filters``). For each pass the function prints
    whether the two results agree (``allclose``), the average running time
    of each implementation, and the ratio ``cuda_convnet / ca``.

    Parameters
    ----------
    n_imgs : int
        Number of images in the batch.
    n_channels : int
        Number of input channels.
    img_shape : tuple of two ints
        ``(height, width)`` of each image.
    n_filters : int
        Number of filters.
    filter_shape : tuple of two ints
        ``(height, width)`` of each filter.
    pad : int
        Padding applied on both spatial axes.
    """
    print('\nn_imgs: %i, n_channels: %i, img_shape: (%i, %i), ' %
          ((n_imgs, n_channels) + img_shape) +
          'n_filters: %i, filter_shape: (%i, %i), pad: %i' %
          ((n_filters, ) + filter_shape + (pad, )))

    def _report(result_ca, result_cc, fun_cc, fun_ca):
        # Print agreement and average timings for one pass.
        print('         correct: ' + str(allclose(result_ca, result_cc)))
        duration_cc = avg_running_time(fun_cc)
        duration_ca = avg_running_time(fun_ca)
        print('   avg. duration: cuda_convnet: %.4f  ca: %.4f' %
              (duration_cc, duration_ca))
        # Ratio cuda_convnet / ca: values above 1 mean `ca` ran faster.
        print('         speedup: %.2f' % (duration_cc / duration_ca))

    # Setup arrays
    padding = (pad, pad)
    strides = (1, 1)
    img_h, img_w = img_shape
    filter_h, filter_w = filter_shape
    convout_h = img_h + 2 * pad - filter_h + 1
    convout_w = img_w + 2 * pad - filter_w + 1

    # Every array is generated with the leading axis first (bc01 / fc01) and
    # then transposed so that axis ends up last (c01b / c01f) for the
    # cuda-convnet ops.
    imgs_bc01 = np.random.randn(n_imgs, n_channels, img_h, img_w)
    imgs_c01b = np.transpose(imgs_bc01, (1, 2, 3, 0))
    filters_fc01 = np.random.randn(n_filters, n_channels, filter_h, filter_w)
    filters_c01f = np.transpose(filters_fc01, (1, 2, 3, 0))
    convout_bc01 = np.random.randn(n_imgs, n_filters, convout_h, convout_w)
    convout_c01b = np.transpose(convout_bc01, (1, 2, 3, 0))

    imgs_bc01_t = theano.shared(imgs_bc01.astype(theano.config.floatX))
    imgs_c01b_t = theano.shared(imgs_c01b.astype(theano.config.floatX))
    filters_fc01_t = theano.shared(filters_fc01.astype(theano.config.floatX))
    filters_c01f_t = theano.shared(filters_c01f.astype(theano.config.floatX))
    convout_bc01_t = theano.shared(convout_bc01.astype(theano.config.floatX))
    convout_c01b_t = theano.shared(convout_c01b.astype(theano.config.floatX))
    imgs_bc01_ca = ca.array(imgs_bc01)
    filters_fc01_ca = ca.array(filters_fc01)
    convout_bc01_ca = ca.array(convout_bc01)

    # Forward propagation
    print('fprop')
    convout_cc_op = FilterActs(stride=1, partial_sum=4, pad=pad)
    convout_cc_expr = convout_cc_op(imgs_c01b_t, filters_c01f_t)
    convout_cc_fun = theano.function([], convout_cc_expr)
    convout_cc = convout_cc_fun()
    convout_cc = np.transpose(convout_cc, (3, 0, 1, 2))

    def convout_ca_fun():
        convout = ca.nnet.conv_bc01(imgs_bc01_ca, filters_fc01_ca, padding,
                                    strides)
        return convout

    convout_ca = np.array(convout_ca_fun())
    _report(convout_ca, convout_cc, convout_cc_fun, convout_ca_fun)
    # Drop the Theano objects of this pass before building the next one.
    del convout_cc_op
    del convout_cc_expr
    del convout_cc_fun

    # Back propagation, imgs
    print('bprop_imgs')
    dimgs_cc_op = ImageActs(stride=1, partial_sum=1, pad=pad)
    dimgs_cc_expr = dimgs_cc_op(convout_c01b_t, filters_c01f_t)
    dimgs_cc_fun = theano.function([], dimgs_cc_expr)
    dimgs_cc = dimgs_cc_fun()
    dimgs_cc = np.transpose(dimgs_cc, (3, 0, 1, 2))

    def dimgs_ca_fun():
        return ca.nnet.conv_bc01_bprop_imgs(filters_fc01_ca, convout_bc01_ca,
                                            img_shape, padding, strides)

    dimgs_ca = np.array(dimgs_ca_fun())
    _report(dimgs_ca, dimgs_cc, dimgs_cc_fun, dimgs_ca_fun)
    del dimgs_cc_op
    del dimgs_cc_expr
    del dimgs_cc_fun

    # Back propagation, filters
    print('bprop_filters')
    dfilters_cc_op = WeightActs(stride=1, partial_sum=1, pad=pad)
    dfilters_cc_expr = dfilters_cc_op(imgs_c01b_t, convout_c01b_t,
                                      T.as_tensor_variable(filter_shape))
    dfilters_cc_fun = theano.function([], dfilters_cc_expr)
    # Only the first value returned by the compiled function is compared.
    dfilters_cc = dfilters_cc_fun()[0]
    dfilters_cc = np.transpose(dfilters_cc, (3, 0, 1, 2))

    def dfilters_ca_fun():
        return ca.nnet.conv_bc01_bprop_filters(imgs_bc01_ca, convout_bc01_ca,
                                               filter_shape, padding, strides)

    dfilters_ca = np.array(dfilters_ca_fun())
    _report(dfilters_ca, dfilters_cc, dfilters_cc_fun, dfilters_ca_fun)