def prob_max_pool_c01b(c01b, pool_shape, top_down = None):
    """
    Apply probabilistic max pooling to a tensor in c01b layout
    (channels, rows, cols, batch).

    Parameters
    ----------
    c01b : theano tensor
        4-tensor of shape (channels, rows, cols, batch_size).
    pool_shape : sequence of two ints
        (pool_rows, pool_cols); must be square and positive.
    top_down : theano tensor, optional
        Top-down input of shape (channels, rows // pool_rows,
        cols // pool_cols, batch_size). Defaults to zeros.

    Returns
    -------
    The output of the ProbMaxPool op applied to the (contiguous)
    inputs.

    Raises
    ------
    UnimplementedError
        If pool_shape is not square.
    """
    if pool_shape[0] != pool_shape[1]:
        raise UnimplementedError("Non square pool shapes are not supported yet")
    assert pool_shape[0] > 0


    ch, zr, zc, batch_size = c01b.shape
    r, c = pool_shape
    if top_down is None:
        # Floor division keeps the shape integral under Python 3
        # (plain / would yield floats); identical for Python 2 ints.
        top_down = tensor.zeros((ch, zr // r, zc // c, batch_size), dtype = c01b.dtype)

    op = ProbMaxPool(pool_shape[0])
    # The underlying cuda-convnet kernels require contiguous memory.
    c01b = gpu_contiguous(c01b)
    top_down = gpu_contiguous(top_down)

    return op(c01b, top_down)
示例#2
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Return C code computing the transpose convolution ("image
        acts") of `hid_acts` with `filters` via cuda-convnet's
        convImgActs, writing the result into `targets`.

        The code is assembled from several C fragments and finally
        interpolated with ``% locals()``, which is why every literal
        ``%`` inside the fragments is doubled.
        """
        hid_acts, filters = inputs
        targets, = outputs
        fail = sub['fail']

        # convFilterActs will multiply targets by scaleTargets
        # then add scaleOutput * (the convolution value)
        # We could make use of this to implement an inplace
        # addconv op but for this op we just want to compute
        # the convolution so we set them to 0 and 1 respectively
        # Note: there is another version of convFilterActs that
        # does not take these arguments, but it is just a wrapper
        # around the version that does take them, so we save
        # a function call by using the version that we use.
        basic_setup = """
        #define scaleTargets 0
        #define scaleOutput 1
        """

        if self.dense_connectivity:
            basic_setup += """
            #define numGroups 1
            """

        # cuda-convnet expresses padding as a negative start offset.
        basic_setup += """
        #define paddingStart (-%d)
        """ % self.pad

        # Only unit stride is supported by this op.
        if self.stride != 1:
            raise UnimplementedError()
        else:
            basic_setup += """
            #define moduleStride 1
        """

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup += "#define IMAGEACTS_COPY_NON_CONTIGUOUS 0\n"

        # The amount of braces that must be closed at the end
        num_braces = 0

        # Convert images int nv_hid_acts, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_hid_acts = self._argument_contiguity_check("hid_acts") + """
        if (%(hid_acts)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "hid_acts must have nd=4, got nd=%%i", %(hid_acts)s->nd);
            %(fail)s;
        }

        { //setup_nv_hid_acts brace 1
        const int *hid_act_dims = CudaNdarray_HOST_DIMS(%(hid_acts)s);
        const int numFilters = hid_act_dims[0];
        const int hidActsSizeY = hid_act_dims[1];
        const int hidActsSizeX = hid_act_dims[2];
        //printf("hidActs shape: %%d %%d\\n", hidActsSizeY, hidActsSizeX);
        const int batch_size = hid_act_dims[3];
        NVMatrix nv_hid_acts(%(hid_acts)s, numFilters * hidActsSizeY *
                                           hidActsSizeX, batch_size, "image_acts:nv_hid_acts");
        int img_channels = -1;
        """
        num_braces += 1

        # Convert filters into nv_filters, an NVMatrix, for compatibility
        # with the cuda-convnet functions

        setup_nv_filters = self._argument_contiguity_check("filters") + """
        if (%(filters)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
            "filters must have nd=4, got nd=%%i", %(filters)s->nd);
            %(fail)s;
        }

        { // setup_nv_filters brace 1
        const int * filters_dims = CudaNdarray_HOST_DIMS(%(filters)s);
        const int filter_channels = filters_dims[0];
        const int filter_rows = filters_dims[1];
        const int filter_cols = filters_dims[2];
        const int num_filters = filters_dims[3];

        if ((num_filters %% (numGroups * 16)) != 0)
        {
            PyErr_Format(PyExc_ValueError,
            "Each group must have a multiple of 16 channels, but num_filters %%%% (numGroups * 16) = %%d %%%% ( %%d * 16) = %%d.",
            num_filters, numGroups, num_filters %% (numGroups * 16));
            %(fail)s;
        }

        if (filter_rows != filter_cols)
        {
            PyErr_Format(PyExc_ValueError,
            "filter must be square, but have shape (%%d, %%d).",
            filter_rows, filter_cols);
            %(fail)s;
        }

        { // setup_nv_filters brace 2


        NVMatrix nv_filters(%(filters)s, filter_channels * filter_rows *
        filter_cols, num_filters, "img_acts:nv_filters");
        """
        num_braces += 2

        # Reconstructed image size: with unit stride, image size =
        # modules + filter - 1 minus padding on both sides
        # (paddingStart is already negative).
        target_rows = "hidActsSizeY + filter_rows - 1 + 2 * paddingStart"
        target_cols = "hidActsSizeX + filter_cols - 1 + 2 * paddingStart"

        setup_nv_targets = """

        int target_dims [] = {
            filter_channels,
            %(target_rows)s,
            %(target_cols)s,
            batch_size };

        #define numModulesY hid_act_dims[1]
        #define numModulesX hid_act_dims[2]

        if (CudaNdarray_prep_output(& %(targets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_filters brace # 1
        const int imgSizeY = %(target_rows)s;
        const int imgSizeX = %(target_cols)s;

        NVMatrix nv_targets(%(targets)s, target_dims[0] * target_dims[1]
         * target_dims[2], target_dims[3], "image_acts: nv_targets");

        """

        num_braces += 1

        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        # NOTE(review): only numModulesY is passed to convImgActs, not
        # numModulesX — presumably the cuda-convnet API infers the X
        # extent internally; confirm against the convImgActs signature.
        do_convolution = """
        convImgActs(nv_hid_acts, nv_filters, nv_targets,
                    imgSizeY, imgSizeX, numModulesY,
                    paddingStart, moduleStride, filter_channels,
                    numGroups);
        """

        braces = '}' * num_braces

        rval = basic_setup + \
                setup_nv_hid_acts + \
                setup_nv_filters + \
                setup_nv_targets + \
                do_convolution + \
                braces

        rval = rval % locals()

        return rval
示例#3
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Return C code computing the gradient of the convolution with
        respect to the filters ("weight acts") via cuda-convnet's
        _weightActs.

        When ``partial_sum`` does not cover all modules, partial sums
        are first written to `partialsum_storage` and then reduced
        along axis 0 into `weights_grads`.

        The assembled string is interpolated with ``render_string(...,
        locals())``, hence the doubled ``%%`` in the C fragments.
        """
        # 0 means "sum over all modules in one pass" (resolved to
        # numModules in the generated C code below).
        partial_sum = self.partial_sum if self.partial_sum is not None else 0
        images, hid_grads, output_shape = inputs
        weights_grads, partialsum_storage = outputs
        fail = sub['fail']
        pad = self.pad

        # convFilterActs will multiply targets by scaleTargets
        # then add scaleOutput * (the convolution value)
        # We could make use of this to implement an inplace
        # addconv op but for this op we just want to compute
        # the convolution so we set them to 0 and 1 respectively
        # Note: there is another version of convFilterActs that
        # does not take these arguments, but it is just a wrapper
        # around the version that does take them, so we save
        # a function call by using the version that we use.
        basic_setup = """
        #define scaleTargets 0
        #define scaleOutput 1
        """

        if self.dense_connectivity:
            basic_setup += """
            #define numGroups 1
            """

        basic_setup += """
        #define paddingStart (-%(pad)d)
        const int *hid_grads_dims = CudaNdarray_HOST_DIMS(%(hid_grads)s);
        const int hidGradsSizeY = hid_grads_dims[1];
        const int hidGradsSizeX = hid_grads_dims[2];
        const int numModules = hidGradsSizeX * hidGradsSizeY;
        int partialSum = %(partial_sum)d > 0 ? %(partial_sum)d : numModules;

        // using this expression instead of numModules %% partialSum
        // because nvcc+msvc9 yield a strange behaviour when using %%
        if ( numModules - (numModules / partialSum) * partialSum != 0) {
            PyErr_Format(PyExc_ValueError,
                "partialSum must divide numModules, but partialSum=%%d and "
                "numModules=%%d", partialSum, numModules);
            %(fail)s;
        }
        """

        basic_setup += """
        #define moduleStride %d
        """ % self.stride
        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup += "#define WEIGHTACTS_COPY_NON_CONTIGUOUS 0\n"

        # The amount of braces that must be closed at the end
        num_braces = 0

        # Convert images int nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }
        { //setup_nv_images brace 1
        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        if (img_channels > 3 && img_channels %% 4 != 0)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have 3 or fewer channels, or have a multiple of 4 channels, got %%i",
                img_channels);
            %(fail)s;
        }

        { //setup_nv_images brace 2
        const int * hid_grads_dims = CudaNdarray_HOST_DIMS(%(hid_grads)s);
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];
        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size, "weight_acts: nv_images");
        """
        num_braces += 2

        # Convert hid_grads int nv_hid_grads, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_hid_grads = self._argument_contiguity_check("hid_grads") + """
        if (%(hid_grads)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "hid_grads must have nd=4, got nd=%%i", %(hid_grads)s->nd);
            %(fail)s;
        }

        { //setup_nv_hid_grads brace 1
        const int numFilters = hid_grads_dims[0];
        const int batch_size = hid_grads_dims[3];
        NVMatrix nv_hid_grads(%(hid_grads)s, numFilters * hidGradsSizeY *
                                           hidGradsSizeX, batch_size, "weight_acts:nv_hid_grads");
        """
        num_braces += 1

        # Validate output_shape (a length-2 integer vector giving the
        # filter rows/cols) and allocate weights_grads and, if needed,
        # partialsum_storage.
        setup_nv_weights_grads = """
        int filters_dims[4];
        // filters:  (input channels, filter rows, filter cols, output channels)

        npy_intp *shape_dims = PyArray_DIMS(%(output_shape)s);
        npy_intp target_rows, target_cols;
        PyArrayObject *casted_shape;
        PyArray_Descr *intp_dtype;
        if (PyArray_NDIM(%(output_shape)s) != 1) {
            PyErr_Format(PyExc_ValueError,
                         "output shape must be a vector, got %%d-tensor",
                         PyArray_NDIM(%(output_shape)s));
            %(fail)s;
        }
        else if (shape_dims[0] != 2)
        {
            PyErr_Format(PyExc_ValueError,
                         "output shape must be length 2, got %%d",
                         (int)shape_dims[0]);
            %(fail)s;
        }
        else if ((PyArray_DESCR(%(output_shape)s))->kind != 'i' &&
                 (PyArray_DESCR(%(output_shape)s))->kind != 'u')
        {
            PyErr_SetString(PyExc_TypeError,
                            "output shape must have integer or uint dtype");
            %(fail)s;
        }
        intp_dtype = PyArray_DescrFromType(NPY_INTP);
        casted_shape = (PyArrayObject *)PyArray_CastToType(%(output_shape)s,
                                                           intp_dtype, 0);
        target_rows = *((npy_intp *)PyArray_GETPTR1(casted_shape, 0));
        target_cols = *((npy_intp *)PyArray_GETPTR1(casted_shape, 1));
        filters_dims[0] = img_channels;
        filters_dims[1] = target_rows;
        filters_dims[2] = target_cols;
        if (filters_dims[1] != filters_dims[2])
        {
            PyErr_Format(PyExc_ValueError,
            "filter must be square, but have shape (%%d, %%d).",
            filters_dims[1], filters_dims[2]);
            %(fail)s;
        }
        else if (moduleStride > filters_dims[1]) {
            PyErr_Format(PyExc_ValueError,
            "stride %%d greater than filter size (%%d, %%d)",
            moduleStride, filters_dims[1], filters_dims[2]);
            %(fail)s;
        }
        filters_dims[3] = numFilters;
        const int filterSize = filters_dims[1];
        int partialsum_storage_dims[5];
        for (int i = 1; i < 5; i++)
        {
            partialsum_storage_dims[i] = filters_dims[i - 1];
        }
        partialsum_storage_dims[0] = numModules / partialSum;
        if (partialSum != numModules &&
            CudaNdarray_prep_output(&%(partialsum_storage)s, 5,
                                    partialsum_storage_dims))
        {
            %(fail)s;
        }

        for (int i = 0; i < 4; i++)
        {
            if (filters_dims[i] <= 0)
            {
                printf("filters_dims[%%d] = %%d\\n", i, filters_dims[i]);
                assert(false);
            }
        }
        if (CudaNdarray_prep_output(& %(weights_grads)s, 4, filters_dims))
        {
            %(fail)s;
        }

        { // setup_nv_weights_grad brace # 1

        NVMatrix nv_weights_grads(%(weights_grads)s, filters_dims[0] * filterSize
                                  * filterSize, numFilters,
                                  "weight_acts:nv_weights_grads");

        """

        num_braces += 1

        # note: imgSizeX is not specified here, it is computed internally
        # (in _filterActsSparse) by the lines:
        # int imgPixels = images.getNumRows() / numImgColors;
        # int imgSizeX = imgPixels / imgSizeY;
        #
        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        run_kernel = """

        if (partialSum == numModules)
            _weightActs(nv_images, nv_hid_grads, nv_weights_grads,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
        else {
            NVMatrix nv_partialsum(%(partialsum_storage)s, (numModules / partialSum) *
                     filters_dims[0] * filterSize * filterSize, numFilters,
                     "weight_acts: nv_partialsum");
            _weightActs(nv_images, nv_hid_grads, nv_partialsum,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
            nv_partialsum.reshape((numModules / partialSum), filters_dims[0] * filterSize * filterSize * numFilters);

            // sum out axis 0 of nv_partialsum
            #define AXIS 0
            // scale the contents of nv_weights_grads by 0
            // i.e., clear out its pre-existing content
            #define SCALE_THIS 0
            // scale the new sum by 1, i.e., don't do any scaling
            #define SCALE_SUM 1
            nv_weights_grads.addSum(nv_partialsum, AXIS, SCALE_THIS, SCALE_SUM);
        }
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_images + setup_nv_hid_grads +
                setup_nv_weights_grads + run_kernel + braces)

        rval = render_string(rval, locals())

        return rval
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Return C code computing the gradient of probabilistic max
        pooling via cuda-convnet's localProbMaxUndo.

        Inputs are the pooled output `p`, the detector layer `h`, their
        gradients `gp` and `gh`, and the zero-flags `gp_iszero` and
        `gh_iszero`; outputs are the gradients `targets_z` (detector
        shaped) and `targets_t` (pool shaped).

        The assembled string is interpolated with ``% locals()`` at the
        end, hence the doubled ``%%`` inside the C fragments.
        """
        p, h, gp, gh, gp_iszero, gh_iszero = inputs
        targets_z, targets_t, = outputs
        fail = sub['fail']

        # The amount of braces that must be closed at the end
        num_braces = 0

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup = "#define PROBMAXPOOLGRAD_COPY_NON_CONTIGUOUS 0\n"

        # Convert images in nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions. h must be 4-d, square in its
        # spatial dims, and have a channel count divisible by 16.
        setup_nv_h = self._argument_contiguity_check("h") + """
        if (%(h)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "h must have nd=4, got nd=%%i", %(h)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        const int * images_dims = CudaNdarray_HOST_DIMS(%(h)s);
        const int img_channels = images_dims[0];
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];

        if(imgSizeY != imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "images must be square(dims[1] == dims[2]). Shape (%%i,%%i,%%i,%%i)",
                img_channels, imgSizeY, imgSizeX, batch_size);
            %(fail)s;
        }
        if(%(ds)s > imgSizeY){
            PyErr_Format(PyExc_ValueError,
                "ds(%%d) must be <= imgSizeX(%%d) and imgSizeY(%%d).",
                %(ds)s, imgSizeX, imgSizeY);
            %(fail)s;
        }
        if (CudaNdarray_HOST_DIMS(%(h)s)[0] %% 16 != 0)
        {
            PyErr_Format(PyExc_ValueError,
                "h must have a number of channels that is a multiple of 16. Got %%d",
                CudaNdarray_HOST_DIMS(%(h)s)[0]);
            %(fail)s;
        }


        NVMatrix nv_h(%(h)s, img_channels * imgSizeY * imgSizeX,
                          batch_size, "ProbMaxPool:nv_h");

        """
        num_braces += 1

        setup_nv_p = self._argument_contiguity_check("p") + """
        if (%(p)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "P must have nd=4, got nd=%%i", %(p)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;


        NVMatrix nv_p(%(p)s, img_channels * _outputsX * _outputsX, batch_size,
        "ProbMaxPool:nv_p");
        """
        num_braces += 1

        # Convert gh in nv_gh
        setup_nv_gh = self._argument_contiguity_check("gh") + """
        if (%(gh)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "gh must have nd=4, got nd=%%i", %(gh)s->nd);
            %(fail)s;
        }
        if (CudaNdarray_HOST_DIMS(%(gh)s)[0] %% 16 != 0)
        {
            PyErr_Format(PyExc_ValueError,
                "gh must have a number of channels that is a multiple of 16. Got %%d",
                CudaNdarray_HOST_DIMS(%(gh)s)[0]);
            %(fail)s;
        }

        { //setup_nv_gh brace 1

        const int * gh_dims = CudaNdarray_HOST_DIMS(%(gh)s);
        const int gh_channels = gh_dims[0];
        const int ghSizeY = gh_dims[1];
        const int ghSizeX = gh_dims[2];

        NVMatrix nv_gh(%(gh)s, gh_channels * ghSizeY * ghSizeX,
                       batch_size, "ProbMaxPool:nv_gh");
        """
        num_braces += 1

        setup_nv_gp = self._argument_contiguity_check("gp") + """
        if (%(gp)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "gp must have nd=4, got nd=%%i", %(gp)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;


        NVMatrix nv_gp(%(gp)s, img_channels * _outputsX * _outputsX, batch_size,
        "ProbMaxPool:nv_gp");
        """
        num_braces += 1

        # Allocate targets_z with the detector-layer (h) shape.
        setup_nv_targets_z = """
        int target_z_dims [] = {
            img_channels,
            imgSizeX,
            imgSizeY,
            batch_size };

        if (CudaNdarray_prep_output(& %(targets_z)s, 4, target_z_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_targets_z(%(targets_z)s,
                            target_z_dims[0] * target_z_dims[1] * target_z_dims[2],
                            target_z_dims[3], "ProbMaxPool:nv_targets_z");

        """

        num_braces += 1

        # Allocate targets_t with the pooled (p) shape.
        setup_nv_targets_t = """
        int target_t_dims [] = {
            img_channels,
            _outputsX,
            _outputsX,
            batch_size };

        if (CudaNdarray_prep_output(& %(targets_t)s, 4, target_t_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_targets_t(%(targets_t)s, target_t_dims[0] * target_t_dims[1] * target_t_dims[2],
                            target_t_dims[3], "ProbMaxPool:nv_targets_t");


        float * gp_iszero = CudaNdarray_DEV_DATA(%(gp_iszero)s);
        float * gh_iszero = CudaNdarray_DEV_DATA(%(gh_iszero)s);
        """
        num_braces += 1

        undo_pool = """
        localProbMaxUndo(nv_h, nv_p, nv_gh, nv_gp, nv_targets_z, nv_targets_t,
                         %(ds)s, %(start)s, %(stride)s, _outputsX, imgSizeX, gp_iszero, gh_iszero);
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_h + setup_nv_p + setup_nv_gh +
                setup_nv_gp + setup_nv_targets_z + setup_nv_targets_t +
                undo_pool + braces)
        # Bind the attributes referenced by the %(...)s placeholders.
        start = self.start
        stride = self.stride
        ds = self.ds
        rval = rval % locals()

        return rval
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Return C code computing probabilistic max pooling via
        cuda-convnet's probabilisticPool.

        Inputs are the detector-layer `images` and the `top_down`
        signal; outputs are `ptargets` (pooled shape) and `htargets`
        (detector shape).

        The assembled string is interpolated with ``% locals()`` at the
        end, hence the doubled ``%%`` inside the C fragments.
        """
        images, top_down = inputs
        ptargets, htargets = outputs
        fail = sub['fail']

        # The amount of braces that must be closed at the end
        num_braces = 0

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup = "#define PROBMAXPOOL_COPY_NON_CONTIGUOUS 0\n"

        # Convert images in nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];

        if(imgSizeY != imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "images must be square(dims[1] == dims[2]). Shape (%%i,%%i,%%i,%%i)",
                img_channels, imgSizeY, imgSizeX, batch_size);
            %(fail)s;
        }
        if(%(ds)s > imgSizeY){
            PyErr_Format(PyExc_ValueError,
                "ds(%%d) must be <= imgSizeX(%%d) and imgSizeY(%%d).",
                %(ds)s, imgSizeX, imgSizeY);
            %(fail)s;
        }
        if(%(start)s >= imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "start is %%d but must be smaller then the images size of %%d x %%d.",
                %(start)s, imgSizeX, imgSizeY);
            %(fail)s;
        }

        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size,
        "ProbMaxPool:nv_images");
        """
        num_braces += 1

        # TODO check if stride != pool shape works, if not put error check
        setup_nv_top_down = self._argument_contiguity_check("top_down") + """
        if (%(top_down)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "top_down must have nd=4, got nd=%%i", %(top_down)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;


        NVMatrix nv_top_down(%(top_down)s, img_channels * _outputsX * _outputsX, batch_size,
        "ProbMaxPool:nv_top_down");
        """
        num_braces += 1

        # Allocate ptargets with the pooled output shape.
        setup_nv_ptargets = """
        //int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;

        int target_dims [] = {
            img_channels,
            _outputsX,
            _outputsX,
            batch_size };

        if (CudaNdarray_prep_output(& %(ptargets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_ptargets(%(ptargets)s, target_dims[0] * target_dims[1] * target_dims[2],
                            target_dims[3], "ProbMaxPool:nv_ptargets");

        """
        num_braces += 1

        # Allocate htargets with the detector-layer (input) shape.
        setup_nv_htargets = """
        int target_dims [] = {
            img_channels,
            imgSizeX,
            imgSizeY,
            batch_size };

        if (CudaNdarray_prep_output(& %(htargets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_htargets(%(htargets)s, target_dims[0] * target_dims[1] * target_dims[2],
                            target_dims[3], "ProbMaxPool:nv_htargets");

        """
        num_braces += 1

        do_pool = """
        probabilisticPool(nv_images, nv_top_down, nv_ptargets, nv_htargets, img_channels, %(ds)s,
                      %(start)s, %(stride)s, _outputsX, MaxPooler());
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_images + setup_nv_top_down +
                setup_nv_ptargets + setup_nv_htargets + do_pool + braces)
        # Bind the attributes referenced by the %(...)s placeholders.
        start = self.start
        stride = self.stride
        ds = self.ds
        rval = rval % locals()

        return rval
示例#6
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        Return C code computing the forward convolution of `images`
        with `filters` via cuda-convnet's convFilterActs, writing the
        result into `targets`.

        The assembled string is interpolated with ``% locals()`` at
        the end, hence the doubled ``%%`` inside the C fragments.
        """
        images, filters = inputs
        targets, = outputs
        fail = sub['fail']

        # convFilterActs will multiply targets by scaleTargets
        # then add scaleOutput * (the convolution value)
        # We could make use of this to implement an inplace
        # addconv op but for this op we just want to compute
        # the convolution so we set them to 0 and 1 respectively
        # Note: there is another version of convFilterActs that
        # does not take these arguments, but it is just a wrapper
        # around the version that does take them, so we save
        # a function call by using the version that we use.
        basic_setup = """
        #define scaleTargets 0
        #define scaleOutput 1
        """

        if self.dense_connectivity:
            basic_setup += """
            #define numGroups 1
            """

        assert isinstance(self.pad, py_integer_types)
        assert self.pad >= 0, "pad must be non-negative"
        # cuda-convnet expresses padding as a negative start offset.
        basic_setup += """
        #define paddingStart (-%d)
        """ % self.pad

        basic_setup += """
        #define moduleStride %d
        """ % int(self.stride)
        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup += "#define FILTERACTS_COPY_NON_CONTIGUOUS 0\n"

        # The amount of braces that must be closed at the end
        num_braces = 0

        # Convert images int nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1
        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];
        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size,
        "filter_acts:nv_images");
        """
        num_braces += 1

        # Convert filters into nv_filters, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_filters = self._argument_contiguity_check("filters") + """
        if (%(filters)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
            "filters must have nd=4, got nd=%%i", %(filters)s->nd);
            %(fail)s;
        }

        { // setup_nv_filters brace 1
        const int * filters_dims = CudaNdarray_HOST_DIMS(%(filters)s);
        const int filter_channels = filters_dims[0];
        const int filter_rows = filters_dims[1];
        const int filter_cols = filters_dims[2];
        const int num_filters = filters_dims[3];

        if (numGroups * filter_channels != img_channels)
        {
            PyErr_Format(PyExc_ValueError,
            "# input channels mismatch. images have %%d but filters have %%d groups of %%d for a total of %%d.",
            img_channels, numGroups, filter_channels, numGroups * filter_channels);
            %(fail)s;
        }

        if ((num_filters %% (numGroups * 16)) != 0)
        {
            PyErr_Format(PyExc_ValueError,
            "Each group must have a multiple of 16 channels, but num_filters %%%% (numGroups * 16) = %%d %%%% ( %%d * 16) = %%d.",
            num_filters, numGroups, num_filters %% (numGroups * 16));
            %(fail)s;
        }

        if (filter_rows != filter_cols)
        {
            PyErr_Format(PyExc_ValueError,
            "filter must be square, but instead have shape (%%d, %%d)",
            filter_rows, filter_cols);
            %(fail)s;
        }
        else if (moduleStride > filter_rows) {
            PyErr_Format(PyExc_ValueError,
            "stride %%d greater than filter size (%%d, %%d)",
            moduleStride, filter_rows, filter_cols);
            %(fail)s;
        }

        { // setup_nv_filters brace 2


        NVMatrix nv_filters(%(filters)s, filter_channels * filter_rows *
        filter_cols, num_filters, "filter_acts:nv_filters");
        """
        num_braces += 2

        # Output size derivation (p = padding, s = stride, f = filter,
        # i_x = image extent, m_x = number of modules):
        # p + (m_x - 1) * s + f >= i_x
        # p + (m_x - 1) * s >= i_x - f
        # m_x = (i_x - f - p) / s + 1
        # The mod terms below add one extra module when the division
        # has a remainder (i.e. ceil instead of floor).
        div_ms_y = "((imgSizeY - 2*paddingStart - filter_rows) / moduleStride)"
        div_ms_x = "((imgSizeX - 2*paddingStart - filter_cols) / moduleStride)"
        mod_ms_y = "((imgSizeY - 2*paddingStart - filter_rows) % moduleStride)"
        mod_ms_x = "((imgSizeX - 2*paddingStart - filter_cols) % moduleStride)"
        target_rows = "%s + ((%s > 0) ? 1 : 0) + 1" % (div_ms_y, mod_ms_y)
        target_cols = "%s + ((%s > 0) ? 1 : 0) + 1" % (div_ms_x, mod_ms_x)

        setup_nv_targets = """


        int target_dims [] = {
            num_filters,
            %(target_rows)s,
            %(target_cols)s,
            batch_size };

        #define numModulesY target_dims[1]
        #define numModulesX target_dims[2]

        if (CudaNdarray_prep_output(& %(targets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_filters brace # 1

        NVMatrix nv_targets(%(targets)s, target_dims[0] * target_dims[1]
         * target_dims[2], target_dims[3], "filter_acts:nv_targets");

        """

        num_braces += 1

        # note: imgSizeX is not specified here, it is computed internally
        # (in _filterActsSparse) by the lines:
        # int imgPixels = images.getNumRows() / numImgColors;
        # int imgSizeX = imgPixels / imgSizeY;
        #
        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        do_convolution = """
        convFilterActs(nv_images, nv_filters, nv_targets,
                       imgSizeY, numModulesY, numModulesX,
                       paddingStart, moduleStride, img_channels,
                       numGroups, scaleTargets, scaleOutput);
        """

        braces = '}' * num_braces

        rval = basic_setup + \
                setup_nv_images + \
                setup_nv_filters + \
                setup_nv_targets + \
                do_convolution + \
                braces

        rval = rval % locals()

        return rval
示例#7
0
文件: pool.py 项目: zizu1985/pylearn2
    def c_code(self, node, name, inputs, outputs, sub):
        """
        .. todo::

            WRITEME
        """
        images, maxout, gz = inputs
        targets, = outputs
        fail = sub['fail']

        # The amount of braces that must be closed at the end
        num_braces = 0

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup = "#define MAXPOOLGRAD_COPY_NON_CONTIGUOUS 0\n"

        # Convert images in nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];

        if(imgSizeY != imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "images must be square(dims[1] == dims[2]). Shape (%%i,%%i,%%i,%%i)",
                img_channels, imgSizeY, imgSizeX, batch_size);
            %(fail)s;
        }
        if(%(ds)s > imgSizeY){
            PyErr_Format(PyExc_ValueError,
                "ds(%%d) must be <= imgSizeX(%%d) and imgSizeY(%%d).",
                %(ds)s, imgSizeX, imgSizeY);
            %(fail)s;
        }

        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size,
        "MaxPool:nv_images");
        """
        num_braces += 1

        # Convert maxout in nv_maxout
        setup_nv_maxout = self._argument_contiguity_check("maxout") + """
        if (%(maxout)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "maxout must have nd=4, got nd=%%i", %(maxout)s->nd);
            %(fail)s;
        }

        { //setup_nv_maxout brace 1

        const int * maxout_dims = CudaNdarray_HOST_DIMS(%(maxout)s);
        const int maxout_channels = maxout_dims[0];
        const int maxoutSizeY = maxout_dims[1];
        const int maxoutSizeX = maxout_dims[2];

        if(maxoutSizeY != maxoutSizeX){
            PyErr_Format(PyExc_ValueError,
                "maxout must be square(dims[1] == dims[2])."
                " Shape (%%i,%%i,%%i,%%i)",
                maxout_channels, maxoutSizeY, maxoutSizeX, batch_size);
            %(fail)s;
        }
        if(img_channels != maxout_channels){
            PyErr_Format(PyExc_ValueError,
                "img_channels(%%d) should be equal to maxout_channels(%%d).",
                img_channels, maxout_channels);
            %(fail)s;
        }
        if(maxout_dims[3] != batch_size){
            PyErr_Format(PyExc_ValueError,
                "batch_size(%%d) should be equal to maxout_dims[3](%%d)",
                batch_size, maxout_dims[3]);
            %(fail)s;
        }

       NVMatrix nv_maxout(%(maxout)s, img_channels * maxoutSizeY * maxoutSizeX,
                          batch_size, "MaxPool:nv_maxout");
        """
        num_braces += 1

        # Convert gz in nv_gz
        setup_nv_gz = self._argument_contiguity_check("gz") + """
        if (%(gz)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "gz must have nd=4, got nd=%%i", %(gz)s->nd);
            %(fail)s;
        }
        if (CudaNdarray_HOST_DIMS(%(gz)s)[0] %% 16 != 0)
        {
            PyErr_Format(PyExc_ValueError,
                "gz must have a number of channels that is a multiple of 16. Got %%d",
                CudaNdarray_HOST_DIMS(%(gz)s)[0]);
            %(fail)s;
        }

        { //setup_nv_gz brace 1

        const int * gz_dims = CudaNdarray_HOST_DIMS(%(gz)s);
        const int gz_channels = gz_dims[0];
        const int gzSizeY = gz_dims[1];
        const int gzSizeX = gz_dims[2];

        if(maxout_dims[0] != gz_dims[0] ||
           maxout_dims[1] != gz_dims[1] ||
           maxout_dims[2] != gz_dims[2] ||
           maxout_dims[3] != gz_dims[3]){
            PyErr_Format(PyExc_ValueError,
                "gz shape(%%d, %%d, %%d, %%d) must be the same"
                " as maxout(%%d, %%d, %%d, %%d)",
                maxout_dims[0], maxout_dims[1], maxout_dims[2], maxout_dims[3],
                gz_dims[0], gz_dims[1], gz_dims[2], gz_dims[3]);
            %(fail)s;
        }

        NVMatrix nv_gz(%(gz)s, img_channels * maxoutSizeY * maxoutSizeX,
                       batch_size, "MaxPool:nv_gz");
        """
        num_braces += 1

        setup_nv_targets = """
        //int _outputsX = int(ceil((dic['imgSize'] - dic['start'] - dic['sizeX']) / float(dic['stride']))) + 1;
        int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;

        int target_dims [] = {
            img_channels,
            imgSizeX,
            imgSizeY,
            batch_size };

        if (CudaNdarray_prep_output(& %(targets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_targets(%(targets)s,
                            target_dims[0] * target_dims[1] * target_dims[2],
                            target_dims[3], "MaxPool:nv_targets");

        """

        num_braces += 1

        undo_pool = """
        convLocalMaxUndo(nv_images, nv_gz, nv_maxout, nv_targets,
                         %(ds)s, %(start)s, %(stride)s, _outputsX, 0, 1);
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_images + setup_nv_maxout + setup_nv_gz +
                setup_nv_targets + undo_pool + braces)
        start = self.start
        stride = self.stride
        ds = self.ds
        rval = rval % locals()

        return rval
示例#8
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        .. todo::

            WRITEME
        """
        hid_acts, filters, output_shape = inputs
        targets, = outputs
        fail = sub['fail']

        # convFilterActs will multiply targets by scaleTargets
        # then add scaleOutput * (the convolution value)
        # We could make use of this to implement an inplace
        # addconv op but for this op we just want to compute
        # the convolution so we set them to 0 and 1 respectively
        # Note: there is another version of convFilterActs that
        # does not take these arguments, but it is just a wrapper
        # around the version that does take them, so we save
        # a function call by using the version that we use.
        basic_setup = """
        #define scaleTargets 0
        #define scaleOutput 1
        """

        if self.dense_connectivity:
            basic_setup += """
            #define numGroups 1
            """

        basic_setup += """
        #define paddingStart (-%d)
        """ % self.pad

        basic_setup += """
        #define moduleStride %d
        """ % self.stride

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup += "#define IMAGEACTS_COPY_NON_CONTIGUOUS 0\n"

        # The amount of braces that must be closed at the end
        num_braces = 0

        # Convert images int nv_hid_acts, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_hid_acts = self._argument_contiguity_check("hid_acts") + """
        if (%(hid_acts)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "hid_acts must have nd=4, got nd=%%i", %(hid_acts)s->nd);
            %(fail)s;
        }

        { //setup_nv_hid_acts brace 1
        const int *hid_act_dims = CudaNdarray_HOST_DIMS(%(hid_acts)s);
        const int numFilters = hid_act_dims[0];
        const int hidActsSizeY = hid_act_dims[1];
        const int hidActsSizeX = hid_act_dims[2];
        //printf("hidActs shape: %%d %%d\\n", hidActsSizeY, hidActsSizeX);
        const int batch_size = hid_act_dims[3];
        NVMatrix nv_hid_acts(%(hid_acts)s, numFilters * hidActsSizeY *
                                           hidActsSizeX, batch_size, "image_acts:nv_hid_acts");
        int img_channels = -1;
        """
        num_braces += 1

        # Convert filters into nv_filters, an NVMatrix, for compatibility
        # with the cuda-convnet functions

        setup_nv_filters = self._argument_contiguity_check("filters") + """
        if (%(filters)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
            "filters must have nd=4, got nd=%%i", %(filters)s->nd);
            %(fail)s;
        }

        { // setup_nv_filters brace 1
        const int * filters_dims = CudaNdarray_HOST_DIMS(%(filters)s);
        const int filter_channels = filters_dims[0];
        const int filter_rows = filters_dims[1];
        const int filter_cols = filters_dims[2];
        const int num_filters = filters_dims[3];

        if ((num_filters %% (numGroups * 16)) != 0)
        {
            PyErr_Format(PyExc_ValueError,
            "Each group must have a multiple of 16 channels, but num_filters %%%% (numGroups * 16) = %%d %%%% ( %%d * 16) = %%d.",
            num_filters, numGroups, num_filters %% (numGroups * 16));
            %(fail)s;
        }

        if (filter_rows != filter_cols)
        {
            PyErr_Format(PyExc_ValueError,
            "filter must be square, but have shape (%%d, %%d).",
            filter_rows, filter_cols);
            %(fail)s;
        }
        else if (moduleStride > filter_rows) {
            PyErr_Format(PyExc_ValueError,
            "stride %%d greater than filter size (%%d, %%d)",
            moduleStride, filter_rows, filter_cols);
            %(fail)s;
        }

        { // setup_nv_filters brace 2


        NVMatrix nv_filters(%(filters)s, filter_channels * filter_rows *
        filter_cols, num_filters, "img_acts:nv_filters");
        """
        num_braces += 2

        #target_rows = "(hidActsSizeY + filter_rows + 2 * paddingStart) * moduleStride - 1"
        #target_cols = "(hidActsSizeX + filter_cols + 2 * paddingStart) * moduleStride - 1"

        setup_nv_targets = """

        #define numModulesY hid_act_dims[1]
        #define numModulesX hid_act_dims[2]
        npy_intp *shape_dims = PyArray_DIMS(%(output_shape)s);
        npy_intp target_rows, target_cols;
        PyArrayObject *casted_shape;
        PyArray_Descr *intp_dtype;
        if (PyArray_NDIM(%(output_shape)s) != 1) {
            PyErr_Format(PyExc_ValueError,
                         "output shape must be a vector, got %%d-tensor",
                         PyArray_NDIM(%(output_shape)s));
            %(fail)s;
        }
        else if (shape_dims[0] != 2)
        {
            PyErr_Format(PyExc_ValueError,
                         "output shape must be length 2, got %%d",
                         (int)shape_dims[0]);
            %(fail)s;
        }
        else if ((PyArray_DESCR(%(output_shape)s))->kind != 'i' &&
                 (PyArray_DESCR(%(output_shape)s))->kind != 'u')
        {
            PyErr_SetString(PyExc_TypeError,
                            "output shape must have integer or uint dtype");
            %(fail)s;
        }
        intp_dtype = PyArray_DescrFromType(NPY_INTP);
        casted_shape = (PyArrayObject *)PyArray_CastToType(%(output_shape)s,
                                                           intp_dtype, 0);
        target_rows = *((npy_intp *)PyArray_GETPTR1(casted_shape, 0));
        target_cols = *((npy_intp *)PyArray_GETPTR1(casted_shape, 1));
        {
        int target_dims [] = {
            filter_channels,
            target_rows,
            target_cols,
            batch_size };
        #define filterSize filter_rows
        #define MAX_ROWS (paddingStart + (numModulesY-1) * moduleStride + filterSize)
        if ((target_rows > MAX_ROWS)
            || (paddingStart + (numModulesX-1) * moduleStride + filterSize < target_cols))
        {
            PyErr_Format(PyExc_ValueError, "pylearn2.sandbox.cuda_convnet.image_acts.ImageActs: incompatible target image size (%%d, %%d), maximum (%%d, %%d)",
                         (int)target_rows, (int)target_cols,
                         (int)MAX_ROWS,
                         (int)(paddingStart + (numModulesX-1) * moduleStride + filterSize));
            %(fail)s;
        }
        if (CudaNdarray_prep_output(& %(targets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_filters brace # 1
        const int imgSizeY = (int)target_rows;
        const int imgSizeX = (int)target_cols;

        NVMatrix nv_targets(%(targets)s, target_dims[0] * target_dims[1]
         * target_dims[2], target_dims[3], "image_acts: nv_targets");

        """

        num_braces += 2

        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        do_convolution = """
        convImgActs(nv_hid_acts, nv_filters, nv_targets,
                    imgSizeY, imgSizeX, numModulesY,
                    paddingStart, moduleStride, filter_channels,
                    numGroups);
        """

        braces = '}' * num_braces

        rval = basic_setup + \
                setup_nv_hid_acts + \
                setup_nv_filters + \
                setup_nv_targets + \
                do_convolution + \
                braces

        rval = rval % locals()

        return rval
示例#9
0
    def c_code(self, node, name, inputs, outputs, sub):
        partial_sum = self.partial_sum if self.partial_sum is not None else 0
        images, hid_grads = inputs
        weights_grads, = outputs
        fail = sub['fail']
        pad = self.pad

        # convFilterActs will multiply targets by scaleTargets
        # then add scaleOutput * (the convolution value)
        # We could make use of this to implement an inplace
        # addconv op but for this op we just want to compute
        # the convolution so we set them to 0 and 1 respectively
        # Note: there is another version of convFilterActs that
        # does not take these arguments, but it is just a wrapper
        # around the version that does take them, so we save
        # a function call by using the version that we use.
        basic_setup = """
        #define scaleTargets 0
        #define scaleOutput 1
        """

        if self.dense_connectivity:
            basic_setup += """
            #define numGroups 1
            """

        basic_setup += """
        #define paddingStart (-%(pad)d)
        const int *hid_grads_dims = CudaNdarray_HOST_DIMS(%(hid_grads)s);
        const int hidGradsSizeY = hid_grads_dims[1];
        const int hidGradsSizeX = hid_grads_dims[2];
        const int numModules = hidGradsSizeX * hidGradsSizeY;
        int partialSum = %(partial_sum)d > 0 ? %(partial_sum)d : numModules;
        if (numModules %% partialSum > 0) {
            PyErr_Format(PyExc_ValueError,
                "partialSum must divide numModules, but partialSum=%%d and "
                "numModules=%%d", partialSum, numModules);
            %(fail)s;
        }
        """

        if self.stride != 1:
            raise UnimplementedError()
        else:
            basic_setup += """
            #define moduleStride 1
        """
        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup += "#define WEIGHTACTS_COPY_NON_CONTIGUOUS 0\n"

        # The amount of braces that must be closed at the end
        num_braces = 0

        # Convert images int nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }
        { //setup_nv_images brace 1
        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        if (img_channels > 3 && img_channels %% 4 != 0)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have 3 or fewer channels, or have a multiple of 4 channels, got %%i",
                img_channels);
            %(fail)s;
        }

        { //setup_nv_images brace 2
        const int * hid_grads_dims = CudaNdarray_HOST_DIMS(%(hid_grads)s);
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];
        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size, "weight_acts: nv_images");
        """
        num_braces += 2

        # Convert hid_grads int nv_hid_grads, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_hid_grads = self._argument_contiguity_check("hid_grads") + """
        if (%(hid_grads)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "hid_grads must have nd=4, got nd=%%i", %(hid_grads)s->nd);
            %(fail)s;
        }

        { //setup_nv_hid_grads brace 1
        const int numFilters = hid_grads_dims[0];
        const int batch_size = hid_grads_dims[3];
        NVMatrix nv_hid_grads(%(hid_grads)s, numFilters * hidGradsSizeY *
                                           hidGradsSizeX, batch_size, "weight_acts:nv_hid_grads");
        """
        num_braces += 1

        setup_nv_weights_grads = """
        int filters_dims[4];
        // filters:  (input channels, filter rows, filter cols, output channels)
        filters_dims[0] = img_channels;
        filters_dims[1] = imgSizeY - hidGradsSizeY + 1 - 2 * paddingStart;
        filters_dims[2] = imgSizeX - hidGradsSizeX + 1 - 2 * paddingStart;
        assert(filters_dims[1] == filters_dims[2]); // only square kernels are supported
        filters_dims[3] = numFilters;
        const int filterSize = filters_dims[1];
        int partialsum_storage_dims[5];
        for (int i = 1; i < 5; i++)
        {
            partialsum_storage_dims[i] = filters_dims[i - 1];
        }
        partialsum_storage_dims[0] = numModules / partialSum;
        CudaNdarray *partialsum_storage = NULL;
        if (partialSum != numModules &&
            CudaNdarray_prep_output(&partialsum_storage, 5,
                                    partialsum_storage_dims))
        {
            %(fail)s;
        }

        for (int i = 0; i < 4; i++)
        {
            if (filters_dims[i] <= 0)
            {
                printf("filters_dims[%%d] = %%d\\n", i, filters_dims[i]);
                assert(false);
            }
        }
        if (CudaNdarray_prep_output(& %(weights_grads)s, 4, filters_dims))
        {
            Py_DECREF(partialsum_storage);
            %(fail)s;
        }

        { // setup_nv_weights_grad brace # 1

        NVMatrix nv_weights_grads(%(weights_grads)s, filters_dims[0] * filterSize * filterSize, numFilters,
        "weight_acts:nv_weights_grads");

        """

        num_braces += 1

        # note: imgSizeX is not specified here, it is computed internally
        # (in _filterActsSparse) by the lines:
        # int imgPixels = images.getNumRows() / numImgColors;
        # int imgSizeX = imgPixels / imgSizeY;
        #
        # note: numFilters is not specified here. it is determined by
        # nv_filters.getNumCols()
        #
        # note: the size of the filters is determined by dividing
        # nv_filters.getNumRows() by numFilterColors
        #
        run_kernel = """

        if (partialSum == numModules)
            _weightActs(nv_images, nv_hid_grads, nv_weights_grads,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
        else {
            NVMatrix nv_partialsum(partialsum_storage, (numModules / partialSum) *
                     filters_dims[0] * filterSize * filterSize, numFilters,
                     "weight_acts: nv_partialsum");
            _weightActs(nv_images, nv_hid_grads, nv_partialsum,
                        imgSizeY, hidGradsSizeY, hidGradsSizeX, filterSize,
                        paddingStart, moduleStride, img_channels, numGroups,
                        partialSum, 0, 1);
            nv_partialsum.reshape((numModules / partialSum), filters_dims[0] * filterSize * filterSize * numFilters);

            // sum out axis 0 of nv_partialsum
            #define AXIS 0
            // scale the contents of nv_weights_grads by 0
            // i.e., clear out its pre-existing content
            #define SCALE_THIS 0
            // scale the new sum by 1, i.e., don't do any scaling
            #define SCALE_SUM 1
            nv_weights_grads.addSum(nv_partialsum, AXIS, SCALE_THIS, SCALE_SUM);

            Py_DECREF(partialsum_storage);
        }
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_images + setup_nv_hid_grads +
                setup_nv_weights_grads + run_kernel + braces)

        rval = render_string(rval, locals())

        return rval
示例#10
0
    def c_code(self, node, name, inputs, outputs, sub):
        """
        .. todo::

            WRITEME
        """
        images, seed = inputs
        targets, = outputs
        fail = sub['fail']

        # The amount of braces that must be closed at the end
        num_braces = 0

        if self.copy_non_contiguous:
            raise UnimplementedError()
        else:
            basic_setup = "#define STOCHASTICMAXPOOL_COPY_NON_CONTIGUOUS 0\n"

        # Convert images in nv_images, an NVMatrix, for compatibility
        # with the cuda-convnet functions
        setup_nv_images = self._argument_contiguity_check("images") + """
        if (%(images)s->nd != 4)
        {
            PyErr_Format(PyExc_ValueError,
                "images must have nd=4, got nd=%%i", %(images)s->nd);
            %(fail)s;
        }

        { //setup_nv_images brace 1

        const int * images_dims = CudaNdarray_HOST_DIMS(%(images)s);
        const int img_channels = images_dims[0];
        const int imgSizeY = images_dims[1];
        const int imgSizeX = images_dims[2];
        const int batch_size = images_dims[3];

        if(imgSizeY != imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "images must be square(dims[1] == dims[2]). Shape (%%i,%%i,%%i,%%i)",
                img_channels, imgSizeY, imgSizeX, batch_size);
            %(fail)s;
        }
        if(%(ds)s > imgSizeY){
            PyErr_Format(PyExc_ValueError,
                "ds(%%d) must be <= imgSizeX(%%d) and imgSizeY(%%d).",
                %(ds)s, imgSizeX, imgSizeY);
            %(fail)s;
        }
        if(%(start)s >= imgSizeX){
            PyErr_Format(PyExc_ValueError,
                "start is %%d but must be smaller then the images size of %%d x %%d.",
                %(start)s, imgSizeX, imgSizeY);
            %(fail)s;
        }

        NVMatrix nv_images(%(images)s, img_channels * imgSizeY * imgSizeX, batch_size,
        "MaxPool:nv_images");

        //int * seed = CudaNdarray_HOST_DIMS%(seed)s;
        float *  seed = CudaNdarray_DEV_DATA(%(seed)s);
        //int * seed = %(seed)s;
        """
        num_braces += 1

        setup_nv_targets = """
        //int _outputsX = int(ceil((dic['imgSize'] - dic['start'] - dic['sizeX']) / float(dic['stride']))) + 1;
        int _outputsX = ((int)(ceil((imgSizeY - %(start)s - %(ds)s) / ((float)%(stride)s)))) + 1;

        int target_dims [] = {
            img_channels,
            _outputsX,
            _outputsX,
            batch_size };

        if (CudaNdarray_prep_output(& %(targets)s, 4, target_dims))
        {
            %(fail)s;
        }

        { // setup_nv_target brace # 1

        NVMatrix nv_targets(%(targets)s, target_dims[0] * target_dims[1] * target_dims[2],
                            target_dims[3], "MaxPool:nv_targets");

        """

        num_braces += 1

        do_pool = """
        convLocalStochasticMaxPool(nv_images, nv_targets, img_channels, %(ds)s,
                      %(start)s, %(stride)s, _outputsX, MaxPooler(), seed);
        """

        braces = '}' * num_braces

        rval = (basic_setup + setup_nv_images + setup_nv_targets + do_pool +
                braces)
        start = self.start
        stride = self.stride
        ds = self.ds
        rval = rval % locals()

        return rval