Exemplo n.º 1
0
    def c_code(self, node, nodename, inp, out, sub):
        x, a, b, l, s = inp
        y, = out
        fail = sub['fail']
        function_name = "TimCropper_%(nodename)s" % locals()
        ndim_spatial = len(self.patch_shape)
        ndim_total = 2 + ndim_spatial
        strings = []

        # check inputs
        strings.append("""
        if ($x->nd != $ndim_total)
        {
            PyErr_SetString(PyExc_ValueError,
                            "TimCropper: first input must have $ndim_total dimensions");
            $fail;
        }
        """)
        for i, var in enumerate((a, b, l, s)):
            strings.append("""
            if (%(var)s->nd != 2) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have 2 dimensions");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[0] != CudaNdarray_HOST_DIMS($x)[0]) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[0] equal to batch size");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[1] != $ndim_spatial) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[1] equal to number of spatial dimensions ($ndim_spatial)");
                $fail;
            }
            """ % dict(var=var, i=1 + i))

        # allocate output
        strings.append("""
        int ydims[$ndim_total];
        """)
        for i in (0, 1):
            strings.append("ydims[%i] = CudaNdarray_HOST_DIMS($x)[%i];" % (i, i))
        for i, dim in enumerate(self.patch_shape):
            strings.append("ydims[2 + %i] = %i;" % (i, dim))
        strings.append("""
        if ((NULL == $y) || """ + " || ".join(
            "(CudaNdarray_HOST_DIMS($y)[%i] != ydims[%i])" % (i, i) for i in range(ndim_total))
        + """)
        {
            Py_XDECREF($y);
            $y = (CudaNdarray*)CudaNdarray_New();
            if ((NULL == $y)
                || CudaNdarray_alloc_contiguous($y, $ndim_total, ydims))
            {
                Py_XDECREF($y);
                $y = NULL;
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: output allocation failed");
                $fail;
            }
        }
        """)

        # due to separability we need to compute weights only for patch
        # row/image row and patch col/image col pairs, instead of for
        # the full cartesian product of patch pixel/image pixel pairs.
        # we precompute the weights in a separate pass.
        weightpass_call = common.weightpass_call(
            nodename, patch_shape=self.patch_shape, V=x, l=l, s=s, fail=fail, grad=False)

        # launch kernel
        W = "W" # so call_arguments knows its name
        arguments = common.call_arguments("x y W".split())
        gridblock = common.gridblock(ndim_spatial, "ydims")
        strings.append("""
        {
            $weightpass_call

            $gridblock
            $function_name<<<grid, block>>>(""" + arguments + """)
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError,
                    "Cuda error: %s: %s. (grid: %i x %i;"
                    " block: %i x %i x %i)\\n",
                    "$function_name",
                    cudaGetErrorString(err),
                    grid.x, grid.y,
                    block.x, block.y, block.z);
                $fail;
            }

            // free weight storage
            Py_XDECREF(W);
            W = NULL;
        }""")

        from string import Template
        return Template("\n".join(strings)).substitute(locals())
Exemplo n.º 2
0
    def c_code(self, node, nodename, inp, out, sub):
        x, a, b, l, s = inp
        y, = out
        fail = sub['fail']
        function_name = "TimCropper_%(nodename)s" % locals()
        ndim_spatial = len(self.patch_shape)
        ndim_total = 2 + ndim_spatial
        strings = []

        # check inputs
        strings.append("""
        if ($x->nd != $ndim_total)
        {
            PyErr_SetString(PyExc_ValueError,
                            "TimCropper: first input must have $ndim_total dimensions");
            $fail;
        }
        """)
        for i, var in enumerate((a, b, l, s)):
            strings.append("""
            if (%(var)s->nd != 2) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have 2 dimensions");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[0] != CudaNdarray_HOST_DIMS($x)[0]) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[0] equal to batch size");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[1] != $ndim_spatial) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[1] equal to number of spatial dimensions ($ndim_spatial)");
                $fail;
            }
            """ % dict(var=var, i=1 + i))

        # allocate output
        strings.append("""
        int ydims[$ndim_total];
        """)
        for i in (0, 1):
            strings.append("ydims[%i] = CudaNdarray_HOST_DIMS($x)[%i];" %
                           (i, i))
        for i, dim in enumerate(self.patch_shape):
            strings.append("ydims[2 + %i] = %i;" % (i, dim))
        strings.append("""
        if ((NULL == $y) || """ + " || ".join(
            "(CudaNdarray_HOST_DIMS($y)[%i] != ydims[%i])" % (i, i)
            for i in range(ndim_total)) + """)
        {
            Py_XDECREF($y);
            $y = (CudaNdarray*)CudaNdarray_New();
            if ((NULL == $y)
                || CudaNdarray_alloc_contiguous($y, $ndim_total, ydims))
            {
                Py_XDECREF($y);
                $y = NULL;
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: output allocation failed");
                $fail;
            }
        }
        """)

        # due to separability we need to compute weights only for patch
        # row/image row and patch col/image col pairs, instead of for
        # the full cartesian product of patch pixel/image pixel pairs.
        # we precompute the weights in a separate pass.
        weightpass_call = common.weightpass_call(nodename,
                                                 patch_shape=self.patch_shape,
                                                 V=x,
                                                 l=l,
                                                 s=s,
                                                 fail=fail,
                                                 grad=False)

        # launch kernel
        W = "W"  # so call_arguments knows its name
        arguments = common.call_arguments("x y W".split())
        gridblock = common.gridblock(ndim_spatial, "ydims")
        strings.append("""
        {
            $weightpass_call

            $gridblock
            $function_name<<<grid, block>>>(""" + arguments + """)
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError,
                    "Cuda error: %s: %s. (grid: %i x %i;"
                    " block: %i x %i x %i)\\n",
                    "$function_name",
                    cudaGetErrorString(err),
                    grid.x, grid.y,
                    block.x, block.y, block.z);
                $fail;
            }

            // free weight storage
            Py_XDECREF(W);
            W = NULL;
        }""")

        from string import Template
        return Template("\n".join(strings)).substitute(locals())
Exemplo n.º 3
0
    def c_code(self, node, nodename, inp, out, sub):
        dCdy, x, a, b, l, s = inp
        dydl, dyds = out
        fail = sub["fail"]
        function_name = "TimCropperGrad_%s" % nodename
        ndim_spatial = len(self.patch_shape)
        ndim_total = 2 + ndim_spatial
        strings = []

        # check inputs
        for i, var in enumerate([dCdy, x]):
            strings.append(
                """
            if (%(var)s->nd != $ndim_total)
            {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have $ndim_total dimensions");
                $fail;
            }
            """
                % dict(var=var, i=i)
            )
        for i, var in enumerate((a, b, l, s)):
            strings.append(
                """
            if (%(var)s->nd != 2) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have 2 dimensions");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[0] != CudaNdarray_HOST_DIMS($x)[0]) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[0] equal to batch size");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[1] != $ndim_spatial) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[1] equal to number of spatial dimensions ($ndim_spatial)");
                $fail;
            }
            """
                % dict(var=var, i=2 + i)
            )

        # allocate outputs
        strings.append(
            """
        int outdims[$ndim_total + 1];
        outdims[$ndim_total] = $ndim_spatial;
        """
        )
        for i in range(ndim_total):
            strings.append("outdims[%i] = CudaNdarray_HOST_DIMS($dCdy)[%i];" % (i, i))
        for var in "ls":
            strings.append(
                (
                    """
            if ((NULL == %(dname)s) || """
                    + " || ".join(
                        "(CudaNdarray_HOST_DIMS(%%(dname)s)[%i] != outdims[%i])" % (i, i) for i in range(ndim_total + 1)
                    )
                    + """)
            {
                Py_XDECREF(%(dname)s);
                %(dname)s = (CudaNdarray*)CudaNdarray_New();
                if ((NULL == %(dname)s)
                    || CudaNdarray_alloc_contiguous(%(dname)s, $ndim_total + 1, outdims))
                {
                    Py_XDECREF(%(dname)s);
                    %(dname)s = NULL;
                    PyErr_SetString(PyExc_ValueError,
                                    "TimCropperGrad: allocation of output %(dlabel)s failed");
                    $fail;
                }
            }
            """
                )
                % dict(name=locals()[var], dname=locals()["dyd" + var], dlabel="dyd" + var)
            )

        # launch kernel
        arguments = []
        for var in "dydl dyds dCdy x".split():
            arguments.append("CudaNdarray_SIZE($%s)" % var)
            arguments.append("CudaNdarray_DEV_DATA($%s)" % var)
            arguments.append("CudaNdarray_DEV_DIMS($%s)" % var)
            arguments.append("CudaNdarray_DEV_STRIDES($%s)" % var)
        arguments.extend("CudaNdarray_DEV_DATA($%s)" % var for var in "abls")
        gridblock = common.gridblock(ndim_spatial, "CudaNdarray_HOST_DIMS(%s)" % dCdy)
        strings.append(
            """
        {
            printf("enter grad op\\n");
            $gridblock
            $function_name<<<grid, block>>>("""
            + ", \n".join(arguments)
            + """)
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError,
                    "Cuda error: %s: %s. (grid: %i x %i;"
                    " block: %i x %i x %i)\\n",
                    "$function_name",
                    cudaGetErrorString(err),
                    grid.x, grid.y,
                    block.x, block.y, block.z);
                $fail;
            }
            printf("exit grad op\\n");
        }"""
        )

        from string import Template

        return Template("\n".join(strings)).substitute(locals())
Exemplo n.º 4
0
    def c_code(self, node, nodename, inp, out, sub):
        dCdy, x, a, b, l, s = inp
        dydl, dyds = out
        fail = sub['fail']
        function_name = "TimCropperGrad_%s" % nodename
        ndim_spatial = len(self.patch_shape)
        ndim_total = 2 + ndim_spatial
        strings = []

        # check inputs
        for i, var in enumerate([dCdy, x]):
            strings.append("""
            if (%(var)s->nd != $ndim_total)
            {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have $ndim_total dimensions");
                $fail;
            }
            """ % dict(var=var, i=i))
        for i, var in enumerate((a, b, l, s)):
            strings.append("""
            if (%(var)s->nd != 2) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have 2 dimensions");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[0] != CudaNdarray_HOST_DIMS($x)[0]) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[0] equal to batch size");
                $fail;
            }
            if (CudaNdarray_HOST_DIMS(%(var)s)[1] != $ndim_spatial) {
                PyErr_SetString(PyExc_ValueError,
                                "TimCropper: %(i)sth input must have shape[1] equal to number of spatial dimensions ($ndim_spatial)");
                $fail;
            }
            """ % dict(var=var, i=2 + i))

        # allocate outputs
        strings.append("""
        int outdims[$ndim_total + 1];
        outdims[$ndim_total] = $ndim_spatial;
        """)
        for i in range(ndim_total):
            strings.append("outdims[%i] = CudaNdarray_HOST_DIMS($dCdy)[%i];" %
                           (i, i))
        for var in "ls":
            strings.append(("""
            if ((NULL == %(dname)s) || """ + " || ".join(
                "(CudaNdarray_HOST_DIMS(%%(dname)s)[%i] != outdims[%i])" %
                (i, i) for i in range(ndim_total + 1)) + """)
            {
                Py_XDECREF(%(dname)s);
                %(dname)s = (CudaNdarray*)CudaNdarray_New();
                if ((NULL == %(dname)s)
                    || CudaNdarray_alloc_contiguous(%(dname)s, $ndim_total + 1, outdims))
                {
                    Py_XDECREF(%(dname)s);
                    %(dname)s = NULL;
                    PyErr_SetString(PyExc_ValueError,
                                    "TimCropperGrad: allocation of output %(dlabel)s failed");
                    $fail;
                }
            }
            """) % dict(name=locals()[var],
                        dname=locals()["dyd" + var],
                        dlabel="dyd" + var))

        # launch kernel
        arguments = []
        for var in "dydl dyds dCdy x".split():
            arguments.append("CudaNdarray_SIZE($%s)" % var)
            arguments.append("CudaNdarray_DEV_DATA($%s)" % var)
            arguments.append("CudaNdarray_DEV_DIMS($%s)" % var)
            arguments.append("CudaNdarray_DEV_STRIDES($%s)" % var)
        arguments.extend("CudaNdarray_DEV_DATA($%s)" % var for var in "abls")
        gridblock = common.gridblock(ndim_spatial,
                                     "CudaNdarray_HOST_DIMS(%s)" % dCdy)
        strings.append("""
        {
            printf("enter grad op\\n");
            $gridblock
            $function_name<<<grid, block>>>(""" + ", \n".join(arguments) + """)
            CNDA_THREAD_SYNC;
            cudaError_t err = cudaGetLastError();
            if( cudaSuccess != err)
            {
                PyErr_Format(PyExc_RuntimeError,
                    "Cuda error: %s: %s. (grid: %i x %i;"
                    " block: %i x %i x %i)\\n",
                    "$function_name",
                    cudaGetErrorString(err),
                    grid.x, grid.y,
                    block.x, block.y, block.z);
                $fail;
            }
            printf("exit grad op\\n");
        }""")

        from string import Template
        return Template("\n".join(strings)).substitute(locals())