Exemplo n.º 1
0
    def test_enum_class(self):
        """Exercise construction, comparison and alias lookup of enum types.

        The test expects:

        - ``AttributeError`` when ``EnumList``/``EnumType`` receive one of the
          names ``"a"``, ``"_A"`` or ``"0"``;
        - ``TypeError`` when ``EnumType`` receives a string as constant value;
        - equality and identical hashes for two ``EnumType`` objects built
          from numerically equal values;
        - ``filter`` and ``fromalias`` to resolve both aliases and plain
          constant names;
        - ``TypeError`` when an alias is equal to a constant name.
        """
        # Check that invalid enum name raises exception.
        for invalid_name in ("a", "_A", "0"):
            try:
                EnumList(invalid_name)
            except AttributeError:
                pass
            else:
                raise Exception("EnumList with invalid name should fail.")

            try:
                EnumType(**{invalid_name: 0})
            except AttributeError:
                pass
            else:
                raise Exception("EnumType with invalid name should fail.")

        # Check that invalid enum value raises exception.
        try:
            EnumType(INVALID_VALUE="string is not allowed.")
        except TypeError:
            pass
        else:
            raise Exception("EnumType with invalid value should fail.")

        # Check EnumType: booleans, ints and floats with the same numeric
        # value must yield equal enum types.
        e1 = EnumType(C1=True, C2=12, C3=True, C4=-1, C5=False, C6=0.0)
        e2 = EnumType(C1=1, C2=12, C3=1, C4=-1.0, C5=0.0, C6=0)
        assert e1 == e2
        assert not (e1 != e2)
        assert hash(e1) == hash(e2)
        # Check access to attributes (building the tuple fails if any of
        # them is missing).
        assert len((e1.ctype, e1.C1, e1.C2, e1.C3, e1.C4, e1.C5, e1.C6)) == 7

        # Check enum with aliases: a different alias makes the types differ.
        e1 = EnumType(A=("alpha", 0), B=("beta", 1), C=2)
        e2 = EnumType(A=("alpha", 0), B=("beta", 1), C=2)
        e3 = EnumType(A=("a", 0), B=("beta", 1), C=2)
        assert e1 == e2
        assert e1 != e3
        assert e1.filter("beta") == e1.fromalias("beta") == e1.B == 1
        assert e1.filter("C") == e1.fromalias("C") == e1.C == 2

        # Check that invalid alias (same as a constant) raises exception.
        try:
            EnumList(("A", "a"), ("B", "B"))
        except TypeError:
            # The same list without the clashing alias must be accepted.
            EnumList(("A", "a"), ("B", "b"))
        else:
            raise Exception(
                "Enum with an alias name equal to a constant name should fail."
            )
Exemplo n.º 2
0
    def test_params_type_with_enums(self):
        """Check how ``ParamsType`` validates and exposes enum fields.

        Covers rejection of enum types that share constant names, rejection
        of an alias reused across enum types, direct access to constants on
        the wrapper, alias lookup through ``enum_from_alias`` and the
        regular wrapper attributes.
        """
        # Two enum types sharing constant names must be refused.
        shared_names_rejected = False
        try:
            ParamsType(enum1=EnumList("A", "B", "C"),
                       enum2=EnumList("A", "B", "F"))
        except AttributeError:
            shared_names_rejected = True
        if not shared_names_rejected:
            raise Exception(
                "ParamsType should fail with common enum names inside different enum types."
            )

        # The alias "a" is used by both enum types: this must be refused too.
        shared_alias_rejected = False
        try:
            ParamsType(
                enum1=EnumList(("A", "a"), ("B", "b")),
                enum2=EnumList(("ONE", "a"), ("TWO", "two")),
            )
        except AttributeError:
            shared_alias_rejected = True
            # Same enum types with the duplicated alias replaced: accepted.
            ParamsType(
                enum1=EnumList(("A", "a"), ("B", "b")),
                enum2=EnumList(("ONE", "one"), ("TWO", "two")),
            )
        if not shared_alias_rejected:
            raise Exception(
                "ParamsType should fail when there are aliases with same names as some constants."
            )

        # Enum constants must be reachable directly on the wrapper.
        wrapper = ParamsType(
            enum1=EnumList("A", ("B", "beta"), "C"),
            enum2=EnumList(("D", "delta"), "E", "F"),
        )
        assert wrapper.A == 0
        assert wrapper.B == 1
        assert wrapper.C == 2
        assert wrapper.D == 0
        assert wrapper.E == 1
        assert wrapper.F == 2

        # Constants must be reachable through their aliases. "C" has no
        # alias, so looking it up must return the constant named C.
        for alias, constant_name in (("beta", "B"), ("delta", "D"), ("C", "C")):
            assert wrapper.enum_from_alias(alias) == getattr(
                wrapper, constant_name)

        # The regular wrapper attributes must still be available.
        assert len(wrapper.fields) == len(wrapper.types) == wrapper.length
        assert wrapper.name
Exemplo n.º 3
0
class MyOpEnumList(COp):
    """Binary scalar arithmetic op whose parameter is an ``EnumList`` constant.

    The operation is selected at construction time through one of the
    aliases ``"+"``, ``"-"``, ``"*"`` or ``"/"`` and is implemented both in
    Python (``perform``) and in C (``c_code``).
    """

    __props__ = ("op_chosen", )
    params_type = EnumList(
        ("ADD", "+"),
        ("SUB", "-"),
        ("MULTIPLY", "*"),
        ("DIVIDE", "/"),
        ctype="unsigned long long",
    )

    def __init__(self, choose_op):
        """Sanity-check the enum, then store the alias of the chosen operation."""
        enum = self.params_type
        # Constants are expected to be numbered in declaration order.
        for expected_value, constant_name in enumerate(
                ("ADD", "SUB", "MULTIPLY", "DIVIDE")):
            assert getattr(enum, constant_name) == expected_value
        # Every alias must resolve to its own constant.
        for alias, constant_name in (
                ("+", "ADD"),
                ("-", "SUB"),
                ("*", "MULTIPLY"),
                ("/", "DIVIDE"),
        ):
            assert enum.fromalias(alias) == getattr(enum, constant_name)
        assert enum.has_alias(choose_op)
        self.op_chosen = choose_op

    def get_params(self, node):
        """Return the stored operation alias as the op parameter."""
        return self.op_chosen

    def make_node(self, a, b):
        """Build an ``Apply`` node with two scalar inputs and a float64 output."""
        scalar_inputs = [aes.as_scalar(a), aes.as_scalar(b)]
        return Apply(self, scalar_inputs, [aes.float64()])

    def perform(self, node, inputs, outputs, op):
        """Python implementation: apply the operation identified by ``op``.

        Raises ``NotImplementedError`` when ``op`` matches none of the four
        enum constants.
        """
        lhs, rhs = inputs
        (cell, ) = outputs
        enum = self.params_type
        if op == enum.ADD:
            cell[0] = lhs + rhs
        elif op == enum.SUB:
            cell[0] = lhs - rhs
        elif op == enum.MULTIPLY:
            cell[0] = lhs * rhs
        elif op == enum.DIVIDE:
            # True division as soon as one operand has a continuous dtype,
            # floor division otherwise.
            has_continuous = any(
                dt in continuous_dtypes for dt in (lhs.dtype, rhs.dtype))
            if has_continuous:
                cell[0] = lhs / rhs
            else:
                cell[0] = lhs // rhs
        else:
            raise NotImplementedError("Unknown op id " + str(op))
        # The output is always stored as float64.
        cell[0] = np.float64(cell[0])

    def c_code_cache_version(self):
        """Version tuple of the generated C code."""
        return (1, )

    def c_code(self, node, name, inputs, outputs, sub):
        """C implementation: a ``switch`` over the enum constants."""
        template = """
        switch(%(op)s) {
            case ADD:
                %(o)s = %(a)s + %(b)s;
                break;
            case SUB:
                %(o)s = %(a)s - %(b)s;
                break;
            case MULTIPLY:
                %(o)s = %(a)s * %(b)s;
                break;
            case DIVIDE:
                %(o)s = %(a)s / %(b)s;
                break;
            default:
                {%(fail)s}
                break;
        }
        """
        substitutions = {
            "op": sub["params"],
            "o": outputs[0],
            "a": inputs[0],
            "b": inputs[1],
            "fail": sub["fail"],
        }
        return template % substitutions
Exemplo n.º 4
0
class BaseCorrMM(OpenMPOp, _NoPythonOp):
    """
    Base class for `CorrMM`, `CorrMM_gradWeights` and
    `CorrMM_gradInputs`. Cannot be used directly.

    Every sub-class must define internal attribute ``_direction`` out of __init__().
    ``_direction`` must take one of following values:

     - "forward" to correlate bottom with weights and store results in top.
     - "backprop weights" to do a valid convolution of bottom with top
       (swapping the first two dimensions) and store results in weights.
     - "backprop inputs" to do a full convolution of top with weights
       (swapping the first two dimensions) and store results in bottom.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an integer,
        a pair of integers, or two pairs of integers.
    subsample
        Perform subsampling of the output (default: (1, 1)).
    filter_dilation
        Perform dilated correlation (default: (1,1))
    num_groups
        Perform grouped convolutions (default: 1)
    unshared
        Perform unshared correlation (default: False)
    openmp
        Forwarded unchanged to the parent class constructor (default: None).

    Raises
    ------
    ValueError
        If `border_mode`, `subsample`, `filter_dilation`, `num_groups` or the
        class attribute ``_direction`` has an invalid value.
    """

    check_broadcast = False
    __props__ = (
        "border_mode",
        "subsample",
        "filter_dilation",
        "num_groups",
        "unshared",
    )

    # Alias of the direction implemented by the concrete sub-class.
    _direction: Optional[str] = None

    # Every field below is read from the properties of the same name.
    params_type = ParamsType(
        direction=EnumList(
            ("DIRECTION_FORWARD", "forward"),  # 0
            ("DIRECTION_BACKPROP_WEIGHTS", "backprop weights"),  # 1
            ("DIRECTION_BACKPROP_INPUTS", "backprop inputs"),
        ),  # 2
        dH=int64,
        dW=int64,
        dilH=int64,
        dilW=int64,
        padH_l=int64,
        padH_r=int64,
        padW_l=int64,
        padW_r=int64,
        num_groups=int64,
        unshared=int8,
    )

    def __init__(
            self,
            border_mode="valid",
            subsample=(1, 1),
            filter_dilation=(1, 1),
            num_groups=1,
            unshared=False,
            openmp=None,
    ):
        super().__init__(openmp=openmp)
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError("invalid border_mode {}, which must be a "
                                 "non-negative integer".format(border_mode))
            border_mode = ((border_mode, border_mode), ) * 2
        elif isinstance(border_mode, tuple):
            if len(border_mode) != 2:
                raise ValueError("invalid border_mode {} which must be a "
                                 "tuple of length 2".format(border_mode))
            invalid_entry_msg = (
                "invalid border mode {}. The tuple can only contain "
                "integers or tuples of length 2".format(border_mode))
            border = ()
            for mode in border_mode:
                if isinstance(mode, tuple):
                    # Tuple entries are validated here on their own: letting
                    # an invalid one reach ``mode >= 0`` below would compare
                    # a tuple with an int and raise TypeError instead of the
                    # intended ValueError.
                    if not (len(mode) == 2 and min(mode) >= 0):
                        raise ValueError(invalid_entry_msg)
                    border += ((int(mode[0]), int(mode[1])), )
                elif mode >= 0:
                    border += ((int(mode), int(mode)), )
                else:
                    raise ValueError(invalid_entry_msg)
            border_mode = border
        elif border_mode not in ("valid", "full", "half"):
            raise ValueError(
                "invalid border_mode {}, which must be either "
                '"valid", "full", "half", an integer or a tuple '
                "of two integers or a pair of integers".format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 2:
            raise ValueError("subsample must have two elements")
        if len(filter_dilation) != 2:
            raise ValueError("filter_dilation must have two elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        self.unshared = unshared

        # Remember which BLAS flavour the link flags mention; it selects the
        # thread-control helpers in the C support code. Without link flags
        # Aesara will use a NumPy C implementation of [sd]gemm_ instead.
        blas_flags = config.blas__ldflags
        if blas_flags and "openblas" in blas_flags:
            self.blas_type = "openblas"
        elif blas_flags and "mkl" in blas_flags:
            self.blas_type = "mkl"
        else:
            self.blas_type = ""

        if self._direction not in [
                "forward", "backprop weights", "backprop inputs"
        ]:
            raise ValueError("_direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")
        if num_groups < 1:
            raise ValueError("Number of groups should be greater than 0")
        self.num_groups = num_groups

    @property
    def pad(self):
        """Padding as ``((padH_l, padH_r), (padW_l, padW_r))``.

        "half" and "full" are encoded with the sentinel values -1 and -2,
        which the generated C code replaces by the actual padding.
        """
        if self.border_mode == "half":
            return ((-1, -1), ) * 2
        elif self.border_mode == "full":
            return ((-2, -2), ) * 2
        elif isinstance(self.border_mode, tuple):
            return self.border_mode
        else:
            assert self.border_mode == "valid"
            return ((0, 0), ) * 2

    # Direction should be converted to real enum value,
    # as it is compared to integer later in c_code_helper().
    direction = property(
        lambda self: self.params_type.enum_from_alias(self._direction))

    dH = property(lambda self: self.subsample[0])
    dW = property(lambda self: self.subsample[1])

    dilH = property(lambda self: self.filter_dilation[0])
    dilW = property(lambda self: self.filter_dilation[1])

    padH_l = property(lambda self: self.pad[0][0])
    padH_r = property(lambda self: self.pad[0][1])
    padW_l = property(lambda self: self.pad[1][0])
    padW_r = property(lambda self: self.pad[1][1])

    def __str__(self):
        return "{}{{{}, {}, {}, {} {}}}".format(
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample),
            str(self.filter_dilation),
            str(self.num_groups),
            str(self.unshared),
        )

    @staticmethod
    def as_common_dtype(in1, in2):
        """
        Upcast input variables if necessary.
        """
        dtype = aesara.scalar.upcast(in1.dtype, in2.dtype)
        return in1.astype(dtype), in2.astype(dtype)

    def __setstate__(self, d):
        """Restore the instance state, defaulting a missing ``num_groups`` to 1."""
        self.__dict__.update(d)
        if not hasattr(self, "num_groups"):
            self.num_groups = 1

    def c_support_code(self, **kwargs):
        """Return the BLAS header text plus the thread helpers of the detected BLAS."""
        ccodes = blas_headers.blas_header_text()
        if self.blas_type == "openblas":
            ccodes += blas_headers.openblas_threads_text()
        elif self.blas_type == "mkl":
            ccodes += blas_headers.mkl_threads_text()
        return ccodes

    def c_libraries(self, **kwargs):
        return ldflags()

    def c_compile_args(self, **kwargs):
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super().c_compile_args(**kwargs)
        return compile_args

    def c_lib_dirs(self, **kwargs):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self, **kwargs):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self, **kwargs):
        headers = ["<stdio.h>"]
        headers += super().c_headers(**kwargs)
        return headers

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        return (10, self.openmp, blas_header_version())

    def c_support_code_apply(self, node, nodename):
        """Return ``c_code/corr_gemm.c`` with its placeholders filled in.

        The substitutions depend on the dtype of the first input of `node`
        (which must be float32 or float64), on ``self.openmp`` and on the
        detected BLAS flavour.
        """
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.__dict__["inputs"][0].dtype)
        assert dtype in ("float32", "float64")
        if dtype == "float32":
            sub["gemm"] = "sgemm_"
            sub["gemv"] = "sgemv_"
            sub["float_type"] = "npy_float"
            sub["float_typenum"] = "NPY_FLOAT"
            sub["n_bytes"] = 4
            sub["c_float_type"] = "float"
        else:
            sub["gemm"] = "dgemm_"
            sub["gemv"] = "dgemv_"
            sub["float_type"] = "npy_double"
            sub["float_typenum"] = "NPY_DOUBLE"
            sub["n_bytes"] = 8
            sub["c_float_type"] = "double"

        if self.openmp:
            sub["omp_flags"] = "#pragma omp parallel for schedule(static)"
            sub["omp_get_max_threads"] = "omp_get_max_threads()"
            sub["omp_get_thread_num"] = "omp_get_thread_num()"

            if self.blas_type == "openblas":
                sub["blas_set_num_threads"] = "openblas_set_num_threads"
                sub["blas_get_num_threads"] = "openblas_get_num_threads()"
            elif self.blas_type == "mkl":
                sub["blas_set_num_threads"] = "mkl_set_num_threads"
                sub["blas_get_num_threads"] = "mkl_get_max_threads()"
            else:
                sub["blas_set_num_threads"] = ""
                sub["blas_get_num_threads"] = "0"
        else:
            sub["omp_flags"] = ""
            sub["omp_get_max_threads"] = "1"
            sub["omp_get_thread_num"] = "0"
            sub["blas_set_num_threads"] = ""
            sub["blas_get_num_threads"] = "0"

        # The C template lives next to this module, in "c_code/".
        template_path = os.path.join(
            os.path.dirname(__file__), "c_code", "corr_gemm.c")
        with open(template_path) as f:
            template = f.read()
        return template % sub

    def c_code_helper(self,
                      bottom,
                      weights,
                      top,
                      sub,
                      height=None,
                      width=None):
        """
        This generates the C code for CorrMM (direction="forward"),
        CorrMM_gradWeights (direction="backprop weights"), and
        CorrMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        Parameters
        ----------
        bottom
            Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        weights
            Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        top
            Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        sub
            Dictionary of substitutions useable to help generating the
            C code. The keys "fail" and "params" are read.
        height
            If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights".  Ignored otherwise.
        width
            If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights".  Ignored otherwise.

        Returns
        -------
        str
            The C code with all placeholders substituted.

        Raises
        ------
        ValueError
            If `height` (resp. `width`) is missing although the direction,
            subsampling or padding makes it mandatory.
        """

        # When subsampling, we cannot unambiguously infer the height and width
        # of bottom and weights from top, so we require them to be given.
        # Similarly, when border_mode="half", we cannot infer the weight size.
        if height:
            height = f"(*(npy_int64 *)(PyArray_DATA({height})))"
        else:
            if ((self.direction != 0) and
                (self.dH != 1)) or ((self.direction == 1) and
                                    (self.padH_l == -1 or self.padH_r == -1)):
                raise ValueError(
                    "height must be given for backprop with vertical sampling or border_mode='half'"
                )
            # "-1" tells the C code that no explicit height was given.
            height = "-1"
        if width:
            width = f"(*(npy_int64 *)(PyArray_DATA({width})))"
        else:
            if ((self.direction != 0) and
                (self.dW != 1)) or ((self.direction == 1) and
                                    (self.padW_l == -1 or self.padW_r == -1)):
                raise ValueError(
                    "width must be given for backprop with horizontal sampling or border_mode='half'"
                )
            # "-1" tells the C code that no explicit width was given.
            width = "-1"

        return """
    // Mandatory args
    int direction = %(params)s->direction;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(params)s->dH;
    int dW = %(params)s->dW;
    int dilH = %(params)s->dilH;
    int dilW = %(params)s->dilW;
    int padH_l = %(params)s->padH_l;
    int padH_r = %(params)s->padH_r;
    int padW_l = %(params)s->padW_l;
    int padW_r = %(params)s->padW_r;
    int numgroups = %(params)s->num_groups;
    int unshared = %(params)s->unshared;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;
    PyArrayObject **out = NULL;

    switch(%(params)s->direction) {
        case DIRECTION_FORWARD:
            out = &%(top)s;
            break;
        case DIRECTION_BACKPROP_WEIGHTS:
            out = &%(weights)s;
            break;
        case DIRECTION_BACKPROP_INPUTS:
            out = &%(bottom)s;
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "CPU CorrMM: Invalid direction.");
            {%(fail)s}
            break;
    }

    int wdim, odim;
    wdim = unshared ? 6 : 4;
    odim = 4; //Can be set to 6 later for unshared backprop wrt weights

    // Obtain or infer kernel width and height
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, dil_kH, dil_kW;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[wdim-2];
        kW = PyArray_DIMS(weights)[wdim-1];
    }
    else {
        if (%(height)s != -1) {
            // kernel height is specified (perhaps vertical subsampling or half padding)
            kH = %(height)s;
        }
        else if (padH_l == -2 || padH_r == -2) {
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + padH_l + padH_r - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }
        if (%(width)s != -1) {
            // kernel width is specified (perhaps horizontal subsampling or half padding)
            kW = %(width)s;
        }
        else if (padW_l == -2 || padW_r == -2) {
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        else {
            kW = (PyArray_DIMS(bottom)[3] + padW_l + padW_r - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
    }

    // Implicit dilated kernel size
    dil_kH = (kH - 1) * dilH + 1;
    dil_kW = (kW - 1) * dilW + 1;

    // Auto-padding if requested
    if (padH_l == -1 || padH_r == -1) {  // vertical half padding
        padH_l = padH_r = dil_kH / 2;
    }
    else if (padH_l == -2 || padH_r == -2) {  // vertical full padding
        padH_l = padH_r = dil_kH - 1;
    }
    else if (padH_l < -2 || padH_r < -2) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padH_l and padH_r must be >= -2");
        %(fail)s
    }
    if (padW_l == -1 || padW_r == -1) {  // horizontal half padding
        padW_l = padW_r = dil_kW / 2;
    }
    else if (padW_l == -2 || padW_r == -2) {  // horizontal full padding
        padW_l = padW_r = dil_kW - 1;
    }
    else if (padW_l < -2 || padW_r < -2) {
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: padW_l and padW_r must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[6];
    out_dim[4] = out_dim[5] = 0; //Only used for unshared backprop wrt weights
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width)
        // height and width: top = (bottom + pad_l + pad_r - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + padH_l + padH_r - ((PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + padW_l + padW_r - ((PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1)) / dW + 1);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
        {
            if (unshared) {
                PyErr_Format(PyExc_ValueError,
                             "CorrMM: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3]);
            }
            else {
                PyErr_Format(PyExc_ValueError,
                             "CorrMM: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3]);
            }
            %(fail)s
        }
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width)
        // height and width: weights = (bottom + pad_l + pad_r - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        if (unshared){
            odim = 6;
            out_dim[1] = (npy_intp)PyArray_DIMS(top)[2];
            out_dim[2] = (npy_intp)PyArray_DIMS(top)[3];
        }
        out_dim[wdim-3] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
        out_dim[wdim-2] = (npy_intp)kH;  // already inferred further above
        out_dim[wdim-1] = (npy_intp)kW;  // how convenient
        if (unshared) {
            if (out_dim[0] < 0 || out_dim[1] <= 0 || out_dim[2] <= 0 || out_dim[3] < 0
                    || out_dim[4] <= 0 || out_dim[5] <= 0){
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. weights: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3], (long int)out_dim[4], (long int)out_dim[5],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                %(fail)s
            }
        }
        else {
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. weights: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                             (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                %(fail)s
            }
        }
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[wdim-3] * numgroups;
        out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[wdim-2]-1)*dilH + 1 - padH_l - padH_r);
        out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[wdim-1]-1)*dilW + 1 - padW_l - padW_r);
        if (unshared) {
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. inputs: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(weights)[4], (long int)PyArray_DIMS(weights)[5],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                %(fail)s
            }
        }
        else {
            if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0)
            {
                PyErr_Format(PyExc_ValueError,
                             "CorrMM backprop wrt. inputs: impossible output shape\\n"
                             "  bottom shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  weights shape: %%ld x %%ld x %%ld x %%ld\\n"
                             "  top shape: %%ld x %%ld x %%ld x %%ld\\n",
                             (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                             (long int)out_dim[3],
                             (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                             (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                             (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                             (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3]);
                %(fail)s
            }
        }
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorrMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array
    int typenum;
    int failure;
    failure = !(*out
           && PyArray_NDIM(*out)==odim
           && PyArray_IS_C_CONTIGUOUS(*out)
           && PyArray_DIMS(*out)[0]==out_dim[0]
           && PyArray_DIMS(*out)[1]==out_dim[1]
           && PyArray_DIMS(*out)[2]==out_dim[2]
           && PyArray_DIMS(*out)[3]==out_dim[3]);
    if (odim == 6){
        failure = failure || !(PyArray_DIMS(*out)[4]==out_dim[4]
                && PyArray_DIMS(*out)[5]==out_dim[5]);
    }
    if ( failure )
    {
        Py_XDECREF(*out);
        if (direction != 1) {
          typenum = PyArray_TYPE(weights);
        }
        else {
          typenum = PyArray_TYPE(bottom);
        }
        //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
        *out = (PyArrayObject*)PyArray_ZEROS(odim,
                                          out_dim,
                                          typenum,
                                          0);
        if (NULL == *out)
        {
            if (odim == 4) {
                PyErr_Format(PyExc_RuntimeError,
                        "BaseCorrMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld",
                        (long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3]);
            }
            if (odim == 6) {
                PyErr_Format(PyExc_RuntimeError,
                        "BaseCorrMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld %%lld %%lld",
                        (long long)out_dim[0], (long long)out_dim[1], (long long)out_dim[2], (long long)out_dim[3],
                        (long long)out_dim[4], (long long)out_dim[5]);
            }
            %(fail)s
        }
    }

    // Call corrMM code
    out2 = corrMM(%(bottom)s, %(weights)s, %(top)s, direction, dH, dW, dilH, dilW,
                padH_l, padH_r, padW_l, padW_r, numgroups, unshared);
    if (out2==NULL){
       %(fail)s
    }
    assert (out2 == *out);

""" % dict(
            bottom=bottom,
            weights=weights,
            top=top,
            height=height,
            width=width,
            fail=sub["fail"],
            params=sub["params"],
        )
Exemplo n.º 5
0
class Images2Neibs(COp):
    """
    Reshapes the input as a 2D tensor where each row is a pooling
    example (one flattened neighbourhood of the input).

    Parameters
    ----------
    mode : {'valid', 'half', 'full', 'ignore_borders', 'wrap_centered'}
        - 'valid' :
            Requires an input that is a multiple of the pooling factor
            (in each direction).
        - 'half' :
            Equivalent to 'valid' if we pre-pad with zeros the input on
            each side by (neib_shape[0]//2, neib_shape[1]//2)
        - 'full' :
            Equivalent to 'valid' if we pre-pad with zeros the input on
            each side by (neib_shape[0] - 1, neib_shape[1] - 1)
        - 'ignore_borders' :
            Same as valid, but will ignore the borders if the shape(s)
            of the input is not a multiple of the pooling factor(s).
        - 'wrap_centered' :
            Every patch is shifted back by half the neighbourhood shape
            and indices that fall outside the image wrap around to the
            opposite border. The neighbourhood shape must be odd and
            must not be larger than the image.

    """

    # Attributes that characterise an instance of this op.
    # NOTE(review): presumably consumed by the Op machinery for
    # equality/hashing -- confirm against the base class.
    __props__ = ("mode", )
    # Maps the C-level constant names used in ``c_code`` (MODE_*) to the
    # Python-level aliases accepted by ``__init__``.
    BORDER_MODE = EnumList(
        ("MODE_VALID", "valid"),
        ("MODE_HALF", "half"),
        ("MODE_FULL", "full"),
        ("MODE_WRAP_CENTERED", "wrap_centered"),
        ("MODE_IGNORE_BORDERS", "ignore_borders"),
    )
    # The op's only parameter is its border mode (see ``get_params``).
    params_type = BORDER_MODE

    def get_params(self, node):
        return self.mode

    def __init__(self, mode="valid"):
        implemented_modes = self.BORDER_MODE.get_aliases()
        if mode not in implemented_modes:
            raise NotImplementedError(
                f"Only modes {', '.join(implemented_modes)} have been implemented for {type(self).__name__}"
            )
        self.mode = mode

    def __str__(self):
        return self.__class__.__name__ + "{%s}" % self.mode

    def __setstate__(self, d):
        self.__dict__.update(d)
        if not hasattr(self, "mode"):
            self.mode = "valid"

    def make_node(self, ten4, neib_shape, neib_step=None):
        """
        Build the apply node for this op.

        Parameters
        ----------
        ten4 : a list of lists of images
            4D input of shape (list 1 dim, list 2 dim, row, col).
        neib_shape
            (r,c): height (in rows) and width (in columns) of one
            neighbourhood.
        neib_step
            (dr,dc): number of rows / columns to move between two
            consecutive patches. ``None`` means "same as neib_shape",
            i.e. the patches are disjoint.

        Returns
        -------
        Apply
            A node with inputs ``[ten4, neib_shape, neib_step]`` and a
            single matrix output of the same dtype as ``ten4``. Each row
            of that matrix is one flattened patch; rows are ordered as in
            the following pattern::

                idx = 0
                for i in range(list 1 dim)
                    for j in range(list 2 dim)
                        for l in <patch row positions>
                            for k in <patch column positions>
                                output[idx,:]
                                     = flattened version of ten4[i,j,l:l+r,k:k+c]
                                idx += 1

            .. note:: This only describes the output layout; it is not
                a statement about how the op is implemented.

        """
        ten4 = as_tensor_variable(ten4)
        neib_shape = as_tensor_variable(neib_shape)
        # A missing step reuses the (already converted) shape variable.
        neib_step = (neib_shape
                     if neib_step is None else as_tensor_variable(neib_step))

        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_step.ndim == 1

        node_inputs = [ten4, neib_shape, neib_step]
        node_outputs = [matrix(dtype=ten4.type.dtype)]
        return Apply(self, node_inputs, node_outputs)

    def grad(self, inp, grads):
        """
        Return the gradients with respect to ``(x, neib_shape, neib_step)``.

        The gradients for ``neib_shape`` and ``neib_step`` are always
        reported as undefined. For ``x``:

        - in 'valid' / 'ignore_borders' mode with a step equal to the
          neighbourhood shape, the gradient is built with ``neibs2images``;
        - otherwise, in 'valid' mode, the contributions of every position
          inside the neighbourhood are accumulated with a scan;
        - in every other case the gradient is reported as not implemented.
        """
        x, neib_shape, neib_step = inp
        (gz, ) = grads

        def undefined_tail():
            # Gradients for the two non-differentiable inputs.
            return [
                grad_undefined(self, 1, neib_shape),
                grad_undefined(self, 2, neib_step),
            ]

        if self.mode in ("valid", "ignore_borders"):
            step_equals_shape = (
                neib_shape is neib_step or neib_shape == neib_step or
                # Aesara Constant == does not compare the data;
                # the equals function does that.
                (hasattr(neib_shape, "equals")
                 and neib_shape.equals(neib_step)))
            if step_equals_shape:
                images_grad = neibs2images(gz,
                                           neib_shape,
                                           x.shape,
                                           mode=self.mode)
                return [images_grad] + undefined_tail()

        if self.mode == "valid":
            # Iterate over neighborhood positions, summing contributions.
            def pos2map(pidx, pgz, prior_result, neib_shape, neib_step):
                """
                Add the gradient contribution of one position inside the
                neighbourhood.

                pidx = Index of position within neighborhood.
                pgz  = Gradient of shape (batch_size*num_channels*neibs)
                prior_result  = Shape (batch_size, num_channnels, rows, cols)
                neib_shape = Number of rows, cols in a neighborhood.
                neib_step  = Step sizes from image2neibs.
                """
                nrows, ncols = neib_shape
                rstep, cstep = neib_step
                batch_size, num_channels, rows, cols = prior_result.shape
                row_off = pidx // ncols
                col_off = pidx - (row_off * ncols)
                # This position does not touch some img pixels in valid mode.
                touched = prior_result[:, :,
                                       row_off:(rows - nrows + row_off + 1):rstep,
                                       col_off:(cols - ncols + col_off + 1):cstep, ]
                touched_shape = ((batch_size, num_channels) +
                                 ((rows - nrows) // rstep + 1, ) +
                                 ((cols - ncols) // cstep + 1, ))
                return inc_subtensor(touched, pgz.reshape(touched_shape))

            positions = arange(neib_shape[0] * neib_shape[1])
            per_position_gz = gz.dimshuffle((1, 0))
            accumulated, _ = aesara.scan(
                fn=pos2map,
                sequences=[positions, per_position_gz],
                outputs_info=zeros(x.shape),
                non_sequences=[neib_shape, neib_step],
            )
            # The last scan step holds the sum over all positions.
            return [accumulated[-1]] + undefined_tail()

        return [grad_not_implemented(self, 0, x)] + undefined_tail()

    def c_code_cache_version(self):
        return (10, )

    def perform(self, node, inp, out_, params):
        ten4, neib_shape, neib_step = inp
        (z, ) = out_
        # GpuImages2Neibs should not run this perform in DebugMode
        if type(self) != Images2Neibs:
            raise aesara.graph.utils.MethodNotDefined()

        def CEIL_INTDIV(a, b):
            if a % b:
                return (a // b) + 1
            else:
                return a // b

        grid_c = -1  # number of patch in height
        grid_d = -1  # number of patch in width
        assert ten4.ndim == 4
        assert neib_shape.ndim == 1
        assert neib_shape.shape[0] == 2
        assert neib_step.ndim == 1
        assert neib_step.shape[0] == 2
        c, d = neib_shape
        step_x, step_y = neib_step
        mode = self.mode
        if step_x <= 0 or step_y <= 0:
            raise ValueError("neib_step wrong step ; values <= 0. Got " +
                             str(neib_step))
        if c <= 0 or d <= 0:
            raise ValueError("neib_shape values <=0. Got " + str(neib_shape))

        if mode == "wrap_centered":
            if (c % 2 != 1) or (d % 2 != 1):
                raise TypeError(
                    "Images2Neibs:"
                    " in mode wrap_centered need patch with odd shapes")

            if (ten4.shape[2] < c) or (ten4.shape[3] < d):
                raise TypeError(
                    "Images2Neibs: in wrap_centered mode, don't support"
                    " image shapes smaller then the patch shapes:"
                    f" neib_shape=({int(c)},{int(d)}), ten4[2:]=[{int(ten4.shape[2])},{int(ten4.shape[3])}]"
                )
            grid_c = CEIL_INTDIV(ten4.shape[2], step_x)
            grid_d = CEIL_INTDIV(ten4.shape[3], step_y)
        elif mode == "valid":
            if (ten4.shape[2] < c) or (((ten4.shape[2] - c) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent")
            if (ten4.shape[3] < d) or (((ten4.shape[3] - d) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[1]={int(d)}, neib_step[1]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent")
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - c) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - d) // step_y)
        elif mode == "ignore_borders":
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - c) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - d) // step_y)
        elif mode == "half":
            # This is equivalent to 'valid' with padding (c // 2, d // 2) on both sides
            # Thus the expanded image will have size (h + 2 * (c // 2), w + 2 * (d // 2))
            # Plugging these in the equation for 'valid' we get
            # h + 2 * (c // 2) - c  = h - (c % 2)
            # w + 2 * (d // 2) - c  = w - (d % 2)
            if (ten4.shape[2] < c) or (((ten4.shape[2] -
                                         (c % 2)) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent")
            if (ten4.shape[3] < d) or (((ten4.shape[3] -
                                         (d % 2)) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent")
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] - (c % 2)) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] - (d % 2)) // step_y)
        elif mode == "full":
            # This is equivalent to 'valid' with padding (c - 1, d - 1) on both sides
            # Thus the expanded image will have size (h + 2 * (c - 1), w + 2 * (d - 1))
            # Plugging these in the equation for 'valid' we get
            # h + 2 * (c - 1) - c  = h + c - 2
            # w + 2 * (d - 1) - c  = w + d - 2
            if (ten4.shape[2] < c) or ((
                (ten4.shape[2] + c - 2) % step_x) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(c)}, neib_step[0]={int(step_x)} and"
                    f" ten4.shape[2]={int(ten4.shape[2])} not consistent")
            if (ten4.shape[3] < d) or ((
                (ten4.shape[3] + d - 2) % step_y) != 0):
                raise TypeError(
                    f"neib_shape[0]={int(d)}, neib_step[0]={int(step_y)} and"
                    f" ten4.shape[3]={int(ten4.shape[3])} not consistent")
            # number of patch in height
            grid_c = 1 + ((ten4.shape[2] + c - 2) // step_x)
            # number of patch in width
            grid_d = 1 + ((ten4.shape[3] + d - 2) // step_y)
        else:
            raise TypeError(f"Images2Neibs: unknown mode '{mode}'")
        z_dim0 = grid_c * grid_d * ten4.shape[1] * ten4.shape[0]
        z_dim1 = c * d
        z[0] = np.empty((z_dim0, z_dim1), dtype=node.outputs[0].dtype)

        nb_batch = ten4.shape[0]
        nb_stack = ten4.shape[1]
        height = ten4.shape[2]
        width = ten4.shape[3]

        wrap_centered_half_idx_shift_x = c // 2
        wrap_centered_half_idx_shift_y = d // 2
        for n in range(nb_batch):
            for s in range(nb_stack):
                # loop over the number of patch in height
                for a in range(grid_c):
                    # loop over the number of patch in width
                    for b in range(grid_d):
                        z_row = b + grid_d * (a + grid_c * (s + nb_stack * n))
                        for i in range(c):
                            ten4_2 = i + a * step_x
                            if mode == "wrap_centered":
                                ten4_2 -= wrap_centered_half_idx_shift_x
                                if ten4_2 < 0:
                                    ten4_2 += height
                                elif ten4_2 >= height:
                                    ten4_2 -= height
                            elif mode == "half":
                                ten4_2 -= wrap_centered_half_idx_shift_x
                            elif mode == "full":
                                ten4_2 -= c - 1
                            if ten4_2 < 0 or ten4_2 >= height:
                                z[0][z_row, d * i:d * i + d] = 0
                            else:
                                for j in range(d):
                                    ten4_3 = j + b * step_y
                                    if mode == "wrap_centered":
                                        ten4_3 -= wrap_centered_half_idx_shift_y
                                        if ten4_3 < 0:
                                            ten4_3 += width
                                        elif ten4_3 >= width:
                                            ten4_3 -= width
                                    elif mode == "half":
                                        ten4_3 -= wrap_centered_half_idx_shift_y
                                    elif mode == "full":
                                        ten4_3 -= d - 1
                                    z_col = j + d * i
                                    if ten4_3 < 0 or ten4_3 >= width:
                                        z[0][z_row, z_col] = 0
                                    else:
                                        z[0][z_row, z_col] = ten4[n, s, ten4_2,
                                                                  ten4_3]

    def infer_shape(self, fgraph, node, input_shape):
        in_shape = input_shape[0]
        c, d = node.inputs[1]
        step_x, step_y = node.inputs[2]
        if self.mode == "wrap_centered":
            grid_c = ceil_intdiv(in_shape[2], step_x)
            grid_d = ceil_intdiv(in_shape[3], step_y)
        elif self.mode == "valid":
            grid_c = 1 + ((in_shape[2] - c) // step_x)
            grid_d = 1 + ((in_shape[3] - d) // step_y)
        elif self.mode == "ignore_borders":
            grid_c = 1 + ((in_shape[2] - c) // step_x)
            grid_d = 1 + ((in_shape[3] - d) // step_y)
        elif self.mode == "half":
            grid_c = 1 + ((in_shape[2] - (c % 2)) // step_x)
            grid_d = 1 + ((in_shape[3] - (d % 2)) // step_y)
        elif self.mode == "full":
            grid_c = 1 + ((in_shape[2] + c - 2) // step_x)
            grid_d = 1 + ((in_shape[3] + d - 2) // step_y)
        else:
            raise TypeError(f"Images2Neibs: unknown mode '{self.mode}'")
        z_dim0 = grid_c * grid_d * in_shape[1] * in_shape[0]
        z_dim1 = c * d
        return [(z_dim0, z_dim1)]

    def c_code(self, node, name, inp, out, sub):
        """
        Return the C implementation of the op as a source string.

        The template is filled in with ``%``-formatting, which is why
        every literal percent sign of the C code is written as ``%%``.

        Parameters
        ----------
        node, name
            Not used by this method.
        inp
            Names of the C variables for ``ten4``, ``neib_shape`` and
            ``neib_step`` (in that order).
        out
            Names of the C output variables; only ``out[0]`` (``z``) is
            used.
        sub
            Substitution dictionary; ``sub["fail"]`` is inserted after
            every error that is set and ``sub["params"]`` is compared
            against the ``MODE_*`` constants.

        Notes
        -----
        The generated code has two scopes. The first one validates the
        rank and length of the inputs, reads the neighbourhood shape and
        step, computes the number of patches per axis (``grid_c``,
        ``grid_d``) for the selected mode and allocates ``z`` when it is
        NULL or has the wrong dimensions. The second one loops over
        batches, stacks and patches and fills ``z``, writing zeros for
        indices that fall outside the image.
        """
        return """
#ifndef CEIL_INTDIV
#define CEIL_INTDIV(a, b) ((a/b) + ((a %% b) ? 1: 0))
#endif

        int grid_c = -1; //number of patch in height
        int grid_d = -1; //number of patch in width
        {
        if (PyArray_NDIM(%(ten4)s) != 4)
        {
            PyErr_Format(PyExc_TypeError, "ten4 wrong rank");
            %(fail)s;
        }
        if (PyArray_NDIM(%(neib_shape)s) != 1)
        {
            PyErr_Format(PyExc_TypeError, "neib_shape wrong rank");
            %(fail)s;
        }
        if ( (PyArray_DIMS(%(neib_shape)s))[0] != 2)
        {
            PyErr_Format(PyExc_TypeError, "neib_shape wrong shape ; has to"
                                          " contain 2 elements");
            %(fail)s;
        }
        if (PyArray_NDIM(%(neib_step)s) != 1)
        {
            PyErr_Format(PyExc_TypeError, "neib_step wrong rank");
            %(fail)s;
        }
        if ( (PyArray_DIMS(%(neib_step)s))[0] != 2)
        {
            PyErr_Format(PyExc_TypeError,
                         "neib_step wrong step ; has to contain 2 elements");
            %(fail)s;
        }

        // (c,d) = neib_shape
        const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
        const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
        // (step_x,step_y) = neib_step
        const dtype_%(neib_step)s step_x = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
        const dtype_%(neib_step)s step_y = *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);

        if (step_x <=0 || step_y <=0)
        {
            PyErr_Format(PyExc_ValueError,
                         "neib_step wrong step ; values <= 0. Got %%lld %%lld.",
                         (long long) step_x, (long long) step_y);
            %(fail)s;
        }

        if (c <=0 || d <=0)
        {
            PyErr_Format(PyExc_ValueError,
                         "neib_shape values <= 0. Got %%lld %%lld.",
                         (long long)c, (long long)d);
            %(fail)s;
        }

        if (%(mode)s == MODE_WRAP_CENTERED) {
            if (c%%2!=1 || d%%2!=1){
                PyErr_Format(PyExc_TypeError,
                             "Images2Neibs: in mode wrap_centered"
                             " need patch with odd shapes");
                %(fail)s;
            }
            if ( (PyArray_DIMS(%(ten4)s))[2] < c ||
                 (PyArray_DIMS(%(ten4)s))[3] < d)
            {
                PyErr_Format(PyExc_TypeError,
                    "Images2Neibs: in wrap_centered mode, don't support image"
                    " shapes smaller then the patch shapes:"
                    " neib_shape=(%%ld,%%ld), ten4[2:]=[%%ld,%%ld]",
                    (long int)c, (long int)d,
                    (long int)(PyArray_DIMS(%(ten4)s)[2]),
                    (long int)(PyArray_DIMS(%(ten4)s)[3]));
                %(fail)s;
            }
            grid_c = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[2]),step_x);
            grid_d = CEIL_INTDIV(((PyArray_DIMS(%(ten4)s))[3]),step_y);

        } else if (%(mode)s == MODE_VALID) {
            if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                 ( (((PyArray_DIMS(%(ten4)s))[2]-c) %% step_x)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                             " ten4.shape[2]=%%ld not consistent",
                             (long int)c, (long int)step_x,
                             (long int)(PyArray_DIMS(%(ten4)s)[2]));
                %(fail)s;
            }
            if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                 ( (((PyArray_DIMS(%(ten4)s))[3]-d) %% step_y)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                             " ten4.shape[3]=%%ld not consistent",
                             (long int)d, (long int)step_y,
                             (long int)(PyArray_DIMS(%(ten4)s)[3]));
                %(fail)s;
            }
            //number of patch in height
            grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
            //number of patch in width
            grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
        } else if (%(mode)s == MODE_IGNORE_BORDERS) {
            //number of patch in height
            grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-c)/step_x);
            //number of patch in width
            grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-d)/step_y);
        } else if (%(mode)s == MODE_HALF) {
            if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                 ( (((PyArray_DIMS(%(ten4)s))[2]-(c%%2)) %% step_x)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                             " ten4.shape[2]=%%ld not consistent",
                             (long int)c, (long int)step_x,
                             (long int)(PyArray_DIMS(%(ten4)s)[2]));
                %(fail)s;
            }
            if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                 ( (((PyArray_DIMS(%(ten4)s))[3]-(d%%2)) %% step_y)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                             " ten4.shape[3]=%%ld not consistent",
                             (long int)d, (long int)step_y,
                             (long int)(PyArray_DIMS(%(ten4)s)[3]));
                %(fail)s;
            }
            //number of patch in height
            grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]-(c%%2))/step_x);
            //number of patch in width
            grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]-(d%%2))/step_y);
        } else if (%(mode)s == MODE_FULL) {
            if ( ((PyArray_DIMS(%(ten4)s))[2] < c) ||
                 ( (((PyArray_DIMS(%(ten4)s))[2]+c-2) %% step_x)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[0]=%%ld, neib_step[0]=%%ld and"
                             " ten4.shape[2]=%%ld not consistent",
                             (long int)c, (long int)step_x,
                             (long int)(PyArray_DIMS(%(ten4)s)[2]));
                %(fail)s;
            }
            if ( ((PyArray_DIMS(%(ten4)s))[3] < d) ||
                 ( (((PyArray_DIMS(%(ten4)s))[3]+d-2) %% step_y)!=0))
            {
                PyErr_Format(PyExc_TypeError,
                             "neib_shape[1]=%%ld, neib_step[1]=%%ld and"
                             " ten4.shape[3]=%%ld not consistent",
                             (long int)d, (long int)step_y,
                             (long int)(PyArray_DIMS(%(ten4)s)[3]));
                %(fail)s;
            }
            //number of patch in height
            grid_c = 1+(((PyArray_DIMS(%(ten4)s))[2]+c-2)/step_x);
            //number of patch in width
            grid_d = 1+(((PyArray_DIMS(%(ten4)s))[3]+d-2)/step_y);
        } else {
            PyErr_Format(PyExc_TypeError,
                         "Images2Neibs: unknown mode %%d", %(mode)s);
            %(fail)s;
        }

        // new dimensions for z
        const npy_intp z_dim1 = c * d;
        const npy_intp z_dim0 =  grid_c
                            * grid_d
                            * (PyArray_DIMS(%(ten4)s))[1]
                            * (PyArray_DIMS(%(ten4)s))[0];

        if ((NULL == %(z)s)
            || ((PyArray_DIMS(%(z)s))[0] != z_dim0 )
            || ((PyArray_DIMS(%(z)s))[1] != z_dim1 )
        )
        {
            Py_XDECREF(%(z)s);
            npy_intp dims[2];
            dims[0] = z_dim0;
            dims[1] = z_dim1;

            %(z)s = (PyArrayObject*) PyArray_EMPTY(2,
                dims,
                PyArray_TYPE((PyArrayObject*) py_%(ten4)s),
                0);

            if (!%(z)s)
            {
                PyErr_SetString(PyExc_MemoryError, "failed to alloc z output");
                %(fail)s;
            }
        }
        }

        { // NESTED SCOPE

        const int nb_batch = (PyArray_DIMS(%(ten4)s))[0];
        const int nb_stack = (PyArray_DIMS(%(ten4)s))[1];
        const int height = (PyArray_DIMS(%(ten4)s))[2];
        const int width = (PyArray_DIMS(%(ten4)s))[3];

        // (c,d) = neib_shape
        const npy_intp c = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 0);
        const npy_intp d = (npy_intp) *(dtype_%(neib_shape)s*) PyArray_GETPTR1(%(neib_shape)s, 1);
        // (step_x,step_y) = neib_step
        const npy_intp step_x = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 0);
        const npy_intp step_y = (npy_intp) *(dtype_%(neib_step)s*) PyArray_GETPTR1(%(neib_step)s, 1);

        const int wrap_centered_half_idx_shift_x = c/2;
        const int wrap_centered_half_idx_shift_y = d/2;
        // Oh this is messed up...
        for (int n = 0; n < nb_batch; n++)              // loop over batches
            for (int s = 0; s < nb_stack; s++)          // loop over stacks
                for (int a = 0; a < grid_c; a++)        // loop over the number of patch in height
                    for (int b = 0; b < grid_d; b++)    // loop over the number of patch in width
                    {
                        int z_row = b + grid_d*(a + grid_c*(s + nb_stack*n));
                        for (int i = 0; i < c; i++)     // loop over c
                        {
                            int ten4_2 = i + a * step_x;
                            if (%(mode)s == MODE_WRAP_CENTERED) {
                                ten4_2 -= wrap_centered_half_idx_shift_x;
                                if ( ten4_2 < 0 ) ten4_2 += height;
                                else if (ten4_2 >= height) ten4_2 -= height;
                            } else if (%(mode)s == MODE_HALF) {
                                ten4_2 -= wrap_centered_half_idx_shift_x;
                            } else if (%(mode)s == MODE_FULL) {
                                ten4_2 -= c - 1;
                            }
                            if (ten4_2 < 0 | ten4_2 >= height) {
                                dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, d * i);
                                memset(curr_z, 0, d*sizeof(*curr_z));
                            } else {
                                for (int j = 0; j < d; j++)  // loop over d
                                {
                                    int ten4_3 = j + b * step_y;
                                    if (%(mode)s == MODE_WRAP_CENTERED) {
                                        ten4_3 -= wrap_centered_half_idx_shift_y;
                                        if ( ten4_3 < 0 ) ten4_3 += width;
                                        else if (ten4_3 >= width) ten4_3 -= width;
                                    } else if (%(mode)s == MODE_HALF) {
                                        ten4_3 -= wrap_centered_half_idx_shift_y;
                                    } else if (%(mode)s == MODE_FULL) {
                                        ten4_3 -= d - 1;
                                    }
                                    int z_col = j + d * i;
                                    dtype_%(z)s* curr_z = (dtype_%(z)s*) PyArray_GETPTR2(%(z)s, z_row, z_col);
                                    if (ten4_3 < 0 | ten4_3 >= width) {
                                        *curr_z = 0;
                                    } else {
                                        *curr_z = *( (dtype_%(ten4)s*) PyArray_GETPTR4(%(ten4)s, n, s, ten4_2, ten4_3));
                                    }
                                }
                            }
                        }
                    }
        } // END NESTED SCOPE
        """ % dict(
            ten4=inp[0],
            neib_shape=inp[1],
            neib_step=inp[2],
            z=out[0],
            fail=sub["fail"],
            mode=sub["params"],
        )
Exemplo n.º 6
0
class CumOp(COp):
    """Op computing a cumulative sum or a cumulative product of a tensor.

    ``mode="add"`` accumulates with ``cumsum`` and ``mode="mul"`` with
    ``cumprod``.  When ``axis`` is ``None`` the result is a flattened vector;
    otherwise the output has the same type as the input.

    See the functions ``cumsum``/``cumprod`` for the user-facing docstring.
    """

    __props__ = ("axis", "mode")
    check_input = False
    params_type = ParamsType(
        c_axis=int_t,
        mode=EnumList(("MODE_ADD", "add"), ("MODE_MUL", "mul")),
    )

    def __init__(self, axis=None, mode="add"):
        """Store ``axis`` and ``mode``.

        Raises
        ------
        ValueError
            If ``mode`` is neither ``"add"`` nor ``"mul"``.
        """
        if mode not in ("add", "mul"):
            raise ValueError(f'{type(self).__name__}: Unknown mode "{mode}"')
        self.axis, self.mode = axis, mode

    @property
    def c_axis(self):
        """Axis value handed to the C code through the params struct.

        ``None`` is encoded as ``np.MAXDIMS``, which the C template compares
        against ``NPY_MAXDIMS`` to select the flattened code path.
        """
        # NOTE(review): relies on `np.MAXDIMS` existing and matching the
        # C-level NPY_MAXDIMS -- confirm for the supported NumPy versions.
        if self.axis is None:
            return np.MAXDIMS
        return self.axis

    def make_node(self, x):
        """Build the Apply node for ``x`` after validating ``self.axis``.

        Raises
        ------
        ValueError
            If ``self.axis`` lies outside ``[-x.ndim, x.ndim)``.
        """
        x = aet.as_tensor_variable(x)
        result_var = x.type()

        if self.axis is None:
            # No axis given: the output is a flattened vector.
            result_var = vector(dtype=x.dtype)  # Flatten
        elif self.axis >= x.ndim or self.axis < -x.ndim:
            raise ValueError(f"axis(={self.axis}) out of bounds")

        return Apply(self, [x], [result_var])

    def perform(self, node, inputs, output_storage, params):
        """Python implementation: delegate to ``np.cumsum``/``np.cumprod``."""
        accumulate = np.cumsum if self.mode == "add" else np.cumprod
        output_storage[0][0] = accumulate(inputs[0], axis=self.axis)

    def _unknown_grad_mode(self):
        # Build the error raised when no gradient exists for ``self.mode``.
        return NotImplementedError(
            f'{type(self).__name__}: unknown gradient for mode "{self.mode}"'
        )

    def grad(self, inputs, output_gradients):
        """Return the gradient of the cumulative op w.r.t. its input.

        The incoming gradient is reversed, accumulated with ``cumsum`` and
        reversed back; for ``"mul"`` it is first weighted by the forward
        cumulative product and the result is divided by ``x``.

        Raises
        ------
        NotImplementedError
            If ``self.mode`` is neither ``"add"`` nor ``"mul"``.
        """
        (x,) = inputs
        (gi,) = output_gradients

        if self.axis is None:
            # Flattened case: gradients are 1-d, reshape back to ``x``.
            if self.mode == "add":
                return [cumsum(gi[::-1])[::-1].reshape(x.shape)]
            if self.mode == "mul":
                fx = cumprod(x, axis=self.axis)
                return [cumsum((fx * gi)[::-1])[::-1].reshape(x.shape) / x]
            raise self._unknown_grad_mode()

        # We need to reverse the gradients along ``self.axis``,
        #  compute cumsum, then reverse again
        flip = [slice(None, None, None)] * gi.ndim
        flip[self.axis] = slice(None, None, -1)
        flip = tuple(flip)

        if self.mode == "add":
            return [cumsum(gi[flip], self.axis)[flip]]
        if self.mode == "mul":
            fx = cumprod(x, axis=self.axis)
            return [cumsum((fx * gi)[flip], self.axis)[flip] / x]
        raise self._unknown_grad_mode()

    def infer_shape(self, fgraph, node, shapes):
        """Output shape: total element count if flattened, else unchanged."""
        return [(prod(shapes[0]),)] if self.axis is None else shapes  # Flatten

    def c_code(self, node, name, inames, onames, sub):
        """Return the C implementation calling PyArray_CumSum/CumProd."""
        (x,) = inames
        (z,) = onames

        template = """
                int axis = %(params)s->c_axis;
                if (axis == 0 && PyArray_NDIM(%(x)s) == 1)
                    axis = NPY_MAXDIMS;
                npy_intp shape[1] = { PyArray_SIZE(%(x)s) };
                if(axis == NPY_MAXDIMS && !(%(z)s && PyArray_DIMS(%(z)s)[0] == shape[0]))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(1, shape, PyArray_TYPE((PyArrayObject*) py_%(x)s));
                }

                else if(axis != NPY_MAXDIMS && !(%(z)s && PyArray_CompareLists(PyArray_DIMS(%(z)s), PyArray_DIMS(%(x)s), PyArray_NDIM(%(x)s))))
                {
                    Py_XDECREF(%(z)s);
                    %(z)s = (PyArrayObject*) PyArray_SimpleNew(PyArray_NDIM(%(x)s), PyArray_DIMS(%(x)s), PyArray_TYPE(%(x)s));
                }

                if (!%(z)s)
                    %(fail)s;
                {

                    PyObject * t = NULL;
                    if(%(params)s->mode == MODE_ADD)
                        t = PyArray_CumSum(
                            %(x)s, axis,
                            PyArray_TYPE(%(x)s), %(z)s);
                    else if(%(params)s->mode == MODE_MUL)
                        t = PyArray_CumProd(
                            %(x)s, axis,
                            PyArray_TYPE(%(x)s), %(z)s);

                    if (!t){
                       %(fail)s;
                    }
                    // Because PyArray_CumSum/CumProd returns a newly created reference on t.
                    Py_XDECREF(t);
                }
            """
        # Only these four placeholders appear in the template.
        return template % {
            "params": sub["params"],
            "x": x,
            "z": z,
            "fail": sub["fail"],
        }

    def c_code_cache_version(self):
        """Version tuple identifying the generated C code for caching."""
        return (8,)

    def __str__(self):
        return f"{self.__class__.__name__}{{{self.axis}, {self.mode}}}"