def test_hash_and_eq_params_type(self):
    """Check ``==``, ``!=``, ``hash`` and ``name`` on ParamsType objects.

    Two ParamsType objects built from the same field names and equal
    types must compare equal, hash identically and share the same name.
    Changing a field name, a field's type class, or a tensor type's
    broadcast pattern must make them compare unequal.
    """

    def matrix_type():
        return TensorType("int64", (False, False))

    def tensor5_type():
        return TensorType("int64", (False, True, False, False, True))

    reference = ParamsType(a1=matrix_type(), a2=tensor5_type(), a3=Generic())
    candidate = ParamsType(a1=matrix_type(), a2=tensor5_type(), a3=Generic())
    assert reference == candidate
    assert not (reference != candidate)
    assert hash(reference) == hash(candidate)
    assert reference.name == candidate.name

    # Same types, but the second field is named ``other_name`` instead of ``a2``.
    candidate = ParamsType(
        a1=matrix_type(),
        other_name=tensor5_type(),
        a3=Generic(),
    )
    assert reference != candidate

    # Same names, but ``a2`` is now a Generic instead of a TensorType.
    candidate = ParamsType(a1=matrix_type(), a2=Generic(), a3=Generic())
    assert reference != candidate

    # Same names and classes, but ``a1`` has a different broadcast pattern.
    candidate = ParamsType(
        a1=TensorType("int64", (False, True)),
        a2=tensor5_type(),
        a3=Generic(),
    )
    assert reference != candidate
class CGer(BaseBLAS, Ger):
    """Ger op whose C implementation is generated by ``ger_c_code``.

    The op's ``destructive`` attribute is exposed to the C code through
    ``params_type`` as a boolean parameter.
    """

    params_type = ParamsType(destructive=bool_t)

    def c_code(self, node, name, inp, out, sub):
        """Return the C source produced by ``ger_c_code`` for this node."""
        A, a, x, y = inp
        (Z,) = out
        return ger_c_code(
            A,
            a,
            x,
            y,
            Z,
            fail=sub["fail"],
            params=sub["params"],
        )

    def c_code_cache_version(self):
        """Return the cache key: a local version plus the BLAS header version."""
        local_version = 11
        return (local_version, blas_header_version())
def test_params_type_with_enums(self):
    """Check how ParamsType handles enum-typed fields.

    Construction must raise AttributeError when two enum fields share
    constant names or alias names. When names are distinct, constants are
    reachable directly on the ParamsType, aliases resolve through
    ``enum_from_alias``, and the regular attributes stay available.
    """
    # Constants "A" and "B" appear in both enum types: this must be rejected.
    rejected = False
    try:
        ParamsType(enum1=EnumList("A", "B", "C"), enum2=EnumList("A", "B", "F"))
    except AttributeError:
        rejected = True
    if not rejected:
        raise Exception(
            "ParamsType should fail with common enum names inside different enum types."
        )

    # Alias "a" appears in both enum types: this must be rejected as well.
    rejected = False
    try:
        ParamsType(
            enum1=EnumList(("A", "a"), ("B", "b")),
            enum2=EnumList(("ONE", "a"), ("TWO", "two")),
        )
    except AttributeError:
        rejected = True
    if not rejected:
        raise Exception(
            "ParamsType should fail when there are aliases with same names as some constants."
        )
    # With distinct aliases the same construction must go through.
    ParamsType(
        enum1=EnumList(("A", "a"), ("B", "b")),
        enum2=EnumList(("ONE", "one"), ("TWO", "two")),
    )

    # Enum constants are reachable directly on the wrapper.
    wrapper = ParamsType(
        enum1=EnumList("A", ("B", "beta"), "C"),
        enum2=EnumList(("D", "delta"), "E", "F"),
    )
    assert wrapper.A == 0
    assert wrapper.B == 1
    assert wrapper.C == 2
    assert wrapper.D == 0
    assert wrapper.E == 1
    assert wrapper.F == 2

    # Aliases resolve to their constants.
    assert wrapper.enum_from_alias("beta") == wrapper.B
    assert wrapper.enum_from_alias("delta") == wrapper.D
    # "C" is not an alias, so the lookup falls back to the constant named C.
    assert wrapper.enum_from_alias("C") == wrapper.C

    # The regular wrapper attributes are still there.
    assert len(wrapper.fields) == len(wrapper.types) == wrapper.length
    assert wrapper.name
class QuadraticCOpFunc(ExternalCOp):
    """Quadratic ``a*x**2 + b*x + c`` op backed by an external C file.

    The coefficients are op properties and are passed to the
    implementation through ``params_type``.
    """

    __props__ = ("a", "b", "c")
    params_type = ParamsType(a=tensor_type_0d, b=scalar_type, c=generic_type)

    def __init__(self, a, b, c):
        # C source file and the name of the function to call in it.
        super().__init__(
            "c_code/test_quadratic_function.c",
            "APPLY_SPECIFIC(compute_quadratic)",
        )
        self.a, self.b, self.c = a, b, c

    def make_node(self, x):
        """Build an Apply node whose single output has the input's type."""
        as_tensor = tensor.as_tensor_variable(x)
        return Apply(self, [as_tensor], [as_tensor.type()])

    def perform(self, node, inputs, output_storage, coefficients):
        """Python implementation: store ``a*x**2 + b*x + c`` in the output."""
        value = inputs[0]
        out_cell = output_storage[0]
        out_cell[0] = (
            coefficients.a * (value**2) + coefficients.b * value + coefficients.c
        )
class CGemv(BaseBLAS, Gemv):
    """Gemv op whose C implementation is generated by ``gemv_c_code``.

    The op's ``inplace`` attribute is exposed to the C code through
    ``params_type`` as a boolean parameter.
    """

    params_type = ParamsType(inplace=bool_t)

    def __init__(self, inplace):
        super().__init__(inplace)

    def c_code(self, node, name, inp, out, sub):
        """Return the C source produced by ``gemv_c_code`` for this node."""
        y, alpha, A, x, beta = inp
        (z,) = out
        return gemv_c_code(
            y,
            A,
            x,
            z,
            alpha,
            beta,
            fail=sub["fail"],
            force_init_beta=check_force_gemv_init(),
            params=sub["params"],
        )

    def c_code_cache_version(self):
        """Return the cache key.

        It combines a local version, the BLAS header version and the
        result of ``check_force_gemv_init()``, which also feeds ``c_code``.
        """
        local_version = 14
        return (local_version, blas_header_version(), check_force_gemv_init())
class BaseCorr3dMM(gof.OpenMPOp):
    """
    Base class for `Corr3dMM`, `Corr3dMM_gradWeights` and
    `Corr3dMM_gradInputs`. Cannot be used directly.

    Every sub-class must define internal attribute ``_direction`` out of
    __init__(). ``_direction`` must take one of following values:

     - "forward" to correlate bottom with weights and store results in top.
     - "backprop weights" to do a valid convolution of bottom with top
       (swapping the first two dimensions) and store results in weights.
     - "backprop inputs" to do a full convolution of top with weights
       (swapping the first two dimensions) and store results in bottom.

    Parameters
    ----------
    border_mode : {'valid', 'full', 'half'}
        Additionally, the padding size could be directly specified by an
        integer or a tuple of three of integers
    subsample
        Perform subsampling of the output (default: (1, 1, 1)).
    filter_dilation
        Perform dilated correlation (default: (1, 1, 1))
    num_groups
        Perform grouped convolutions (default: 1)

    """

    check_broadcast = False
    __props__ = ("border_mode", "subsample", "filter_dilation", "num_groups")

    _direction = None

    params_type = ParamsType(
        direction=EnumList(
            ("DIRECTION_FORWARD", "forward"),  # 0
            ("DIRECTION_BACKPROP_WEIGHTS", "backprop weights"),  # 1
            ("DIRECTION_BACKPROP_INPUTS", "backprop inputs"),  # 2
        ),
        dH=int64,
        dW=int64,
        dD=int64,
        dilH=int64,
        dilW=int64,
        dilD=int64,
        padH=int64,
        padW=int64,
        padD=int64,
        num_groups=int64,
    )

    def __init__(
        self,
        border_mode="valid",
        subsample=(1, 1, 1),
        filter_dilation=(1, 1, 1),
        openmp=None,
        num_groups=1,
    ):
        """Validate and store the op properties.

        Raises
        ------
        ValueError
            If ``border_mode`` is not "valid", "full", "half", a
            non-negative integer or a tuple of three non-negative
            integers; if ``subsample`` or ``filter_dilation`` does not
            have three elements; if ``num_groups`` is below 1; or if the
            sub-class did not set a valid ``_direction``.
        """
        super().__init__(openmp=openmp)
        # An integer padding is expanded to the same padding on all three axes.
        if isinstance(border_mode, int):
            if border_mode < 0:
                raise ValueError("invalid border_mode {}, which must be a "
                                 "non-negative integer".format(border_mode))
            border_mode = (border_mode, border_mode, border_mode)
        if isinstance(border_mode, tuple):
            if len(border_mode) != 3 or min(border_mode) < 0:
                raise ValueError(
                    "invalid border_mode {}, which must be a tuple of "
                    "three non-negative integers".format(border_mode))
            pad_h, pad_w, pad_d = map(int, border_mode)
            border_mode = (pad_h, pad_w, pad_d)
        if not ((isinstance(border_mode, tuple) and min(border_mode) >= 0)
                or border_mode in ("valid", "full", "half")):
            raise ValueError(
                "invalid border_mode {}, which must be either "
                '"valid", "full", "half", an integer or a tuple of three'
                " integers".format(border_mode))
        self.border_mode = border_mode
        if len(subsample) != 3:
            raise ValueError("subsample must have three elements")
        if len(filter_dilation) != 3:
            raise ValueError("filter_dilation must have three elements")
        self.subsample = tuple(subsample)
        self.filter_dilation = tuple(filter_dilation)
        if num_groups < 1:
            raise ValueError("Number of groups should be greater than 0")
        self.num_groups = num_groups

        if not theano.config.blas.ldflags:
            # Theano will use a NumPy C implementation of [sd]gemm_ instead.
            self.blas_type = ""
        else:
            if "openblas" in theano.config.blas.ldflags:
                self.blas_type = "openblas"
            elif "mkl" in theano.config.blas.ldflags:
                self.blas_type = "mkl"
            else:
                self.blas_type = ""

        if self._direction not in [
            "forward", "backprop weights", "backprop inputs"
        ]:
            raise ValueError("_direction must be one of 'forward', "
                             "'backprop weights', 'backprop inputs'")

    @property
    def pad(self):
        """Padding triple handed to the C code.

        "half" and "full" are encoded as -1 and -2 on every axis (the
        generated C code replaces them with real paddings once the kernel
        size is known); an explicit tuple is returned unchanged and
        "valid" maps to zero padding.
        """
        if self.border_mode == "half":
            return (-1, -1, -1)
        elif self.border_mode == "full":
            return (-2, -2, -2)
        elif isinstance(self.border_mode, tuple):
            return self.border_mode
        else:
            assert self.border_mode == "valid"
            return (0, 0, 0)

    # Direction should be converted to real enum value,
    # as it is compared to integer later in c_code_helper().
    direction = property(
        lambda self: self.params_type.enum_from_alias(self._direction))

    # Per-axis views of subsample / filter_dilation / pad, named after the
    # corresponding ``params_type`` fields.
    dH = property(lambda self: self.subsample[0])
    dW = property(lambda self: self.subsample[1])
    dD = property(lambda self: self.subsample[2])

    dilH = property(lambda self: self.filter_dilation[0])
    dilW = property(lambda self: self.filter_dilation[1])
    dilD = property(lambda self: self.filter_dilation[2])

    padH = property(lambda self: self.pad[0])
    padW = property(lambda self: self.pad[1])
    padD = property(lambda self: self.pad[2])

    def __str__(self):
        return "{}{{{}, {}, {}, {}}}".format(
            self.__class__.__name__,
            self.border_mode,
            str(self.subsample),
            str(self.filter_dilation),
            str(self.num_groups),
        )

    @staticmethod
    def as_common_dtype(in1, in2):
        """
        Upcast input variables if necessary.
        """
        dtype = theano.scalar.upcast(in1.dtype, in2.dtype)
        return in1.astype(dtype), in2.astype(dtype)

    def __setstate__(self, d):
        self.__dict__.update(d)
        # States that carry no ``num_groups`` entry get the default of 1.
        if not hasattr(self, "num_groups"):
            self.num_groups = 1

    def c_support_code(self):
        ccodes = blas_headers.blas_header_text()
        if self.blas_type == "openblas":
            ccodes += blas_headers.openblas_threads_text()
        elif self.blas_type == "mkl":
            ccodes += blas_headers.mkl_threads_text()
        return ccodes

    def c_libraries(self):
        return ldflags()

    def c_compile_args(self):
        compile_args = ldflags(libs=False, flags=True)
        compile_args += super().c_compile_args()
        return compile_args

    def c_lib_dirs(self):
        return ldflags(libs=False, libs_dir=True)

    def c_header_dirs(self):
        return ldflags(libs=False, include_dir=True)

    def c_headers(self):
        headers = ["<stdio.h>"]
        headers += super().c_headers()
        return headers

    def c_code_cache_version(self):
        # raise this whenever modifying any of the support_code_files
        return (8, self.openmp, blas_header_version())

    def c_support_code_apply(self, node, nodename):
        """Return the support C code with dtype/threading names filled in.

        The code is read from ``c_code/corr3d_gemm.c`` next to this module
        and formatted with a substitution dict that depends on the dtype
        of the node's first input, on ``self.openmp`` and on
        ``self.blas_type``.
        """
        # REMEMBER TO RAISE c_code_cache_version when changing any of
        # these files
        sub = {}
        dtype = str(node.__dict__["inputs"][0].dtype)
        assert dtype in ("float32", "float64")
        if dtype == "float32":
            sub["gemm"] = "sgemm_"
            sub["float_type"] = "npy_float"
            sub["float_typenum"] = "NPY_FLOAT"
            sub["n_bytes"] = 4
            sub["c_float_type"] = "float"
        else:
            sub["gemm"] = "dgemm_"
            sub["float_type"] = "npy_double"
            sub["float_typenum"] = "NPY_DOUBLE"
            sub["n_bytes"] = 8
            sub["c_float_type"] = "double"

        if self.openmp:
            sub["omp_flags"] = "#pragma omp parallel for schedule(static)"
            sub["omp_get_max_threads"] = "omp_get_max_threads()"
            sub["omp_get_thread_num"] = "omp_get_thread_num()"

            if self.blas_type == "openblas":
                sub["blas_set_num_threads"] = "openblas_set_num_threads"
                sub["blas_get_num_threads"] = "openblas_get_num_threads()"
            elif self.blas_type == "mkl":
                sub["blas_set_num_threads"] = "mkl_set_num_threads"
                sub["blas_get_num_threads"] = "mkl_get_max_threads()"
            else:
                sub["blas_set_num_threads"] = ""
                sub["blas_get_num_threads"] = "0"
        else:
            sub["omp_flags"] = ""
            sub["omp_get_max_threads"] = "1"
            sub["omp_get_thread_num"] = "0"
            sub["blas_set_num_threads"] = ""
            sub["blas_get_num_threads"] = "0"

        files = [os.path.join("c_code", "corr3d_gemm.c")]
        code_dir = os.path.split(__file__)[0]
        codes = []
        for f in files:
            # Close each file deterministically instead of relying on the
            # garbage collector to release the handle.
            with open(os.path.join(code_dir, f)) as source_file:
                codes.append(source_file.read())
        return "".join(codes) % sub

    def c_code_helper(self,
                      bottom,
                      weights,
                      top,
                      sub,
                      height=None,
                      width=None,
                      depth=None):
        """
        This generates the C code for Corr3dMM (direction="forward"),
        Corr3dMM_gradWeights (direction="backprop weights"), and
        Corr3dMM_gradInputs (direction="backprop inputs").
        Depending on the direction, one of bottom, weights, top will
        receive the output, while the other two serve as inputs.

        :param bottom: Variable name of the input images in the forward pass,
            or the gradient of the input images in backprop wrt. inputs
        :param weights: Variable name of the filters in the forward pass,
            or the gradient of the filters in backprop wrt. weights
        :param top: Variable name of the output images / feature maps in the
            forward pass, or the gradient of the outputs in the backprop passes
        :param sub: Dictionary of substitutions useable to help generating the
            C code.
        :param height: If self.subsample[0] != 1, a variable giving the height
            of the filters for direction="backprop weights" or the height of
            the input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the height of the
            filters for direction="backprop weights".  Ignored otherwise.
        :param width: If self.subsample[1] != 1, a variable giving the width
            of the filters for direction="backprop weights" or the width of the
            input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the width of the
            filters for direction="backprop weights".  Ignored otherwise.
        :param depth: If self.subsample[2] != 1, a variable giving the depth
            of the filters for direction="backprop weights" or the depth of the
            input images for direction="backprop inputs".

            If self.border_mode == 'half', a variable giving the depth of the
            filters for direction="backprop weights".  Ignored otherwise.
        :raises ValueError: if height, width or depth is not given although
            the direction, subsampling and padding require it.
        """

        # When subsampling, we cannot unambiguously infer the height, width
        # and depth of bottom and weights from top, so we require them to be
        # given. Similarly, when border_mode="half", we cannot infer the
        # weight size.
        if height:
            height = f"(*(npy_int64 *)(PyArray_DATA({height})))"
        else:
            if ((self.direction != 0) and (self.dH != 1)) or (
                (self.direction == 1) and (self.padH == -1)
            ):
                raise ValueError(
                    "height must be given for backprop with vertical sampling or border_mode='half'"
                )
            # -1 tells the C code that no explicit size was supplied.
            height = "-1"
        if width:
            width = f"(*(npy_int64 *)(PyArray_DATA({width})))"
        else:
            if ((self.direction != 0) and (self.dW != 1)) or (
                (self.direction == 1) and (self.padW == -1)
            ):
                raise ValueError(
                    "width must be given for backprop with horizontal sampling or border_mode='half'"
                )
            width = "-1"
        if depth:
            depth = f"(*(npy_int64 *)(PyArray_DATA({depth})))"
        else:
            if ((self.direction != 0) and (self.dD != 1)) or (
                (self.direction == 1) and (self.padD == -1)
            ):
                raise ValueError(
                    "depth must be given for backprop with depth sampling or border_mode='half'"
                )
            depth = "-1"

        # NOTE(review): the generated C below tests ``PyArray_NDIM(*out)==4``
        # although ``out_dim`` has five entries and the buffer is allocated
        # with ``PyArray_ZEROS(5, ...)``; a 5-d output therefore never passes
        # the reuse test and is reallocated on every call. Left unchanged
        # because switching to 5 would start reusing output buffers, which
        # must first be checked against corr3d_gemm.c (and would require
        # raising c_code_cache_version).
        return """
    // Mandatory args
    int direction = %(params)s->direction;  // forward, bprop weights, bprop inputs

    // Optional args
    int dH = %(params)s->dH;
    int dW = %(params)s->dW;
    int dD = %(params)s->dD;
    int dilH = %(params)s->dilH;
    int dilW = %(params)s->dilW;
    int dilD = %(params)s->dilD;
    int padH = %(params)s->padH;
    int padW = %(params)s->padW;
    int padD = %(params)s->padD;
    int numgroups = %(params)s->num_groups;

    PyArrayObject * bottom = %(bottom)s;
    PyArrayObject * weights = %(weights)s;
    PyArrayObject * top = %(top)s;
    PyArrayObject * out2 = NULL;
    PyArrayObject **out = NULL;

    switch(%(params)s->direction) {
        case DIRECTION_FORWARD:
            out = &%(top)s;
            break;
        case DIRECTION_BACKPROP_WEIGHTS:
            out = &%(weights)s;
            break;
        case DIRECTION_BACKPROP_INPUTS:
            out = &%(bottom)s;
            break;
        default:
            PyErr_SetString(PyExc_ValueError, "CPU Corr3dMM: Invalid direction.");
            {%(fail)s}
            break;
    }

    // Obtain or infer kernel width, height and depth
    // (we need to know it early to be able to handle auto-padding)
    int kH, kW, kD, dil_kH, dil_kW, dil_kD;
    if (direction != 1) {
        // weight is an input variable, we can just read its shape
        kH = PyArray_DIMS(weights)[2];
        kW = PyArray_DIMS(weights)[3];
        kD = PyArray_DIMS(weights)[4];
    }
    else {
        if (%(height)s != -1) {
            // kernel height is specified (perhaps vertical subsampling or half padding)
            kH = %(height)s;
        }
        else if (padH == -2) {
            // vertical full padding, we can infer the kernel height
            kH = (2 - PyArray_DIMS(bottom)[2] + (PyArray_DIMS(top)[2] - 1) * dH - 1)/ dilH + 1;
        }
        else {
            // explicit padding, we can infer the kernel height
            kH = (PyArray_DIMS(bottom)[2] + 2*padH - (PyArray_DIMS(top)[2] - 1) * dH - 1) / dilH +1;
        }
        if (%(width)s != -1) {
            kW = %(width)s;
        }
        else if (padW == -2) {
            kW = (2 - PyArray_DIMS(bottom)[3] + (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        else {
            kW = (PyArray_DIMS(bottom)[3] + 2*padW - (PyArray_DIMS(top)[3] - 1) * dW - 1) / dilW + 1;
        }
        if (%(depth)s != -1) {
            kD = %(depth)s;
        }
        else if (padD == -2) {
            kD = (2 - PyArray_DIMS(bottom)[4] + (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
        else {
            kD = (PyArray_DIMS(bottom)[4] + 2*padD - (PyArray_DIMS(top)[4] - 1) * dD - 1) / dilD + 1;
        }
    }

    // Implicit dilated kernel size
    dil_kH = (kH - 1) * dilH + 1;
    dil_kW = (kW - 1) * dilW + 1;
    dil_kD = (kD - 1) * dilD + 1;

    // Auto-padding if requested
    if (padH == -1) {  // vertical half padding
        padH = dil_kH / 2;
    }
    else if (padH == -2) {  // vertical full padding
        padH = dil_kH - 1;
    }
    else if (padH < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padH must be >= -2");
        %(fail)s
    }
    if (padW == -1) {  // horizontal half padding
        padW = dil_kW / 2;
    }
    else if (padW == -2) {  // horizontal full padding
        padW = dil_kW - 1;
    }
    else if (padW < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padW must be >= -2");
        %(fail)s
    }
    if (padD == -1) {  // depth half padding
        padD = dil_kD / 2;
    }
    else if (padD == -2) {  // depth full padding
        padD = dil_kD - 1;
    }
    else if (padD < 0) {
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: padD must be >= -2");
        %(fail)s
    }

    // Infer output shape
    npy_intp out_dim[5];
    switch(direction) {
    case 0:  // forward pass
        // output is top: (batchsize, num_filters, height, width, depth)
        // height and width: top = (bottom + 2*pad - ((weight-1)*dil + 1)) / sample + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(bottom)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[0];
        out_dim[2] = (npy_intp)((PyArray_DIMS(bottom)[2] + 2*padH - ((PyArray_DIMS(weights)[2]-1)*dilH + 1)) / dH + 1);
        out_dim[3] = (npy_intp)((PyArray_DIMS(bottom)[3] + 2*padW - ((PyArray_DIMS(weights)[3]-1)*dilW + 1)) / dW + 1);
        out_dim[4] = (npy_intp)((PyArray_DIMS(bottom)[4] + 2*padD - ((PyArray_DIMS(weights)[4]-1)*dilD + 1)) / dD + 1);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM: impossible output shape\\n"
                         " bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4]);
            %(fail)s
        }
        break;
    case 1:  // backprop wrt. weights
        // output is weights: (num_filters, num_channels, height, width, depth)
        // height and width: weights = (bottom + 2*pad - (top - 1) * sample - 1) / dil + 1
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[1];
        out_dim[1] = (npy_intp)PyArray_DIMS(bottom)[1] / numgroups;
        out_dim[2] = (npy_intp)kH;  // already inferred further above
        out_dim[3] = (npy_intp)kW;  // how convenient
        out_dim[4] = (npy_intp)kD;
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. weights: impossible output shape\\n"
                         " bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)PyArray_DIMS(bottom)[0], (long int)PyArray_DIMS(bottom)[1],
                         (long int)PyArray_DIMS(bottom)[2], (long int)PyArray_DIMS(bottom)[3],
                         (long int)PyArray_DIMS(bottom)[4],
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            %(fail)s
        }
        break;
    case 2:  // backprop wrt. inputs
        // output is bottom: (batchsize, num_channels, height, width, depth)
        // height and width: bottom = (top - 1) * sample + (weights-1)*dil + 1 - 2*pad
        out_dim[0] = (npy_intp)PyArray_DIMS(top)[0];
        out_dim[1] = (npy_intp)PyArray_DIMS(weights)[1] * numgroups;
        out_dim[2] = (npy_intp)((%(height)s != -1) ? %(height)s : (PyArray_DIMS(top)[2] - 1) * dH + (PyArray_DIMS(weights)[2]-1)*dilH + 1 - 2*padH);
        out_dim[3] = (npy_intp)((%(width)s != -1) ? %(width)s : (PyArray_DIMS(top)[3] - 1) * dW + (PyArray_DIMS(weights)[3]-1)*dilW + 1 - 2*padW);
        out_dim[4] = (npy_intp)((%(depth)s != -1) ? %(depth)s : (PyArray_DIMS(top)[4] - 1) * dD + (PyArray_DIMS(weights)[4]-1)*dilD + 1 - 2*padD);
        if (out_dim[0] < 0 || out_dim[1] < 0 || out_dim[2] <= 0 || out_dim[3] <= 0 || out_dim[4] <= 0)
        {
            PyErr_Format(PyExc_ValueError,
                         "Corr3dMM backprop wrt. inputs: impossible output shape\\n"
                         " bottom shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " weights shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n"
                         " top shape: %%ld x %%ld x %%ld x %%ld x %%ld\\n",
                         (long int)out_dim[0], (long int)out_dim[1], (long int)out_dim[2],
                         (long int)out_dim[3], (long int)out_dim[4],
                         (long int)PyArray_DIMS(weights)[0], (long int)PyArray_DIMS(weights)[1],
                         (long int)PyArray_DIMS(weights)[2], (long int)PyArray_DIMS(weights)[3],
                         (long int)PyArray_DIMS(weights)[4],
                         (long int)PyArray_DIMS(top)[0], (long int)PyArray_DIMS(top)[1],
                         (long int)PyArray_DIMS(top)[2], (long int)PyArray_DIMS(top)[3],
                         (long int)PyArray_DIMS(top)[4]);
            %(fail)s
        }
        break;
    default:
        PyErr_SetString(PyExc_ValueError, "BaseCorr3dMM: direction must be 0, 1, or 2\\n");
        %(fail)s
    }

    // Prepare output array
    int typenum;
    if ( !(*out
           && PyArray_NDIM(*out)==4
           && PyArray_IS_C_CONTIGUOUS(*out)
           && PyArray_DIMS(*out)[0]==out_dim[0]
           && PyArray_DIMS(*out)[1]==out_dim[1]
           && PyArray_DIMS(*out)[2]==out_dim[2]
           && PyArray_DIMS(*out)[3]==out_dim[3]
           && PyArray_DIMS(*out)[4]==out_dim[4]))
    {
        Py_XDECREF(*out);
        if (direction != 1) {
          typenum = PyArray_TYPE(weights);
        }
        else {
          typenum = PyArray_TYPE(bottom);
        }
        //Change to PyArray_ZEROS which is faster than PyArray_EMPTY.
        *out = (PyArrayObject*)PyArray_ZEROS(5,
                                          out_dim,
                                          typenum,
                                          0);
        if (NULL == *out)
        {
            PyErr_Format(PyExc_RuntimeError,
                    "BaseCorr3dMM: Failed to allocate output of %%lld x %%lld x %%lld x %%lld x %%lld",
                    (long long)out_dim[0], (long long)out_dim[1],
                    (long long)out_dim[2], (long long)out_dim[3], (long long)out_dim[4]);
            %(fail)s
        }
    }

    // Call corr3dMM code
    out2 = corr3dMM(%(bottom)s, %(weights)s, %(top)s, direction,
                    dH, dW, dD, dilH, dilW, dilD, padH, padW, padD,
                    numgroups);
    if (out2==NULL){
       %(fail)s
    }
    assert (out2 == *out);

""" % dict(
            bottom=bottom,
            weights=weights,
            top=top,
            height=height,
            width=width,
            depth=depth,
            fail=sub["fail"],
            params=sub["params"],
        )
class GpuAdvancedIncSubtensor1(COp):
    """
    Implement AdvancedIncSubtensor1 on the gpu.

    For every index ``i`` in the index vector, row ``x[i]`` is either
    overwritten with (``set_instead_of_inc``) or incremented by the
    matching row of ``y``. With ``inplace`` the input buffer is reused for
    the output; otherwise ``x`` is copied first.

    """

    _f16_ok = True
    __props__ = ("inplace", "set_instead_of_inc")
    params_type = ParamsType(
        inplace=bool_t,
        set_instead_of_inc=bool_t,
        context=gpu_context_type,
        # following params are used into c_init_code_struct(),
        # as inputs are not available in that function.
        ndim_input_0=size_t,
        ndim_input_1=size_t,
        typecode_input_0=int_t,
        typecode_input_1=int_t,
    )

    def __init__(self, inplace=False, set_instead_of_inc=False):
        self.inplace = inplace
        self.set_instead_of_inc = set_instead_of_inc
        if inplace:
            # Output 0 overwrites input 0.
            self.destroy_map = {0: [0]}

    def clone_inplace(self):
        """Return a new op of the same class with ``inplace=True``."""
        return self.__class__(inplace=True, set_instead_of_inc=self.set_instead_of_inc)

    def make_node(self, x, y, ilist):
        """Build the Apply node for ``x``, ``y`` and the index vector.

        Raises
        ------
        TypeError
            If the index is not an integer vector or ``x`` is a scalar.
        """
        ctx_name = infer_context_name(x, y)
        x_ = as_gpuarray_variable(x, ctx_name)
        y_ = as_gpuarray_variable(y, ctx_name)
        ilist_ = tt.as_tensor_variable(ilist)

        # NOTE(review): while assertions are enabled this assert fires before
        # the ``y_.type.ndim > x_.type.ndim`` TypeError below can be reached.
        # Kept as is, since dropping it would change the exception type seen
        # by callers.
        assert x_.type.ndim >= y_.type.ndim
        if ilist_.type.dtype not in tt.integer_dtypes:
            raise TypeError("index must be integers")
        if ilist_.type.ndim != 1:
            raise TypeError("index must be vector")
        if x_.type.ndim == 0:
            raise TypeError("cannot index into a scalar")
        if y_.type.ndim > x_.type.ndim:
            if self.set_instead_of_inc:
                opname = "set"
            else:
                opname = "increment"
            raise TypeError(
                "cannot %s x subtensor with ndim=%s by y with ndim=%s "
                % (opname, x_.type.ndim, y_.type.ndim)
            )

        return gof.Apply(self, [x_, y_, ilist_], [x_.type()])

    def get_params(self, node):
        """Build the params object, adding the per-node values to the props."""
        return self.params_type.get_params(
            self,
            context=node.outputs[0].type.context,
            # following params are used into c_init_code_struct().
            ndim_input_0=node.inputs[0].ndim,
            ndim_input_1=node.inputs[1].ndim,
            typecode_input_0=node.inputs[0].type.typecode,
            typecode_input_1=node.inputs[1].type.typecode,
        )

    # We can't use the parent version that loops on each index
    # as we also need to loop when set_instead_of_inc is True and the
    # parent doesn't loop in that case.
    def perform(self, node, inp, out_, params=None):
        """Python fallback: set or increment the rows of ``x`` one by one."""
        # TODO opt to make this inplace
        x, y, idx = inp
        (out,) = out_

        if not self.inplace:
            x = x.copy()

        out[0] = x

        if len(idx) == 0:
            return

        # Make sure idx is not a GpuArray otherwise we cannot use its
        # content to index x and y (This is because we serve as
        # fallback for _dev20).
        if isinstance(idx, gpuarray.GpuArray):
            idx = np.asarray(idx)

        # If `y` has as many dimensions as `x`, then we want to iterate
        # jointly on `x` and `y`. Otherwise, it means `y` should be
        # broadcasted to fill all relevant rows of `x`.
        if y.ndim == x.ndim and y.shape[0] != 1:
            assert len(y) == len(idx)
            if self.set_instead_of_inc:
                for (j, i) in enumerate(idx):
                    x[i] = y[j]
            else:
                k = get_iadd(node.inputs[0], node.inputs[1])
                for (j, i) in enumerate(idx):
                    k(x[i], y[j], broadcast=True)
        else:
            if y.ndim == x.ndim:
                # First dim is always 1 in this case.
                reshaped_y = y.reshape(y.shape[1:])
            else:
                nb_dims_to_add = (x.ndim - 1) - y.ndim
                reshaped_y = y.reshape((1,) * nb_dims_to_add + y.shape)

            if self.set_instead_of_inc:
                for i in idx:
                    x[i] = reshaped_y
            else:
                k = get_iadd(node.inputs[0], node.inputs[1])
                for i in idx:
                    k(x[i], reshaped_y, broadcast=True)

    def c_headers(self):
        return [
            "<numpy_compat.h>",
            "<gpuarray/error.h>",
            "<gpuarray/array.h>",
            "<gpuarray/elemwise.h>",
            "gpuarray_helper.h",
        ]

    def c_header_dirs(self):
        return [gpuarray_helper_inc_dir()]

    def c_support_code_struct(self, node, nodename):
        # Per-struct handle for the "a += b" kernel built in
        # c_init_code_struct() and called from c_code().
        return "\nGpuElemwise *iadd;\n"

    def c_init_code_struct(self, node, name, sub):
        """Return the C code that creates the ``iadd`` elemwise kernel."""
        return """
        gpuelemwise_arg args[2] = {{0}};
        args[0].name = "a";
        args[0].typecode = %(params)s->typecode_input_0;
        args[0].flags = GE_READ|GE_WRITE;
        args[1].name = "b";
        args[1].typecode = %(params)s->typecode_input_1;
        args[1].flags = GE_READ;
        iadd = GpuElemwise_new(%(params)s->context->ctx, "", "a += b",
                               2, args, %(params)s->ndim_input_1, GE_CONVERT_F16);
        if (iadd == NULL) {
          PyErr_SetString(PyExc_RuntimeError, "Could not intialize inplace add support");
          %(fail)s
        }
        """ % dict(
            params=sub["params"], fail=sub["fail"]
        )

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C code that sets/increments the indexed rows.

        Raises
        ------
        NotImplementedError
            If ``x`` and ``y`` do not have the same number of dimensions.
        """
        if node.inputs[0].ndim != node.inputs[1].ndim:
            raise NotImplementedError("This case does not have C code yet.")

        # The ``fail`` substitution used in the template frees ``start`` and
        # ``step`` before running the regular fail code, so every error exit
        # releases both buffers. A failed set/inc call now takes that exit
        # too, instead of only setting the Python error and carrying on.
        return """
        PyGpuArrayObject *row_x, *row_y;
        size_t nd = %(params)s->ndim_input_0;
        ssize_t *start = NULL, *step = NULL;
        size_t num_indices, j;
        int ret;
        int broadcast_y;

        start = (ssize_t*)malloc(nd * sizeof(ssize_t));
        step = (ssize_t*)malloc(nd * sizeof(ssize_t));
        if (start == NULL || step == NULL) {
          PyErr_NoMemory();
          %(fail)s
        }

        for (j = 0; j < nd; ++j) {
          start[j] = 0;
          step[j] = 1;
        }
        step[0] = 0;
        num_indices = PyArray_SIZE(%(ind)s);
        if (!%(params)s->inplace) {
          %(out)s = theano_try_copy(%(out)s, %(x)s);
          if (%(out)s == NULL) {
            // Exception already set
            %(fail)s
          }
        } else {
          Py_XDECREF(%(out)s);
          %(out)s = %(x)s;
          Py_INCREF(%(out)s);
        }
        if (num_indices != 0) {
          if ((num_indices - 1) > LONG_MAX) {
            PyErr_Format(PyExc_AssertionError,
                         "num_indices %%lld exceeds LONG_MAX + 1", (long long)num_indices);
            %(fail)s
          }
          broadcast_y = PyGpuArray_DIM(%(y)s, 0) == 1;
          for (j = 0; j < num_indices; j++) {
            start[0] = *(dtype_%(ind)s *)PyArray_GETPTR1(%(ind)s, j);
            if (start[0] < 0)
              start[0] += PyGpuArray_DIM(%(out)s, 0);
            if (start[0] < 0 || start[0] >= PyGpuArray_DIM(%(out)s, 0)) {
               PyErr_SetString(PyExc_IndexError, "index out of bounds");
               %(fail)s;
            }
            row_x = pygpu_index(%(out)s, start, (ssize_t *)PyGpuArray_DIMS(%(out)s), step);
            if (row_x == NULL)
              %(fail)s;

            if (broadcast_y)
              start[0] = 0;
            else
              start[0] = j;

            row_y = pygpu_index(%(y)s, start, (ssize_t *)PyGpuArray_DIMS(%(y)s), step);
            if (row_y == NULL) {
              Py_DECREF(row_x);
              %(fail)s;
            }

            if (%(params)s->set_instead_of_inc) {
              ret = GpuArray_setarray(&row_x->ga, &row_y->ga);
            } else {
              void *args[2];
              args[0] = (void *)&row_x->ga;
              args[1] = (void *)&row_y->ga;
              ret = GpuElemwise_call(iadd, args, GE_BROADCAST | GE_PADSHAPE);
            }
            Py_DECREF(row_x);
            Py_DECREF(row_y);
            if (ret != GA_NO_ERROR) {
              PyErr_SetString(PyExc_RuntimeError, "Failed to set/inc elements");
              %(fail)s
            }
          }
        }

        free(start);
        free(step);
        """ % dict(
            x=inputs[0],
            y=inputs[1],
            ind=inputs[2],
            out=outputs[0],
            params=sub["params"],
            fail="""
            {
                free(start);
                free(step);
                %(fail)s
            }
            """
            % dict(fail=sub["fail"]),
        )

    def c_code_cache_version(self):
        # Raised from 5: the generated c_code changed (a failed set/inc now
        # jumps to the fail path).
        return (6,)
def test_params_type_filtering(self):
    """Check ``filter``, ``values_eq`` and ``values_eq_approx`` on Params.

    Values whose dtypes differ from the declared field types must be
    rejected when ``filter`` is called as ``(o, True)`` or
    ``(o, False, False)`` and accepted with ``strict=False,
    allow_downcast=True``; values that already match must pass in every
    mode.
    """
    shape_tensor5 = (1, 2, 2, 3, 2)
    size_tensor5 = 1
    for extent in shape_tensor5:
        size_tensor5 *= extent
    random_tensor = np.random.normal(size=size_tensor5).reshape(shape_tensor5)

    def int_matrix(dtype):
        rows = [[1, 2, 3, 4, 5, 6], [7, 8, 9, 10, 11, 12]]
        return np.asarray(rows).astype(dtype)

    w = ParamsType(
        a1=TensorType("int32", (False, False)),
        a2=TensorType("float64", (False, False, False, False, False)),
        a3=Generic(),
    )

    # A value whose dtypes do not match the params type.
    mismatched = Params(
        w,
        a1=int_matrix("int64"),
        a2=random_tensor.astype("float32"),
        a3=2000,
    )
    # Must fail: a1 is not int32 and a2 is not float64.
    with pytest.raises(TypeError):
        w.filter(mismatched, True)
    # Must fail: same mismatch, and downcasting is disallowed.
    with pytest.raises(TypeError):
        w.filter(mismatched, False, False)
    # Must pass once downcasting is allowed.
    w.filter(mismatched, strict=False, allow_downcast=True)

    # A value that matches the params type passes in every mode.
    matching = Params(
        w,
        a1=int_matrix("int32"),
        a2=random_tensor.astype("float64"),
        a3=2000,
    )
    w.filter(matching, strict=True)
    w.filter(matching, strict=False, allow_downcast=False)
    w.filter(matching, strict=False, allow_downcast=True)

    # An identically built value is equal, exactly and approximately.
    twin = Params(
        w,
        a1=int_matrix("int32"),
        a2=random_tensor.astype("float64"),
        a3=2000,
    )
    assert w.values_eq(matching, twin)
    assert w.values_eq_approx(matching, twin)

    # values_eq_approx alone: the original author did not know exactly which
    # differences values_eq rejects but values_eq_approx accepts, so the
    # float values are just perturbed slightly.
    perturbed_a2 = random_tensor.astype("float32") * 10 / 2.2 * 2.19999999999 / 10
    perturbed = Params(
        w,
        a1=int_matrix("int32"),
        a2=perturbed_a2.astype("float64"),
        a3=2000.0 - 0.00000000000000001,
    )
    assert w.values_eq_approx(matching, perturbed)
class QuadraticOpFunc(COp):
    """Quadratic ``a*x**2 + b*x + c`` op with inline C code.

    The coefficients are op properties passed to the implementation
    through ``params_type``: ``a`` as a 0-d tensor, ``b`` as a scalar and
    ``c`` as a generic Python object.
    """

    __props__ = ("a", "b", "c")
    params_type = ParamsType(a=tensor_type_0d, b=scalar_type, c=generic_type)

    def __init__(self, a, b, c):
        self.a, self.b, self.c = a, b, c

    def make_node(self, x):
        """Build an Apply node whose single output has the input's type."""
        as_tensor = tensor.as_tensor_variable(x)
        return Apply(self, [as_tensor], [as_tensor.type()])

    def perform(self, node, inputs, output_storage, coefficients):
        """Python implementation: store ``a*x**2 + b*x + c`` in the output."""
        value = inputs[0]
        out_cell = output_storage[0]
        out_cell[0] = (
            coefficients.a * (value**2) + coefficients.b * value + coefficients.c
        )

    def c_code_cache_version(self):
        return (1, 5)

    def c_support_code_apply(self, node, name):
        """Return a C helper applying the quadratic in place on an array.

        The helper is named after the node and typed with the C type taken
        from the first input's ``dtype_specs()``.
        """
        template = """
        /* Computes: x = a*x*x + b*x + c for x in tensor. */
        int quadratic_%(name)s(PyArrayObject* tensor, %(float_type)s a, %(float_type)s b, %(float_type)s c) {
            NpyIter* iterator = NpyIter_New(tensor,
                NPY_ITER_READWRITE | NPY_ITER_EXTERNAL_LOOP | NPY_ITER_REFS_OK,
                NPY_KEEPORDER, NPY_NO_CASTING, NULL);
            if(iterator == NULL) {
                PyErr_SetString(PyExc_RuntimeError, "Unable to iterate over a tensor for an elemwise operation.");
                return -1;
            }
            NpyIter_IterNextFunc* get_next = NpyIter_GetIterNext(iterator, NULL);
            char** data_ptr = NpyIter_GetDataPtrArray(iterator);
            npy_intp* stride_ptr = NpyIter_GetInnerStrideArray(iterator);
            npy_intp* innersize_ptr = NpyIter_GetInnerLoopSizePtr(iterator);
            do {
                char* data = *data_ptr;
                npy_intp stride = *stride_ptr;
                npy_intp count = *innersize_ptr;
                while(count) {
                    %(float_type)s x = *((%(float_type)s*)data);
                    *((%(float_type)s*)data) = a*x*x + b*x + c;
                    data += stride;
                    --count;
                }
            } while(get_next(iterator));
            NpyIter_Deallocate(iterator);
            return 0;
        }
        """
        c_float = node.inputs[0].type.dtype_specs()[1]
        return template % dict(name=name, float_type=c_float)

    def c_code(self, node, name, inputs, outputs, sub):
        """Return the C code for one node.

        It reads the coefficients from the params struct, copies the input
        into a freshly allocated output and applies the quadratic helper
        to that output.
        """
        template = """
        %(float_type)s a = (%(float_type)s) (*(npy_float64*) PyArray_GETPTR1(%(coeff)s->a, 0)); // 0-D TensorType.
        %(float_type)s b =                                                   %(coeff)s->b;      // Scalar.
        %(float_type)s c =                 (%(float_type)s) PyFloat_AsDouble(%(coeff)s->c);     // Generic.
        Py_XDECREF(%(Y)s);
        %(Y)s = (PyArrayObject*)PyArray_EMPTY(PyArray_NDIM(%(X)s), PyArray_DIMS(%(X)s), PyArray_TYPE(%(X)s), PyArray_IS_F_CONTIGUOUS(%(X)s));
        if (PyArray_CopyInto(%(Y)s, %(X)s) != 0) {
            PyErr_SetString(PyExc_RuntimeError, "Unable to copy input into output.");
            %(fail)s
        };
        if (quadratic_%(name)s(%(Y)s, a, b, c) != 0) {
            PyErr_SetString(PyExc_RuntimeError, "Unable to compute quadratic function.");
            %(fail)s
        }
        """
        substitutions = {
            "name": name,
            "coeff": sub["params"],
            "fail": sub["fail"],
            "X": inputs[0],
            "Y": outputs[0],
            "float_type": node.inputs[0].type.c_element_type(),
        }
        return template % substitutions
def test_hash_and_eq_params(self):
    """Check ``==``, ``!=`` and ``hash`` on Params objects.

    Two Params built from equal ParamsType objects and equal values must
    compare equal and hash identically. Renaming a field, changing a
    plain value, or changing one element of an array value must make them
    compare unequal.
    """

    def vector_type():
        return TensorType("int64", (False,))

    def npy_scalar_type():
        return TensorType("float64", tuple())

    def base_array():
        return np.asarray([1, 2, 4, 5, 7])

    first_type = ParamsType(
        a=Generic(),
        array=vector_type(),
        floatting=Scalar("float64"),
        npy_scalar=npy_scalar_type(),
    )
    second_type = ParamsType(
        a=Generic(),
        array=vector_type(),
        floatting=Scalar("float64"),
        npy_scalar=npy_scalar_type(),
    )
    reference = Params(
        first_type,
        a=1,
        array=base_array(),
        floatting=-4.5,
        npy_scalar=np.asarray(12),
    )
    candidate = Params(
        second_type,
        a=1,
        array=base_array(),
        floatting=-4.5,
        npy_scalar=np.asarray(12),
    )
    assert reference == candidate
    assert not (reference != candidate)
    assert hash(reference) == hash(candidate)

    # Only a field name differs (a -> other_name).
    renamed_type = ParamsType(
        other_name=Generic(),
        array=vector_type(),
        floatting=Scalar("float64"),
        npy_scalar=npy_scalar_type(),
    )
    candidate = Params(
        renamed_type,
        other_name=1,
        array=base_array(),
        floatting=-4.5,
        npy_scalar=np.asarray(12),
    )
    assert reference != candidate

    # Only a plain value differs (now a=2).
    candidate = Params(
        second_type,
        a=2,
        array=base_array(),
        floatting=-4.5,
        npy_scalar=np.asarray(12),
    )
    assert reference != candidate

    # Only one element of the NumPy array differs (5 -> -5).
    candidate = Params(
        second_type,
        a=1,
        array=np.asarray([1, 2, 4, -5, 7]),
        floatting=-4.5,
        npy_scalar=np.asarray(12),
    )
    assert reference != candidate