Example #1
    def make_node(self, a, val, offset):
        a = aet.as_tensor_variable(a)
        val = aet.as_tensor_variable(val)
        offset = aet.as_tensor_variable(offset)
        if a.ndim != 2:
            raise TypeError(
                "%s: first parameter must have exactly"
                " two dimensions" % self.__class__.__name__
            )
        elif val.ndim != 0:
            raise TypeError(
                f"{self.__class__.__name__}: second parameter must be a scalar"
            )
        elif offset.ndim != 0:
            raise TypeError(
                f"{self.__class__.__name__}: third parameter must be a scalar"
            )
        val = aet.cast(val, dtype=upcast(a.dtype, val.dtype))
        if val.dtype != a.dtype:
            raise TypeError(
                "%s: type of second parameter must be the same"
                " as the first's" % self.__class__.__name__
            )
        elif offset.dtype not in integer_dtypes:
            raise TypeError(
                f"{self.__class__.__name__}: type of third parameter must be as integer"
                " use aesara.tensor.cast( input, 'int32/int64')"
            )

        return Apply(self, [a, val, offset], [a.type()])
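
The make_node above only builds the graph node; a minimal usage sketch follows. It assumes the user-facing helper is exposed as aesara.tensor.extra_ops.fill_diagonal_offset (the usual location of this Op).

# Usage sketch (assumption: fill_diagonal_offset lives in aesara.tensor.extra_ops).
import numpy as np
import aesara
import aesara.tensor as aet
from aesara.tensor.extra_ops import fill_diagonal_offset

a = aet.dmatrix("a")
val = aet.dscalar("val")
offset = aet.iscalar("offset")

out = fill_diagonal_offset(a, val, offset)   # triggers the type checks above
f = aesara.function([a, val, offset], out)

# Fill the k=1 diagonal of a 3x4 zero matrix with 7.0
print(f(np.zeros((3, 4)), 7.0, 1))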
Example #2
 def test_illegal(self):
     try:
         x = zmatrix()
         function([x], cast(x, "float64"))(np.ones((2, 3),
                                                   dtype="complex128"))
     except TypeError:
         return
     assert 0
Example #3
    def grad(self, inp, cost_grad):
        """
        Notes
        -----
        The gradient is currently implemented for matrices only.
        """
        a, val, offset = inp
        grad = cost_grad[0]
        height, width = grad.shape

        if a.dtype.startswith("complex"):
            return [None, None]

        # only valid for matrices
        wr_a = fill_diagonal_offset(grad, 0, offset)

        offset_abs = abs_(offset)
        pos_offset_flag = ge(offset, 0)
        neg_offset_flag = lt(offset, 0)
        min_wh = minimum(width, height)

        start = offset * pos_offset_flag + offset_abs * width * neg_offset_flag
        num_of_step = minimum(
            min_wh,
            width * pos_offset_flag + height * neg_offset_flag - offset_abs)

        step = a.shape[1] + 1
        end = start + step * num_of_step

        # the slice bounds must be integers
        start = aet.cast(start, "int32")
        step = aet.cast(step, "int32")
        end = aet.cast(end, "int32")

        wr_val = grad.flatten()[start:end:step].sum()

        wr_offset = grad_undefined(
            self,
            2,
            offset,
            "offset is not defined for non-integer offset so"
            " fill_diagonal_offset(a,val,offset+eps) is undefined",
        )

        return [wr_a, wr_val, wr_offset]
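
A short sketch of differentiating through this Op (assuming fill_diagonal_offset is importable from aesara.tensor.extra_ops): the gradient with respect to `a` is zero on the written diagonal, and the gradient with respect to `val` is the number of written entries.

import numpy as np
import aesara
import aesara.tensor as aet
from aesara.tensor.extra_ops import fill_diagonal_offset

a = aet.dmatrix("a")
val = aet.dscalar("val")

y = fill_diagonal_offset(a, val, 1).sum()
ga, gval = aesara.grad(y, [a, val])          # uses the grad() shown above

f = aesara.function([a, val], [ga, gval])
ga_v, gval_v = f(np.ones((3, 4)), 2.0)
# ga_v is 1 everywhere except the k=1 diagonal (3 zeros); gval_v == 3.0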
Example #4
def local_abstract_batch_norm_train(fgraph, node):
    if not isinstance(node.op, AbstractBatchNormTrain):
        return None

    x, scale, bias, epsilon, running_average_factor = node.inputs[:5]
    axes = node.op.axes
    if min(axes) < 0 or max(axes) > x.ndim:
        return None
    if (
        not isinstance(x.type, TensorType)
        or not isinstance(scale.type, TensorType)
        or not isinstance(bias.type, TensorType)
        or not isinstance(epsilon.type, TensorType)
        or not isinstance(running_average_factor.type, TensorType)
    ):
        return None
    # optional running_mean and running_var
    if len(node.inputs) > 5 and not isinstance(node.inputs[5].type, TensorType):
        return None
    if len(node.inputs) > 6 and not isinstance(node.inputs[6].type, TensorType):
        return None

    mean = x.mean(axes, keepdims=True)
    var = x.var(axes, keepdims=True)
    # The epsilon should not upcast the dtype.
    if var.dtype == "float32" and epsilon.dtype == "float64":
        epsilon = epsilon.astype("float32")
    invstd = inv(sqrt(var + epsilon))
    out = (x - mean) * (scale * invstd) + bias
    results = [out, mean, invstd]

    if len(node.inputs) > 5:
        running_mean = node.inputs[5]
        running_mean = (
            running_mean * (1.0 - running_average_factor)
            + mean * running_average_factor
        )
        results.append(running_mean)
    if len(node.inputs) > 6:
        m = aet.cast(prod(x.shape) / prod(scale.shape), config.floatX)
        running_var = node.inputs[6]
        running_var = (
            running_var * (1.0 - running_average_factor)
            + (m / (m - 1)) * var * running_average_factor
        )
        results.append(running_var)

    results = [
        aet.patternbroadcast(r, r_orig.broadcastable)
        for (r, r_orig) in zip(results, node.outputs)
    ]

    for var in aesara.graph.basic.vars_between(node.inputs, results):
        if var not in node.inputs:
            copy_stack_trace(node.outputs[0], var)
    return results
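
For reference, the normalization that this rewrite expands AbstractBatchNormTrain into can be checked with plain NumPy (an illustrative sketch, not Aesara code):

import numpy as np

x = np.random.rand(8, 4).astype("float32")
scale, bias, eps = 2.0, 0.5, 1e-5
axes = (0,)

mean = x.mean(axes, keepdims=True)
var = x.var(axes, keepdims=True)
invstd = 1.0 / np.sqrt(var + eps)
out = (x - mean) * (scale * invstd) + bias    # matches the symbolic `out` above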
Example #5
def pad_dims(input, leftdims, rightdims):
    """Reshapes the input to a (leftdims + rightdims) tensor

    This helper function is used to convert pooling inputs with arbitrary
    non-pooling dimensions to the correct number of dimensions for the
    GPU pooling ops.

    This reduces or expands the number of non-pooling (leading) dimensions of
    the input to exactly `leftdims`, by adding extra singleton dimensions on
    the left or by combining some of the existing leading dimensions of the input.

    Use `unpad_dims` to reshape back to the original dimensions.

    Examples
    --------
    Given input of shape (3, 5, 7), ``pad_dims(input, 2, 2)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7).
    Given that output from pad_dims, ``unpad_dims(output, input, 2, 2)``
    reshapes back to (3, 5, 7).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 2)``
    does not reshape and returns output with shape (3, 5, 7, 9).

    Given input of shape (3, 5, 7, 9, 11), ``pad_dims(input, 2, 2)``
    combines the first two dimensions and reshapes to (15, 7, 9, 11).

    Given input of shape (3, 5, 7, 9), ``pad_dims(input, 2, 3)``
    adds a singleton dimension and reshapes to (1, 3, 5, 7, 9).
    """
    assert input.ndim >= rightdims

    if input.ndim == (leftdims + rightdims):
        return input

    # extract image dimensions
    img_shape = input.shape[-rightdims:]

    non_pool_ndim = input.ndim - rightdims
    if non_pool_ndim < leftdims:
        # too few dimensions, pad on the left
        dummy_dims = as_tensor([1] * (leftdims - non_pool_ndim))
        new_shape = join(0, dummy_dims, input.shape[:non_pool_ndim], img_shape)
    else:
        # too many dimensions, combine the leading dimensions
        batched_ndim = non_pool_ndim - leftdims + 1
        batch_size = prod(input.shape[:batched_ndim])
        # convert to a vector for join
        batch_size = shape_padright(batch_size, 1)
        new_shape = join(
            0, batch_size, input.shape[batched_ndim:non_pool_ndim], img_shape
        )

    # store in the required shape
    new_shape = cast(new_shape, "int64")
    input_ND = GpuReshape(leftdims + rightdims)(input, new_shape)
    return input_ND
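
The reshape behaviour described in the docstring can be mimicked with plain NumPy; the sketch below reproduces the (3, 5, 7, 9, 11) -> (15, 7, 9, 11) case:

import numpy as np

x = np.empty((3, 5, 7, 9, 11))
leftdims, rightdims = 2, 2

batched_ndim = (x.ndim - rightdims) - leftdims + 1      # 2 leading dims to merge
batch = int(np.prod(x.shape[:batched_ndim]))            # 3 * 5 = 15
padded = x.reshape((batch,) + x.shape[batched_ndim:])
assert padded.shape == (15, 7, 9, 11)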
Example #6
    def test_dtype_mismatch(self):
        rng = np.random.RandomState(utt.fetch_seed())
        data = rng.rand(5).astype(self.dtype)
        x = self.shared(data)
        y = aet.cast(x * 10, "int8")
        cond = iscalar("cond")

        with pytest.raises(TypeError):
            ifelse(cond, x, y)
        with pytest.raises(TypeError):
            ifelse(cond, y, x)
Example #7
 def make_node(self, a, val):
     a = basic.as_tensor_variable(a)
     val = basic.as_tensor_variable(val)
     if a.ndim < 2:
         raise TypeError("%s: first parameter must have at least"
                         " two dimensions" % self.__class__.__name__)
     elif val.ndim != 0:
         raise TypeError("%s: second parameter must be a scalar" %
                         self.__class__.__name__)
     val = basic.cast(val, dtype=upcast(a.dtype, val.dtype))
     if val.dtype != a.dtype:
         raise TypeError("%s: type of second parameter must be the same as"
                         " the first's" % self.__class__.__name__)
     return Apply(self, [a, val], [a.type()])
Example #8
def test_undefined_grad_opt():
    # Make sure that undefined grads get removed in the optimized graph.
    random = MRG_RandomStream(np.random.randint(1, 2147462579))
    pvals = shared(np.random.rand(10, 20).astype(config.floatX))
    pvals = pvals / pvals.sum(axis=1)
    pvals = zero_grad(pvals)
    samples = random.multinomial(pvals=pvals, n=1)
    samples = cast(samples, pvals.dtype)
    samples = zero_grad(samples)
    cost = tt_sum(samples + pvals)
    grad_out = grad(cost, samples)
    f = function([], grad_out)
    assert not any(
        [isinstance(node.op, UndefinedGrad) for node in f.maker.fgraph.apply_nodes]
    )
Example #9
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

    assert size.dtype in int_dtypes

    return size
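
A brief usage sketch (assuming normalize_size_param is importable from aesara.tensor.random.utils, where this helper is usually defined):

from aesara.tensor.random.utils import normalize_size_param

s0 = normalize_size_param(None)       # empty int64 vector -> a single draw
s1 = normalize_size_param(3)          # length-1 integer vector [3]
s2 = normalize_size_param((2, 4))     # sequence cast to an int64 vector
assert all(s.ndim == 1 for s in (s0, s1, s2))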
Example #10
def bincount(x, weights=None, minlength=None, assert_nonneg=False):
    """Count number of occurrences of each value in array of ints.

    The number of bins (of size 1) is one larger than the largest
    value in x. If minlength is specified, there will be at least
    this number of bins in the output array (though it will be longer
    if necessary, depending on the contents of x). Each bin gives the
    number of occurrences of its index value in x. If weights is
    specified the input array is weighted by it, i.e. if a value n
    is found at position i, out[n] += weight[i] instead of out[n] += 1.

    Parameters
    ----------
    x : 1-dimensional array of nonnegative ints
    weights : array of the same shape as x with corresponding weights.
        Optional.
    minlength : A minimum number of bins for the output array.
        Optional.
    assert_nonneg : A flag that inserts an assert_op to check if
        every input x is nonnegative.
        Optional.


    .. versionadded:: 0.6

    """
    if x.ndim != 1:
        raise TypeError("Inputs must be of dimension 1.")

    if assert_nonneg:
        assert_op = Assert("Input to bincount has negative values!")
        x = assert_op(x, aet_all(x >= 0))

    max_value = aet.cast(x.max() + 1, "int64")

    if minlength is not None:
        max_value = maximum(max_value, minlength)

    # Note: we do not use inc_subtensor(out[x], ...) in the following lines,
    # since out[x] raises an exception if the indices (x) are int8.
    if weights is None:
        out = aet.zeros([max_value], dtype=x.dtype)
        out = advanced_inc_subtensor1(out, 1, x)
    else:
        out = aet.zeros([max_value], dtype=weights.dtype)
        out = advanced_inc_subtensor1(out, weights, x)
    return out
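
A minimal usage sketch for the helper above (it ships as aesara.tensor.extra_ops.bincount):

import numpy as np
import aesara
import aesara.tensor as aet
from aesara.tensor.extra_ops import bincount

x = aet.lvector("x")                        # 1-d integer input
counts = bincount(x, minlength=6)
f = aesara.function([x], counts)

print(f(np.array([0, 1, 1, 3, 2, 1, 5])))   # -> [1 3 1 1 0 1]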
Example #11
def test_undefined_grad_opt():
    # Make sure that undefined grads get removed in the optimized graph.
    random = MRG_RandomStream(np.random.default_rng().integers(1, 2147462579))

    pvals = aesara.shared(np.random.random((10, 20)).astype(config.floatX))
    pvals = pvals / pvals.sum(axis=1)
    pvals = zero_grad(pvals)

    samples = random.multinomial(pvals=pvals, n=1)
    samples = at.cast(samples, pvals.dtype)
    samples = zero_grad(samples)

    cost = at_sum(samples + pvals)
    grad_res = grad(cost, samples)

    f = aesara.function([], grad_res)
    assert not any(
        isinstance(node.op, UndefinedGrad) for node in f.maker.fgraph.apply_nodes
    )
Example #12
def normalize_size_param(size):
    """Create an Aesara value for a ``RandomVariable`` ``size`` parameter."""
    if size is None:
        size = constant([], dtype="int64")
    elif isinstance(size, int):
        size = as_tensor_variable([size], ndim=1)
    elif not isinstance(size, (np.ndarray, Variable, Sequence)):
        raise TypeError(
            "Parameter size must be None, an integer, or a sequence with integers."
        )
    else:
        size = cast(as_tensor_variable(size, ndim=1), "int64")

        if not isinstance(size, Constant):
            # This should help ensure that the length of non-constant `size`s
            # will be available after certain types of cloning (e.g. the kind
            # `Scan` performs)
            size = specify_shape(size, (get_vector_length(size), ))

    assert size.dtype in int_dtypes

    return size
Example #13
 def infer_shape(self, node, in_shapes):
     temp = node.inputs[0]
     M = basic.switch(basic.lt(temp, 0), basic.cast(0, temp.dtype), temp)
     return [[M]]
Example #14
def safe_new(x: Variable,
             tag: str = "",
             dtype: Optional[Union[str, np.dtype]] = None) -> Variable:
    """Clone variables.

    Internal function that constructs a new variable from `x` with the same
    type, but with a different name (old name + tag). This function is used
    by `gradient`, or the R-op to construct new variables for the inputs of
    the inner graph such that there is no interference between the original
    graph and the newly constructed graph.

    """
    if hasattr(x, "name") and x.name is not None:
        nw_name = x.name + tag
    else:
        nw_name = None

    if isinstance(x, Constant):
        # TODO: Do something better about this
        assert isinstance(x.type, HasDataType)

        if dtype and x.type.dtype != dtype:
            casted_x = cast(x, dtype)
            nwx = type(x)(casted_x.type, x.data, x.name)
            nwx.tag = copy.copy(x.tag)
            return nwx
        else:
            return x
    # Note, `as_tensor_variable` will convert the `ScalarType` into a
    # `TensorScalar` that will require a `ScalarFromTensor` `Op`, making the
    # push-out optimization fail
    elif isinstance(x, aes.ScalarVariable):
        if dtype:
            nw_x = aes.get_scalar_type(dtype=dtype)()
        else:
            nw_x = x.type()
        nw_x.name = nw_name
        if config.compute_test_value != "off":
            # Copy test value, cast it if necessary
            try:
                x_test_value = get_test_value(x)
            except TestValueError:
                pass
            else:
                # This clause is executed if no exception was raised
                nw_x.tag.test_value = nw_x.type.filter(x_test_value)
        return nw_x
    else:
        try:
            x = at.as_tensor_variable(x)
        except TypeError:
            # This could happen for example for random states
            pass

    # Cast `x` if needed. If `x` has a test value, this will also cast it.
    if dtype:
        # TODO: Do something better about this
        assert isinstance(x.type, HasDataType)

        if x.type.dtype != dtype:
            x = cast(x, dtype)

    nw_x = x.type()
    nw_x.name = nw_name
    # Preserve test values so that the `compute_test_value` option can be used.
    # The test value is deep-copied to ensure there can be no interactions
    # between test values, due to inplace operations for instance. This may
    # not be the most efficient memory-wise, though.
    if config.compute_test_value != "off":
        try:
            nw_x.tag.test_value = copy.deepcopy(get_test_value(x))
        except TestValueError:
            pass

    return nw_x
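
A quick usage sketch (safe_new is importable from aesara.scan.utils): cloning a variable yields a fresh variable of the same type whose name carries the tag, with no link to the original graph.

import aesara.tensor as at
from aesara.scan.utils import safe_new

x = at.vector("x")
x_copy = safe_new(x, tag="_copy")

assert x_copy.name == "x_copy"
assert x_copy.type == x.type
assert x_copy is not x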
Example #15
    def make_node(self, rng, size, dtype, *dist_params):
        """Create a random variable node.

        XXX: Unnamed/non-keyword arguments are considered distribution
        parameters!  If you want to set `size`, `rng`, and/or `name`, use their
        keywords.

        Parameters
        ----------
        rng: RandomStateType
            Existing Aesara `RandomState` object to be used.  Creates a
            new one, if `None`.
        size: int or Sequence
            Numpy-like size of the output (i.e. replications).
        dtype: str
            The dtype of the sampled output.  If the value ``"floatX"`` is
            given, then ``dtype`` is set to ``aesara.config.floatX``.  This
            value is only used when `self.dtype` isn't set.
        dist_params: list
            Distribution parameters.

        Returns
        -------
        out: `Apply`
            A node with inputs `(rng, size, dtype) + dist_args` and outputs
            `(rng_var, out_var)`.

        """
        if size is None:
            size = constant([], dtype="int64")
        elif isinstance(size, int):
            size = as_tensor_variable([size], ndim=1)
        elif not isinstance(size, (np.ndarray, Variable, Sequence)):
            raise TypeError(
                "Parameter size must be None, an integer, or a sequence with integers."
            )
        else:
            size = cast(as_tensor_variable(size, ndim=1), "int64")

        assert size.dtype in int_dtypes

        dist_params = tuple(
            as_tensor_variable(p) if not isinstance(p, Variable) else p
            for p in dist_params)

        if rng is None:
            rng = aesara.shared(np.random.RandomState())
        elif not isinstance(rng.type, RandomStateType):
            raise TypeError(
                "The type of rng should be an instance of RandomStateType")

        bcast = self.compute_bcast(dist_params, size)
        dtype = self.dtype or dtype

        if dtype == "floatX":
            dtype = config.floatX
        elif dtype is None or (isinstance(dtype, str)
                               and dtype not in all_dtypes):
            raise TypeError("dtype is unspecified")

        if isinstance(dtype, str):
            dtype_idx = constant(all_dtypes.index(dtype), dtype="int64")
        else:
            dtype_idx = constant(dtype, dtype="int64")
            dtype = all_dtypes[dtype_idx.data]

        outtype = TensorType(dtype=dtype, broadcastable=bcast)
        out_var = outtype()
        inputs = (rng, size, dtype_idx) + dist_params
        outputs = (rng.type(), out_var)

        return Apply(self, inputs, outputs)
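
For a concrete RandomVariable such as aesara.tensor.random.basic.normal, the node built by this make_node has inputs (rng, size, dtype_index, loc, scale) and outputs (updated rng, draw); a small sketch, assuming the Aesara version this snippet comes from:

from aesara.tensor.random.basic import normal

x_rv = normal(0.0, 1.0, size=(2, 3))
node = x_rv.owner
assert len(node.inputs) == 5      # rng, size, dtype_idx, loc, scale
assert len(node.outputs) == 2     # updated rng, sampled tensor
assert x_rv.ndim == 2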
Example #16
 def test_cast(self):
     x = zvector()
     with pytest.raises(TypeError):
         cast(x, "int32")
Example #17
def scan_checkpoints(
    fn,
    sequences=None,
    outputs_info=None,
    non_sequences=None,
    name="checkpointscan_fn",
    n_steps=None,
    save_every_N=10,
    padding=True,
):
    """Scan function that uses less memory, but is more restrictive.

    In :func:`~aesara.scan`, if you compute the gradient of the output
    with respect to the input, you will have to store the intermediate
    results at each time step, which can be prohibitively huge. This
    function allows you to run ``save_every_N`` steps of the forward computation
    without storing the intermediate results, and to recompute them during
    the gradient computation.

    Notes
    -----
    Current assumptions:

    * Every sequence has the same length.
    * If ``n_steps`` is specified, it has the same value as the length of
      any sequence.
    * The value of ``save_every_N`` divides the number of steps the scan
      will run without remainder.
    * Only singly-recurrent and non-recurrent outputs are used.
      No multiple recurrences.
    * Only the last timestep of any output will ever be used.

    Parameters
    ----------
    fn
        ``fn`` is a function that describes the operations involved in one
        step of ``scan``. See the documentation of :func:`~aesara.scan`
        for more information.

    sequences
        ``sequences`` is the list of Aesara variables or dictionaries
        describing the sequences ``scan`` has to iterate over. All
        sequences must be the same length in this version of ``scan``.

    outputs_info
        ``outputs_info`` is the list of Aesara variables or dictionaries
        describing the initial state of the outputs computed
        recurrently.

    non_sequences
        ``non_sequences`` is the list of arguments that are passed to
        ``fn`` at each step. One can opt to exclude variables
        used in ``fn`` from this list as long as they are part of the
        computational graph, though for clarity we encourage not to do so.

    n_steps
        ``n_steps`` is the number of steps to iterate given as an int
        or Aesara scalar (> 0). If any of the input sequences do not have
        enough elements, scan will raise an error. If n_steps is not provided,
        ``scan`` will figure out the number of steps it should run given its
        input sequences.

    save_every_N
        ``save_every_N`` is the number of steps to go without storing
        the computations of ``scan`` (i.e. they will have to be recomputed
        during the gradient computation).

    padding
        If the length of the sequences is not a multiple of ``save_every_N``,
        the sequences will be zero padded to make this version of ``scan``
        work properly, but will also result in a memory copy. It can be
        avoided by setting ``padding`` to False, but you need to make
        sure the length of the sequences is a multiple of ``save_every_N``.

    Returns
    -------
    tuple
        Tuple of the form ``(outputs, updates)`` as in :func:`~aesara.scan`, but
        with a small change: it only contains the output at each
        ``save_every_N`` step. The time steps that are not returned by
        this function will be recomputed during the gradient computation
        (if any).

    See Also
    --------
    :func:`~aesara.scan`: Looping in Aesara.

    """
    # Standardize the format of input arguments
    if sequences is None:
        sequences = []
    elif not isinstance(sequences, list):
        sequences = [sequences]

    if not isinstance(outputs_info, list):
        outputs_info = [outputs_info]

    if non_sequences is None:
        non_sequences = []
    elif not isinstance(non_sequences, list):
        non_sequences = [non_sequences]

    # Check that outputs_info has no taps:
    for element in outputs_info:
        if isinstance(element, dict) and "taps" in element:
            raise RuntimeError("scan_checkpoints doesn't work with taps.")

    # Determine how many steps the original scan would run
    if n_steps is None:
        n_steps = sequences[0].shape[0]

    # Compute the number of steps of the outer scan
    o_n_steps = at.cast(ceil(n_steps / save_every_N), "int64")

    # Compute the number of steps of the inner scan
    i_n_steps = save_every_N * at.ones((o_n_steps, ), "int64")
    mod = n_steps % save_every_N
    last_n_steps = at.switch(eq(mod, 0), save_every_N, mod)
    i_n_steps = set_subtensor(i_n_steps[-1], last_n_steps)

    # Pad the sequences if needed
    if padding:
        # Since padding could be an empty tensor, Join returns a view of s.
        join = Join(view=0)
        for i, s in enumerate(sequences):
            n = s.shape[0] % save_every_N
            z = at.zeros((n, s.shape[1:]), dtype=s.dtype)
            sequences[i] = join(0, [s, z])

    # Establish the input variables of the outer scan
    o_sequences = [
        s.reshape(
            [s.shape[0] / save_every_N, save_every_N] +
            [s.shape[i] for i in range(1, s.ndim)],
            s.ndim + 1,
        ) for s in sequences
    ]
    o_sequences.append(i_n_steps)
    new_nitsots = [i for i in outputs_info if i is None]
    o_nonsequences = non_sequences

    def outer_step(*args):
        # Separate the received arguments into their respective (seq, outputs
        # from previous iterations, nonseqs) categories
        i_sequences = list(args[:len(o_sequences)])
        i_prev_outputs = list(args[len(o_sequences):-len(o_nonsequences)])
        i_non_sequences = list(args[-len(o_nonsequences):])
        i_outputs_infos = i_prev_outputs + [
            None,
        ] * len(new_nitsots)

        # Call the user-provided function with the proper arguments
        results, updates = scan(
            fn=fn,
            sequences=i_sequences[:-1],
            outputs_info=i_outputs_infos,
            non_sequences=i_non_sequences,
            name=name + "_inner",
            n_steps=i_sequences[-1],
        )
        if not isinstance(results, list):
            results = [results]

        # Keep only the last timestep of every output but keep all the updates
        return [r[-1] for r in results], updates

    results, updates = scan(
        fn=outer_step,
        sequences=o_sequences,
        outputs_info=outputs_info,
        non_sequences=o_nonsequences,
        name=name + "_outer",
        n_steps=o_n_steps,
        allow_gc=True,
    )

    return results, updates
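
A minimal usage sketch (assuming scan_checkpoints is importable from aesara.scan.checkpoints, matching the module layout of recent Aesara releases): a cumulative sum whose intermediate states are stored only every 10 steps and recomputed during the gradient pass.

import aesara
import aesara.tensor as at
from aesara.scan.checkpoints import scan_checkpoints

A = at.vector("A")

result, updates = scan_checkpoints(
    fn=lambda a_t, prior: prior + a_t,   # one step of the cumulative sum
    sequences=A,
    outputs_info=at.zeros_like(A[0]),
    save_every_N=10,
)
final = result[-1]
g = aesara.grad(final, A)                # skipped steps are recomputed here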
Example #18
 def test_illegal(self):
     x = zmatrix()
     with pytest.raises(TypeError):
         function([x], cast(x, "float64"))(np.ones((2, 3),
                                                   dtype="complex128"))
Example #19
 def infer_shape(self, fgraph, node, in_shapes):
     temp = node.inputs[0]
     M = aet.switch(lt(temp, 0), aet.cast(0, temp.dtype), temp)
     return [[M]]