Пример #1
0
def get_steps(
    steps: Optional[Union[int, np.ndarray, TensorVariable]],
    *,
    shape: Optional[Shape] = None,
    dims: Optional[Dims] = None,
    observed: Optional[Any] = None,
    step_shape_offset: int = 0,
):
    """Determine the number of timeseries steps from the available information.

    The last entry of ``shape``, then of ``dims``, then the last axis of
    ``observed`` are consulted in that order; the first source that yields a
    length wins and later sources are not evaluated.

    Parameters
    ----------
    steps:
        Number of steps given explicitly by the user, or None.
    shape:
        Shape given by the user. Ignored as a source when its last entry is
        an Ellipsis.
    dims:
        Dims given by the user. Ignored as a source when the last entry is an
        Ellipsis; otherwise the length is looked up in the ``dim_lengths`` of
        the model on the context stack.
    observed:
        Observed data given by the user; its last axis length is used.
    step_shape_offset:
        Value subtracted from the last dimension length to obtain the number
        of steps, defaults to 0.

    Returns
    -------
    steps
        ``steps`` unchanged when nothing could be inferred (possibly None).
        The inferred value when ``steps`` is None. When both are available,
        the inferred value wrapped in a symbolic ``Assert`` that checks it
        equals ``steps``.
    """
    from_context = None

    # Source 1: trailing entry of the user supplied shape.
    if shape is not None:
        shape = to_tuple(shape)
        trailing_length = shape[-1]
        if trailing_length is not Ellipsis:
            from_context = trailing_length - step_shape_offset

    # Source 2: length of the trailing named dimension registered in the model.
    if from_context is None and dims is not None:
        dims = convert_dims(dims)
        trailing_dim = dims[-1]
        if trailing_dim is not Ellipsis:
            registered_lengths = modelcontext(None).dim_lengths
            from_context = registered_lengths[trailing_dim] - step_shape_offset

    # Source 3: trailing axis of the observed data.
    if from_context is None and observed is not None:
        observed = convert_observed_data(observed)
        from_context = observed.shape[-1] - step_shape_offset

    if from_context is None:
        # Nothing could be inferred; fall back to whatever the user passed.
        return steps
    if steps is None:
        return from_context

    # Two sources of step information: make sure they agree at runtime.
    check_consistent = Assert(msg="Steps do not match last shape dimension")
    return check_consistent(from_context, at.eq(from_context, steps))
Пример #2
0
def resize_from_observed(
        observed,
        ndim_implied: int) -> Tuple[StrongSize, Union[np.ndarray, Variable]]:
    """Derive the leading dimensions an RV must gain to match its observations.

    Parameters
    ----------
    observed : scalar, array-like
        The value of the `observed` kwarg to the RV creation. Anything that
        lacks a ``shape`` attribute is first passed through
        ``convert_observed_data``.
    ndim_implied : int
        Number of RV dimensions that were implied from its inputs alone.

    Returns
    -------
    resize_shape : tuple
        Lengths of the first ``observed.ndim - ndim_implied`` axes of the
        observations, i.e. the dimensions that should be prepended. Empty
        when that difference is not positive.
    observed : scalar, array-like
        The observations, converted if they had no ``shape`` attribute.
    """
    if not hasattr(observed, "shape"):
        observed = convert_observed_data(observed)

    n_prepended = observed.ndim - ndim_implied

    # Collect the lengths axis by axis (rather than slicing) so that a
    # non-positive count simply yields an empty tuple.
    leading_lengths = []
    for axis in range(n_prepended):
        leading_lengths.append(observed.shape[axis])

    return tuple(leading_lengths), observed
Пример #3
0
def test_pandas_to_array_pandas_index():
    """Converting a ``pandas.Index`` yields an array with the same values."""
    # Skip (rather than fail) when pandas is not installed.
    pandas = pytest.importorskip("pandas")
    converted = convert_observed_data(pandas.Index([1, 2, 3]))
    np.testing.assert_array_equal(converted, np.array([1, 2, 3]))
Пример #4
0
def Data(
    name: str,
    value,
    *,
    dims: Optional[Sequence[str]] = None,
    coords: Optional[Dict[str, Sequence]] = None,
    export_index_as_coords=False,
    mutable: Optional[bool] = None,
    **kwargs,
) -> Union[SharedVariable, TensorConstant]:
    """Data container that registers a data variable with the model.

    With ``mutable=True`` the variable is registered as a
    :class:`~aesara.compile.sharedvalue.SharedVariable`, enabling it to be
    altered in value and shape, but NOT in dimensionality using
    :func:`pymc.set_data`. With ``mutable=False`` it is created through
    ``at.as_tensor_variable`` instead.

    To set the value of the data container variable, check out
    :func:`pymc.Model.set_data`.

    For more information, read the notebook :ref:`nb:data_container`.

    Parameters
    ----------
    name : str
        The name for this variable.
    value : array_like or pandas.Series, pandas.Dataframe
        A value to associate with this variable. Lists are converted to
        NumPy arrays first.
    dims : str or tuple of str, optional
        Dimension names of the random variables (as opposed to the shapes of these
        random variables). Use this when ``value`` is a pandas Series or DataFrame. The
        ``dims`` will then be the name of the Series / DataFrame's columns. See ArviZ
        documentation for more information about dimensions and coordinates:
        :ref:`arviz:quickstart`.
        If this parameter is not specified, the random variables will not have dimension
        names.
    coords : dict, optional
        Coordinate values to set for new dimensions introduced by this ``Data`` variable.
    export_index_as_coords : bool, default=False
        If True, the ``Data`` container will try to infer what the coordinates
        and dimension names should be if there is an index in ``value``.
    mutable : bool, optional
        Switches between creating a :class:`~aesara.compile.sharedvalue.SharedVariable`
        (``mutable=True``) vs. creating a :class:`~aesara.tensor.TensorConstant`
        (``mutable=False``).
        Consider using :class:`pymc.ConstantData` or :class:`pymc.MutableData` as less
        verbose alternatives to ``pm.Data(..., mutable=...)``.
        If this parameter is not specified, the value depends on the installed
        version of the package: ``mutable=True`` (with a ``FutureWarning``) for
        ``4.0.x`` and ``mutable=False`` otherwise.
    **kwargs : dict, optional
        Extra arguments passed to :func:`aesara.shared` (``mutable=True``) or
        ``at.as_tensor_variable`` (``mutable=False``).

    Returns
    -------
    SharedVariable or TensorConstant
        The data variable that was registered with the model.

    Raises
    ------
    TypeError
        If there is no model on the context stack.
    pymc.exceptions.ShapeError
        If ``dims`` is given and its length differs from the number of
        dimensions of the data.

    Examples
    --------
    >>> import pymc as pm
    >>> import numpy as np
    >>> # We generate 10 datasets
    >>> true_mu = [np.random.randn() for _ in range(10)]
    >>> observed_data = [mu + np.random.randn(20) for mu in true_mu]

    >>> with pm.Model() as model:
    ...     data = pm.MutableData('data', observed_data[0])
    ...     mu = pm.Normal('mu', 0, 10)
    ...     pm.Normal('y', mu=mu, sigma=1, observed=data)

    >>> # Generate one trace for each dataset
    >>> idatas = []
    >>> for data_vals in observed_data:
    ...     with model:
    ...         # Switch out the observed dataset
    ...         model.set_data('data', data_vals)
    ...         idatas.append(pm.sample())
    """
    if coords is None:
        coords = {}

    if isinstance(value, list):
        value = np.array(value)

    # Add data container to the named variables of the model.
    model = pm.Model.get_context(error_if_none=False)
    if model is None:
        raise TypeError(
            "No model on context stack, which is needed to instantiate a data container. "
            "Add variable inside a 'with model:' block."
        )
    name = model.name_for(name)

    # `convert_observed_data` takes care of parameter `value` and
    # transforms it to something digestible for Aesara.
    arr = convert_observed_data(value)

    if mutable is None:
        # Version-dependent default: only 4.0.x still defaults to mutable data.
        major, minor = (int(v) for v in pm.__version__.split(".")[:2])
        mutable = major == 4 and minor < 1
        if mutable:
            warnings.warn(
                "The `mutable` kwarg was not specified. Currently it defaults to `pm.Data(mutable=True)`,"
                " which is equivalent to using `pm.MutableData()`."
                " In v4.1.0 the default will change to `pm.Data(mutable=False)`, equivalent to `pm.ConstantData`."
                " Set `pm.Data(..., mutable=False/True)`, or use `pm.ConstantData`/`pm.MutableData`.",
                FutureWarning,
                # Attribute the warning to the caller's `pm.Data(...)` line
                # rather than to this function.
                stacklevel=2,
            )
    if mutable:
        x = aesara.shared(arr, name, **kwargs)
    else:
        x = at.as_tensor_variable(arr, name, **kwargs)

    if isinstance(dims, str):
        dims = (dims,)
    if not (dims is None or len(dims) == x.ndim):
        raise pm.exceptions.ShapeError(
            "Length of `dims` must match the dimensions of the dataset.",
            actual=len(dims),
            expected=x.ndim,
        )

    # Optionally infer coords and dims from the input value.
    if export_index_as_coords:
        coords, dims = determine_coords(model, value, dims)

    if dims:
        if not mutable:
            # Use the dimension lengths from before the data was tensorified.
            # These can still be tensors, but in many cases they are numeric.
            xshape = np.shape(arr)
        else:
            xshape = x.shape
        # Register new dimension lengths; dimensions the model already knows
        # about are left untouched.
        for d, dname in enumerate(dims):
            if dname not in model.dim_lengths:
                model.add_coord(
                    name=dname,
                    # Note: Coordinate values can't be taken from
                    # the value, because it could be N-dimensional.
                    values=coords.get(dname, None),
                    mutable=mutable,
                    length=xshape[d],
                )

    model.add_random_variable(x, dims=dims)

    return x