Пример #1
0
def _max(
        self,
        where=(-inf, inf),
        closed=None,
):
    """
    Return the largest value found by ``values_in_range`` over a domain interval.

    Parameters
    ----------
    where : tuple or list of length two, optional
        Lower and upper bound of the interval to inspect.  Passed through
        ``_replace_none_with_infs`` before use.
    closed : optional
        Forwarded to ``values_in_range``.  When None, ``self._closed`` is used.

    Returns
    -------
    The maximum of the values returned by ``self.values_in_range``.
    """
    interval = _replace_none_with_infs(where)
    effective_closed = self._closed if closed is None else closed
    candidates = self.values_in_range(interval, effective_closed)
    return max(candidates)
Пример #2
0
    def rolling_mean(self, window=(0, 0), where=(-inf, inf)):
        """
        Compute the coordinates which describe the rolling mean.

        The rolling mean of a step function is a continuous piece-wise linear
        function, so it is fully described by the x,y coordinates at which its
        gradient changes.  Those coordinates are returned as a
        :class:`pandas.Series` (x in the index, y in the values) which can be
        handed to :meth:`matplotlib.axes.Axes.plot`, or equivalent, to visualise.

        The window around a point x (the focal point) is given by the two
        entries of *window*: the offset from the focal point to the left
        boundary of the window, and the offset to the right boundary.  This
        caters for trailing windows, leading windows and everything in between
        (including a centred window).

        Parameters
        ----------
        window : array-like of int, float or pandas.Timedelta
            Must have length 2.  Offsets from the focal point to the window boundaries.
        where : tuple or list of length two, optional
            Indicates the domain interval over which to evaluate the step function.
            Default is (-sc.inf, sc.inf) or equivalently (None, None).

        Returns
        -------
        :class:`pandas.Series`

        See Also
        --------
        Stairs.mean
        """
        where = _replace_none_with_infs(where)
        assert len(
            window) == 2, "Window should be a listlike object of length 2."
        offset_left, offset_right = window
        domain_start, domain_end = where
        restricted = self.clip(domain_start, domain_end)

        # No step point data after clipping: the result is the initial value
        # repeated at both ends of the domain.
        if restricted._data is None:
            flat_values = [restricted.initial_value] * 2
            return pd.Series(flat_values, index=where)

        # The candidate focal points are those for which one of the window
        # boundaries coincides with a step point.
        change_points = restricted._data.index
        focal_points = pd.Index.union(
            change_points - offset_left,
            change_points - offset_right,
        )
        windows = pd.IntervalIndex.from_arrays(
            focal_points + offset_left,
            focal_points + offset_right,
        )
        window_means = restricted.slice(windows).mean().values
        result = pd.Series(window_means, index=focal_points)

        # Drop focal points whose window would extend beyond a finite bound.
        if domain_start != -inf:
            earliest_focal = domain_start - offset_left
            result = result.loc[result.index >= earliest_focal]
        if domain_end != inf:
            latest_focal = domain_end - offset_right
            result = result.loc[result.index <= latest_focal]
        return result
Пример #3
0
def corr(self, other, where=(-inf, inf), lag=0, clip="pre"):
    """
    Calculates either correlation, autocorrelation or cross-correlation.

    All calculations are based off the `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_.

    The calculation is between two step functions described by *self* and *other*.
    If lag is None or 0 then correlation is calculated, otherwise cross-correlation is calculated.
    Autocorrelation is a special case of cross-correlation when *other* is equal to *self*.

    Points where either *self* or *other* is NA are masked out of both step
    functions before the calculation is performed.

    Parameters
    ----------
    other: :class:`Stairs`
        the stairs instance with which to compute the correlation
    where : tuple or list of length two, optional
        Indicates the domain interval over which to perform the calculation.
        Default is (-sc.inf, sc.inf) or equivalently (None, None).
    lag : int, float, pandas.Timedelta or None
        A pandas.Timedelta is only valid when domain is date-like.
        None is treated the same as 0.
    clip : {'pre', 'post'}, default 'pre'
        Only relevant when lag is non-zero.  Determines if the domain is applied before or
        after *other* is translated.  If 'pre' then the domain over which the calculation
        is performed is the overlap of the original domain and the translated domain
        (implemented by reducing a finite upper bound of *where* by *lag*).

    Returns
    -------
    float
        The correlation (or cross-correlation) between *self* and *other*.
        ``numpy.nan`` is returned when the product of the two standard
        deviations over the domain is zero.

    See Also
    --------
    Stairs.cov, staircase.corr, staircase.cov
    """
    # A list is needed because the upper bound may be adjusted in place below.
    where = list(_replace_none_with_infs(where))
    # lag=None is documented as equivalent to lag=0; without this
    # normalisation the negation ``-lag`` below raises TypeError for None.
    if lag is None:
        lag = 0
    if lag != 0:
        assert clip in ["pre", "post"]
        if clip == "pre" and where[1] != inf:
            where[1] = where[1] - lag
        other = other.shift(-lag)
    # Exclude every point at which either step function is undefined, so both
    # operands are compared over exactly the same support.
    mask = self.isna() | other.isna()
    self = self.mask(mask)
    other = other.mask(mask)
    denominator = self.clip(*where).std() * other.clip(*where).std()
    # Pearson correlation is undefined when either operand is constant.
    if denominator == 0:
        return np.nan
    return self.cov(other, where) / denominator
Пример #4
0
def values_in_range(self, where=(-inf, inf), closed=None):
    """
    Return the distinct, non-NaN values found over a domain interval.

    Parameters
    ----------
    where : tuple or list of length two, optional
        Lower and upper bound of the interval to inspect.  Passed through
        ``_replace_none_with_infs`` before use.
    closed : optional
        Forwarded to ``_get_lims`` to decide how the interval endpoints are
        treated.  When None, ``self._closed`` is used.

    Returns
    -------
    :class:`numpy.ndarray`
        Sorted unique values with NaN removed.  If the slice lookup reports a
        right index of -1, a one element array holding ``self.initial_value``
        is returned instead (without NaN filtering).
    """
    where = _replace_none_with_infs(where)
    assert len(
        where) == 2, "Parameter 'where' should be list or tuple of length 2."
    effective_closed = self._closed if closed is None else closed
    lower, upper = where
    lower_how, upper_how = _get_lims(self, effective_closed)
    start, stop = _get_slice_index(self, lower, upper, lower_how, upper_how)

    # A right index of -1 is handled by returning only the initial value.
    if stop == -1:
        return np.array([self.initial_value])

    found = self._get_values().iloc[max(0, start):stop]
    # A negative left index means the initial value also belongs to the
    # result, provided it is not NaN.
    initial_is_relevant = start < 0 and not np.isnan(self.initial_value)
    if initial_is_relevant:
        found = np.append([self.initial_value], found)

    distinct = np.unique(found)
    is_missing = np.isnan(distinct)
    return distinct[~is_missing]
Пример #5
0
def agg(self, name, where=(-inf, inf), closed=None):
    """
    Apply one or several aggregating functions over a domain interval.

    Parameters
    ----------
    name : str, callable, or list-like of these
        Each entry is either the name of a method (resolved with
        ``_get_stairs_method``) or a callable, whose ``__name__`` is used as
        its label.
    where : tuple or list of length two, optional
        Domain interval.  Passed through ``_replace_none_with_infs``.  Unless
        it equals ``(-inf, inf)`` the step function is clipped to it.
    closed : optional
        Only forwarded to functions labelled "min" or "max".

    Returns
    -------
    The single result when *name* is not list-like, otherwise a
    :class:`pandas.Series` of results indexed by function label.
    """
    where = _replace_none_with_infs(where)
    if where == (-inf, inf):
        restricted = self
    else:
        restricted = self.clip(*where)

    def evaluate(spec):
        # Resolve the entry to a (label, callable) pair.
        if isinstance(spec, str):
            label, method = spec, _get_stairs_method(spec)
        else:
            label, method = spec.__name__, spec
        # "min" and "max" receive the unclipped instance together with the
        # interval and endpoint handling; everything else receives the
        # clipped step function only.
        if label in ("min", "max"):
            return label, method(self, where=where, closed=closed)
        return label, method(restricted)

    if not is_list_like(name):
        return evaluate(name)[1]
    return pd.Series(dict(map(evaluate, name)))
Пример #6
0
def clip(self, lower=-inf, upper=inf):
    """
    Return a step function restricted to the interval between *lower* and *upper*.

    Parameters
    ----------
    lower : optional
        Lower bound of the interval.  Default is -inf.  The bounds are passed
        through ``_replace_none_with_infs`` (presumably mapping None to the
        corresponding infinity - confirm against that helper).
    upper : optional
        Upper bound of the interval.  Default is inf.

    Returns
    -------
    ``self`` when both bounds are infinite, otherwise a new instance created
    with ``sc.Stairs._new``.

    Raises
    ------
    ValueError
        If *lower* is not strictly less than *upper*.
    """
    lower, upper = _replace_none_with_infs((lower, upper))
    if not lower < upper:
        raise ValueError("'lower' must be strictly less than 'upper'.")
    # Nothing to clip: hand back the same instance, not a copy.
    if lower == -inf and upper == inf:
        return self
    left_index, right_index = _get_slice_index(self,
                                               lower,
                                               upper,
                                               lower_how="right",
                                               upper_how="left")

    # A right index of -1 is treated as "no existing values fall inside the
    # interval", so start from an empty float series.
    if right_index == -1:
        sliced_values = pd.Series(dtype="float64")
    else:
        sliced_values = self._get_values().iloc[max(0, left_index):right_index]
    # A finite upper bound gets an entry with value NaN at the bound itself.
    if upper != inf:
        sliced_values.loc[upper] = np.nan
    # A finite lower bound with a negative left index: prepend the initial
    # value, labelled with the lower bound.
    if lower != -inf and left_index < 0:
        sliced_values = pd.concat(
            [pd.Series([self.initial_value], index=[lower]), sliced_values])
    # Otherwise, if the first retained label lies before the lower bound,
    # relabel it so that it sits exactly on the lower bound.
    elif sliced_values.index[0] < lower:
        index = pd.Series(sliced_values.index)
        index.iloc[0] = lower
        sliced_values.index = index

    data = pd.DataFrame({"value": sliced_values})

    # The original initial value is only kept when there is no lower bound;
    # otherwise the initial value of the result is NaN.
    initial_value = self.initial_value if lower == -inf else np.nan

    result = sc.Stairs._new(
        initial_value=initial_value,
        data=data,
        closed=self.closed,
    )
    result._remove_redundant_step_points()
    return result
Пример #7
0
    def describe(self, where=(-inf, inf), percentiles=(25, 50, 75)):
        """
        Summarise the step function values over a domain with descriptive statistics.

        The result holds, in order: "unique", "mean", "std", "min", one
        "<p>%" entry per requested percentile, and "max".

        Parameters
        ----------
        where : tuple or list of length two, optional
            Indicates the domain interval over which to evaluate the step function.
            Default is (-sc.inf, sc.inf) or equivalently (None, None).
        percentiles: array-like of float, default (25, 50, 75)
            The percentiles to include in output.  Numbers should be in the range 0 to 100.

        Returns
        -------
        :class:`pandas.Series`

        See Also
        --------
        Stairs.mean, Stairs.std, Stairs.min, Stairs.percentile, Stairs.max
        """
        where = _replace_none_with_infs(where)
        if where == (-inf, inf):
            target = self
        else:
            target = self.clip(*where)

        # Entries are added in the order they should appear in the result.
        summary = {
            "unique": target.percentile.clip(0, 100).number_of_steps - 1,
            "mean": target.mean,
            "std": target.std,
            "min": target.min,
        }
        for perc in percentiles:
            summary[f"{perc}%"] = target.percentile(perc)
        summary["max"] = target.max
        return pd.Series(summary)