Пример #1
0
    def split_into_ranges(self, n=None, range_len=None):
        """Split the object into `n` ranges, each `range_len` rows long.

        At least one of `n` and `range_len` has to be provided:

        * If `range_len` is `None`, it is set to `len(index) // n`.
        * If `n` is `None`, all ranges of length `range_len` are returned.
        * If both are set, `n` ranges are picked at evenly spaced positions among
          all ranges of length `range_len`.

        The returned DataFrame gets two extra column levels, `range_start` and
        `range_end`, holding the first and the last index label of each range.

        Raises:
            ValueError: If neither `n` nor `range_len` is set, or if `n` is bigger
                than the number of available ranges.

        !!! note
            The datetime-like format of the index will be lost as result of this operation.
            Make sure to store the index metadata such as frequency information beforehand.

        Example:
            ```python-repl
            >>> print(df.vbt.split_into_ranges(n=2))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
            range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
            0                  1.0        4.0        5.0        2.0        1.0        2.0
            1                  2.0        5.0        4.0        1.0        2.0        1.0
            >>> print(df.vbt.split_into_ranges(range_len=4))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
            range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
            0                  1.0        2.0        5.0        4.0        1.0        2.0
            1                  2.0        3.0        4.0        3.0        2.0        3.0
            2                  3.0        4.0        3.0        2.0        3.0        2.0
            3                  4.0        5.0        2.0        1.0        2.0        1.0
            ```
        """
        if n is None and range_len is None:
            raise ValueError("At least range_len or n must be set")

        if range_len is None:
            range_len = len(self.index) // n
        windows = nb.rolling_window_nb(self.to_2d_array(), range_len)
        # The last axis of the rolling result enumerates the available ranges
        max_ranges = windows.shape[2]
        if n is None:
            picked = np.arange(max_ranges)
        else:
            if n > max_ranges:
                raise ValueError(f"n cannot be bigger than the maximum number of ranges {max_ranges}")
            # Spread the n requested ranges evenly over all available ones
            picked = np.round(np.linspace(0, max_ranges - 1, n)).astype(int)
            windows = windows[:, :, picked]
        matrix = np.hstack(windows)
        last_positions = picked + range_len - 1
        range_columns = index_fns.stack_indexes(
            pd.Index(self.index[picked], name='range_start'),
            pd.Index(self.index[last_positions], name='range_end')
        )
        return pd.DataFrame(
            matrix,
            columns=index_fns.combine_indexes(self.columns, range_columns)
        )
Пример #2
0
def build_column_hierarchy(param_list, level_names, ts_columns):
    """Build columns that combine parameter levels with `ts_columns`.

    Each entry of `param_list` whose corresponding entry in `level_names` is not `None`
    becomes a column level named after that entry. Multiple levels are stacked together,
    and the result is combined with `ts_columns` using `index_fns.combine_indexes`.

    If every level name is `None`, `ts_columns` is returned unchanged."""
    checks.assert_same_shape(param_list, level_names, axis=0)

    # One index per named parameter; unnamed parameters are left out
    named_levels = [
        index_fns.index_from_values(values, name=level_names[pos])
        for pos, values in enumerate(param_list)
        if level_names[pos] is not None
    ]
    if len(named_levels) > 1:
        param_columns = index_fns.stack_indexes(*named_levels)
    else:
        param_columns = named_levels[0] if named_levels else None
    if param_columns is None:
        return ts_columns
    return index_fns.combine_indexes(param_columns, ts_columns)
Пример #3
0
def broadcast_index(args,
                    to_shape,
                    index_from=None,
                    axis=0,
                    ignore_sr_names=None,
                    **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args`
            * everything else is used as the new index/columns as is

            Any other string raises an error. This includes 'default', which is not
            resolved by this function.

        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            If None, the value is taken from `vectorbt.settings.broadcasting`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        None if `index_from` is None. Otherwise the new index/columns. If the rule did
        not yield any index, a `pd.RangeIndex` of the target length is returned.

    Raises:
        TypeError: If `index_from` is an integer that does not point to a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes differ under
            'strict', or if indexes cannot be broadcast together.

    For defaults, see `vectorbt.settings.broadcasting`.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    if ignore_sr_names is None:
        # Imported lazily: the settings are only needed to resolve the default
        from vectorbt import settings

        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]

    if index_from is None:
        # Keep the original index/columns of the objects
        return None

    # Compare with 'strict' only if index_from is a string: `==` on a pd.Index or
    # NumPy array is element-wise, and its result cannot be used as a condition
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if not checks.is_pandas(args[index_from]):
            raise TypeError(
                f"Argument under index {index_from} must be a pandas object"
            )
        new_index = index_fns.get_index(args[index_from], axis)
    elif isinstance(index_from, str):
        if index_from == 'ignore':
            # Ignore index/columns
            new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
        elif index_from in ('stack', 'strict'):
            # Check whether all indexes/columns are equal
            last_index = None  # of type pd.Index
            index_conflict = False
            for arg in args:
                if checks.is_pandas(arg):
                    index = index_fns.get_index(arg, axis)
                    if last_index is not None and not pd.Index.equals(index, last_index):
                        index_conflict = True
                    last_index = index
            if not index_conflict:
                new_index = last_index
            else:
                # If pandas objects have different index/columns, stack them together
                for arg in args:
                    if not checks.is_pandas(arg):
                        continue
                    index = index_fns.get_index(arg, axis)
                    if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                        # ignore Series name
                        continue
                    if checks.is_default_index(index):
                        # ignore simple ranges without name
                        continue
                    if new_index is None:
                        new_index = index
                        continue
                    if pd.Index.equals(index, new_index):
                        continue
                    if is_strict:
                        # If pandas objects have different index/columns, raise an exception
                        raise ValueError(
                            f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                        )
                    # Broadcasting index must follow the rules of a regular broadcasting operation
                    # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                    # 1. rule: if indexes are of the same length, they are simply stacked
                    # 2. rule: if index has one element, it gets repeated and then stacked
                    if len(index) != len(new_index):
                        if len(index) > 1 and len(new_index) > 1:
                            raise ValueError(
                                "Indexes could not be broadcast together"
                            )
                        if len(index) > len(new_index):
                            new_index = index_fns.repeat_index(
                                new_index, len(index))
                        else:
                            index = index_fns.repeat_index(
                                index, len(new_index))
                    new_index = index_fns.stack_indexes(
                        new_index, index, **kwargs)
        else:
            raise ValueError(
                f"Invalid value {index_from} for {index_str}_from"
            )
    else:
        new_index = index_from

    if new_index is None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # Since index_from is not None at this point, we choose 2)
        return pd.RangeIndex(start=0, stop=maxlen, step=1)
    if maxlen > len(new_index):
        if is_strict:
            raise ValueError(
                f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
Пример #4
0
 def apply_func(obj_index: tp.Index) -> tp.Index:
     """Stack `index` with `obj_index`; `index` goes first if `on_top` is truthy."""
     # `index`, `on_top` and `kwargs` come from the enclosing scope
     ordered = [index, obj_index] if on_top else [obj_index, index]
     return index_fns.stack_indexes(ordered, **kwargs)
Пример #5
0
def broadcast_index(args, to_shape, index_from=None, axis=0, **kwargs):
    """Produce a broadcasted index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - use the original index/columns of the objects in `args`
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * everything else is used as the new index/columns as is

            Any other string raises an error. This includes `'default'`, which is not
            resolved by this function.

        axis (int): Set to 0 for index and 1 for columns.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        None if `index_from` is None or if the rule did not yield any index.
        Otherwise the new index/columns.

    Raises:
        TypeError: If `index_from` is an integer that does not point to a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes differ under
            `'strict'`, or if indexes cannot be broadcast together.

    For defaults, see `vectorbt.defaults.broadcasting`.
    """
    index_str = 'columns' if axis == 1 else 'index'
    if axis == 1 and len(to_shape) == 1:
        to_shape = (to_shape[0], 1)
    # maxlen stores the length of the longest index
    maxlen = to_shape[1] if axis == 1 else to_shape[0]

    if index_from is None:
        # Keep the original index/columns of the objects
        return None

    # Compare with 'strict' only if index_from is a string: `==` on a pd.Index or
    # NumPy array is element-wise, and its result cannot be used as a condition
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if not checks.is_pandas(args[index_from]):
            raise TypeError(
                f"Argument under index {index_from} must be a pandas object"
            )
        new_index = index_fns.get_index(args[index_from], axis)
    elif isinstance(index_from, str):
        if index_from not in ('stack', 'strict'):
            raise ValueError(
                f"Invalid value {index_from} for {index_str}_from"
            )
        # If pandas objects have different index/columns, stack them together
        for arg in args:
            if not checks.is_pandas(arg):
                continue
            index = index_fns.get_index(arg, axis)
            if checks.is_default_index(index):
                # ignore simple ranges without name
                continue
            if new_index is None:
                new_index = index
                continue
            if pd.Index.equals(index, new_index):
                continue
            if is_strict:
                # If pandas objects have different index/columns, raise an exception
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                )
            # Broadcasting index must follow the rules of a regular broadcasting operation
            # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
            # 1. rule: if indexes are of the same length, they are simply stacked
            # 2. rule: if index has one element, it gets repeated and then stacked
            if len(index) != len(new_index):
                if len(index) > 1 and len(new_index) > 1:
                    raise ValueError(
                        "Indexes could not be broadcast together"
                    )
                if len(index) > len(new_index):
                    new_index = index_fns.repeat_index(
                        new_index, len(index))
                else:
                    index = index_fns.repeat_index(
                        index, len(new_index))
            new_index = index_fns.stack_indexes(
                new_index, index, **kwargs)
    else:
        new_index = index_from

    if new_index is not None and maxlen > len(new_index):
        if is_strict:
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
Пример #6
0
    def split_into_ranges(self,
                          n=None,
                          range_len=None,
                          start_idxs=None,
                          end_idxs=None):
        """Split into `n` ranges of `range_len` rows each, or into the ranges delimited
        by `start_idxs` and `end_idxs`.

        Either `start_idxs` together with `end_idxs`, or at least one of `range_len` and `n`,
        must be set.

        * If `start_idxs` and `end_idxs` are set, the ranges between both arrays are used.
          Each of them must be either an array of positions (last exclusive) or a pandas
          index with labels (last inclusive).
        * Otherwise, if `range_len` is `None`, it is set to `len(index) // n`.
        * If `n` is `None`, all candidate ranges are returned; otherwise `n` of them are
          picked at evenly spaced positions.

        Created levels `range_start` and `range_end` will contain labels (last inclusive).

        Raises:
            ValueError: If no splitting rule is set, if only one of `start_idxs` and
                `end_idxs` is set, if the ranges differ in length, or if `n` is bigger
                than the number of candidate ranges.

        !!! note
            Ranges must have the same length.

            The datetime-like format of the index will be lost as result of this operation.
            Make sure to store the index metadata such as frequency information beforehand.

        Example:
            ```python-repl
            >>> print(df.vbt.split_into_ranges(n=2))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
            range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
            0                  1.0        4.0        5.0        2.0        1.0        2.0
            1                  2.0        5.0        4.0        1.0        2.0        1.0
            >>> print(df.vbt.split_into_ranges(range_len=4))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
            range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
            0                  1.0        2.0        5.0        4.0        1.0        2.0
            1                  2.0        3.0        4.0        3.0        2.0        3.0
            2                  3.0        4.0        3.0        2.0        3.0        2.0
            3                  4.0        5.0        2.0        1.0        2.0        1.0
            >>> print(df.vbt.split_into_ranges(start_idxs=[0, 1], end_idxs=[4, 5]))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
            range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
            0                    1          2          5          4          1          2
            1                    2          3          4          3          2          3
            2                    3          4          3          2          3          2
            3                    4          5          2          1          2          1
            >>> print(df.vbt.split_into_ranges(
            ...     start_idxs=pd.Index(['2020-01-01', '2020-01-03']),
            ...     end_idxs=pd.Index(['2020-01-02', '2020-01-04'])
            ... ))
                                            a                     b                     c
            range_start 2020-01-01 2020-01-03 2020-01-01 2020-01-03 2020-01-01 2020-01-03
            range_end   2020-01-02 2020-01-04 2020-01-02 2020-01-04 2020-01-02 2020-01-04
            0                    1          3          5          3          1          3
            1                    2          4          4          2          2          2
            ```
        """
        start_given = start_idxs is not None
        end_given = end_idxs is not None
        if start_given != end_given:
            # Exactly one of the two boundary arrays was passed
            raise ValueError("Both start_idxs and end_idxs must be set")

        if start_given:
            if isinstance(start_idxs, pd.Index):
                # Labels -> positions of the matching rows
                start_mask = self.index.isin(start_idxs)
                start_idxs = np.where(start_mask)[0]
            else:
                start_idxs = np.asarray(start_idxs)
            if isinstance(end_idxs, pd.Index):
                # End labels are inclusive, positions are exclusive, hence + 1
                end_mask = self.index.isin(end_idxs)
                end_idxs = np.where(end_mask)[0] + 1
            else:
                end_idxs = np.asarray(end_idxs)
        else:
            if n is None and range_len is None:
                raise ValueError(
                    "At least range_len, n, or start_idxs and end_idxs must be set"
                )
            num_rows = len(self.index)
            if range_len is None:
                range_len = num_rows // n
            # Every possible window of range_len rows, shifted by one row each
            start_idxs = np.arange(num_rows - range_len + 1)
            end_idxs = np.arange(range_len, num_rows + 1)

        range_sizes = end_idxs - start_idxs
        if np.any(range_sizes != range_sizes.item(0)):
            raise ValueError("Ranges must have the same length")

        if n is not None:
            num_candidates = len(start_idxs)
            if n > num_candidates:
                raise ValueError(
                    f"n cannot be bigger than the maximum number of ranges {num_candidates}"
                )
            # Spread the n requested ranges evenly over all candidates
            picked = np.round(np.linspace(0, num_candidates - 1, n)).astype(int)
            start_idxs = start_idxs[picked]
            end_idxs = end_idxs[picked]
        matrix = nb.concat_ranges_nb(self.to_2d_array(), start_idxs, end_idxs)
        range_columns = index_fns.stack_indexes(
            pd.Index(self.index[start_idxs], name='range_start'),
            pd.Index(self.index[end_idxs - 1], name='range_end')
        )
        return pd.DataFrame(
            matrix,
            columns=index_fns.combine_indexes(self.columns, range_columns)
        )
Пример #7
0
 def apply_func(obj_index):
     """Stack `index` with `obj_index`; `index` goes first if `on_top` is truthy."""
     # `index` and `on_top` come from the enclosing scope
     first, second = (index, obj_index) if on_top else (obj_index, index)
     return index_fns.stack_indexes(first, second)