示例#1
0
def wrap_broadcasted(old_arg,
                     new_arg,
                     is_pd=False,
                     new_index=None,
                     new_columns=None):
    """Re-wrap a broadcasted array as a pandas object when requested.

    If `is_pd` is falsy, `new_arg` is returned unchanged. Otherwise `new_arg` is
    wrapped with an `ArrayWrapper` built from `new_index`/`new_columns`.

    Whenever one of them is None and `old_arg` is a pandas object, the missing
    labels are taken from `old_arg`: reused as-is if their length already matches
    the broadcasted shape, and repeated to the new length otherwise."""
    if not is_pd:
        return new_arg

    if checks.is_pandas(old_arg):
        if new_index is None:
            # Fall back to the index of the original pandas object
            src_index = index_fns.get_index(old_arg, 0)
            new_index = (
                src_index
                if old_arg.shape[0] == new_arg.shape[0]
                else index_fns.repeat_index(src_index, new_arg.shape[0])
            )
        if new_columns is None:
            # Fall back to the columns of the original pandas object
            src_columns = index_fns.get_index(old_arg, 1)
            # A one-dimensional result counts as a single column
            n_cols = new_arg.shape[1] if new_arg.ndim == 2 else 1
            new_columns = (
                src_columns
                if len(src_columns) == n_cols
                else index_fns.repeat_index(src_columns, n_cols)
            )

    wrapper = array_wrapper.ArrayWrapper(index=new_index,
                                         columns=new_columns,
                                         ndim=new_arg.ndim)
    return wrapper.wrap(new_arg)
示例#2
0
def wrap_broadcasted(old_arg, new_arg, is_pd=False, new_index=None, new_columns=None):
    """Re-wrap a broadcasted array as a pandas object when requested.

    If `is_pd` is falsy, `new_arg` is returned unchanged. Otherwise a DataFrame
    (for a two-dimensional `new_arg`) or a Series is built from `new_index` and
    `new_columns`.

    Whenever one of them is None and `old_arg` is a pandas object, the missing
    labels are taken from `old_arg`: reused as-is if their length already matches
    the broadcasted shape, and repeated to the new length otherwise."""
    if not is_pd:
        return new_arg

    if checks.is_pandas(old_arg):
        if new_index is None:
            # Fall back to the index of the original pandas object
            src_index = index_fns.get_index(old_arg, 0)
            new_index = (
                src_index
                if old_arg.shape[0] == new_arg.shape[0]
                else index_fns.repeat_index(src_index, new_arg.shape[0])
            )
        if new_columns is None:
            # Fall back to the columns of the original pandas object
            src_columns = index_fns.get_index(old_arg, 1)
            # A one-dimensional result counts as a single column
            n_cols = new_arg.shape[1] if new_arg.ndim == 2 else 1
            new_columns = (
                src_columns
                if len(src_columns) == n_cols
                else index_fns.repeat_index(src_columns, n_cols)
            )

    if new_arg.ndim == 2:
        return pd.DataFrame(new_arg, index=new_index, columns=new_columns)

    # A Series takes its name from the single column label; the label 0 is dropped
    series_name = None
    if new_columns is not None and len(new_columns) == 1:
        label = new_columns[0]
        series_name = None if label == 0 else label
    return pd.Series(new_arg, index=new_index, name=series_name)
示例#3
0
 def from_obj(cls: tp.Type[ArrayWrapperT], obj: tp.ArrayLike, *args,
              **kwargs) -> ArrayWrapperT:
     """Create an instance of `cls` from the metadata of an object.

     `obj` is first passed through `to_pd_array`; its index, columns and number
     of dimensions become the first three positional arguments of `cls`, followed
     by `*args` and `**kwargs`."""
     as_pd = to_pd_array(obj)
     meta = (
         index_fns.get_index(as_pd, 0),
         index_fns.get_index(as_pd, 1),
         as_pd.ndim,
     )
     return cls(*meta, *args, **kwargs)
示例#4
0
def broadcast_to(arg1,
                 arg2,
                 to_pd=None,
                 index_from=None,
                 columns_from=None,
                 **kwargs):
    """Broadcast `arg1` to the shape of `arg2`.

    If `to_pd` is None, the result becomes a pandas object whenever `arg2` is one.
    When the result is a pandas object and `index_from`/`columns_from` is None,
    the index/columns of `arg2` are used.

    Keyword arguments `**kwargs` are passed to `broadcast`.

    ## Example

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast_to

    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([4, 5, 6], index=pd.Index(['x', 'y', 'z']), name='a')

    >>> broadcast_to(a, sr)
    x    1
    y    2
    z    3
    Name: a, dtype: int64

    >>> broadcast_to(sr, a)
    array([4, 5, 6])
    ```
    """
    # Anything that is not already an array is converted with NumPy
    arg1, arg2 = (
        obj if checks.is_array(obj) else np.asarray(obj)
        for obj in (arg1, arg2)
    )
    to_pd = checks.is_pandas(arg2) if to_pd is None else to_pd
    if to_pd:
        # Labels not given explicitly come from the target object
        index_from = index_fns.get_index(arg2, 0) if index_from is None else index_from
        columns_from = index_fns.get_index(arg2, 1) if columns_from is None else columns_from
    target_shape = arg2.shape
    return broadcast(arg1,
                     to_shape=target_shape,
                     to_pd=to_pd,
                     index_from=index_from,
                     columns_from=columns_from,
                     **kwargs)
示例#5
0
def broadcast_to(arg1: tp.ArrayLike,
                 arg2: tp.ArrayLike,
                 to_pd: tp.Optional[bool] = None,
                 index_from: tp.Optional[IndexFromLike] = None,
                 columns_from: tp.Optional[IndexFromLike] = None,
                 **kwargs) -> BCRT:
    """Broadcast `arg1` to the shape of `arg2`.

    If `to_pd` is None, the result becomes a pandas object whenever `arg2` is one.
    Pass None to `index_from`/`columns_from` to use index/columns of the second
    argument (applies only when the result is a pandas object).

    Keyword arguments `**kwargs` are passed to `broadcast`.

    ## Example

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast_to

    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([4, 5, 6], index=pd.Index(['x', 'y', 'z']), name='a')

    >>> broadcast_to(a, sr)
    x    1
    y    2
    z    3
    Name: a, dtype: int64

    >>> broadcast_to(sr, a)
    array([4, 5, 6])
    ```
    """
    arg1, arg2 = to_any_array(arg1), to_any_array(arg2)
    to_pd = checks.is_pandas(arg2) if to_pd is None else to_pd
    if to_pd:
        # Labels not given explicitly come from the target object
        index_from = index_fns.get_index(arg2, 0) if index_from is None else index_from
        columns_from = index_fns.get_index(arg2, 1) if columns_from is None else columns_from
    target_shape = arg2.shape
    return broadcast(arg1,
                     to_shape=target_shape,
                     to_pd=to_pd,
                     index_from=index_from,
                     columns_from=columns_from,
                     **kwargs)
示例#6
0
def broadcast_index(args,
                    to_shape,
                    index_from=None,
                    axis=0,
                    ignore_sr_names=None,
                    **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects. Only pandas objects contribute
            their index/columns.
        to_shape (tuple): Target shape. A one-dimensional shape is treated as having one column.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns and use a simple range instead
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args`
              (this function then returns None)
            * any other non-string value is used as the new index/columns as-is

            Any other string raises `ValueError` here. In particular, 'default' is not resolved
            by this function (presumably the caller replaces it with the value from
            `vectorbt.settings.broadcasting` beforehand - TODO confirm).

        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            If None, taken from `vectorbt.settings.broadcasting`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if `index_from` is None.

    Raises:
        TypeError: If `index_from` is an integer and the referenced argument is not a pandas object.
        ValueError: If `index_from` is an unsupported string, if the rule is 'strict' and
            indexes differ or would need to be repeated, or if indexes cannot be broadcast together.

    For defaults, see `vectorbt.settings.broadcasting`.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    from vectorbt import settings

    if ignore_sr_names is None:
        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    # Axis label used in error messages
    index_str = 'columns' if axis == 1 else 'index'
    # A one-dimensional target shape is treated as a single column
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the target length along the requested axis
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]
    new_index = None

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(
                    f"Argument under index {index_from} must be a pandas object"
                )
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from == 'ignore':
                # Ignore index/columns
                new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
            elif index_from in ('stack', 'strict'):
                # First pass: check whether consecutive pandas objects have equal indexes/columns
                last_index = None  # of type pd.Index
                index_conflict = False
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if last_index is not None:
                            if not pd.Index.equals(index, last_index):
                                index_conflict = True
                        last_index = index
                        continue
                if not index_conflict:
                    # No conflict: reuse the index of the last pandas object
                    # (stays None if there was no pandas object at all)
                    new_index = last_index
                else:
                    # Second pass: if pandas objects have different index/columns, stack them together
                    for arg in args:
                        if checks.is_pandas(arg):
                            index = index_fns.get_index(arg, axis)
                            if axis == 1 and checks.is_series(
                                    arg) and ignore_sr_names:
                                # ignore Series name
                                continue
                            if checks.is_default_index(index):
                                # ignore simple ranges without name
                                continue
                            if new_index is None:
                                # First index that takes part in stacking
                                new_index = index
                            else:
                                if index_from == 'strict':
                                    # If pandas objects have different index/columns, raise an exception
                                    if not pd.Index.equals(index, new_index):
                                        raise ValueError(
                                            f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                                        )
                                # Broadcasting index must follow the rules of a regular broadcasting operation
                                # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                                # 1. rule: if indexes are of the same length, they are simply stacked
                                # 2. rule: if index has one element, it gets repeated and then stacked

                                if pd.Index.equals(index, new_index):
                                    # Identical index adds no new information
                                    continue
                                if len(index) != len(new_index):
                                    if len(index) > 1 and len(new_index) > 1:
                                        raise ValueError(
                                            "Indexes could not be broadcast together"
                                        )
                                    # One of both has at most one element: repeat the shorter one
                                    if len(index) > len(new_index):
                                        new_index = index_fns.repeat_index(
                                            new_index, len(index))
                                    elif len(index) < len(new_index):
                                        index = index_fns.repeat_index(
                                            index, len(new_index))
                                new_index = index_fns.stack_indexes(
                                    new_index, index, **kwargs)
            else:
                raise ValueError(
                    f"Invalid value {index_from} for {'columns' if axis == 1 else 'index'}_from"
                )
        else:
            # Any non-integer, non-string value is used as the new index/columns as-is
            new_index = index_from
        if new_index is not None:
            if maxlen > len(new_index):
                if index_from == 'strict':
                    raise ValueError(
                        f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                    )
                # This happens only when some numpy object is longer than the new pandas index
                # In this case, new pandas index (one element) should be repeated to match this length.
                if maxlen > 1 and len(new_index) > 1:
                    raise ValueError("Indexes could not be broadcast together")
                new_index = index_fns.repeat_index(new_index, maxlen)
        elif index_from is not None:
            # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
            # In case when index_from is not None, we choose 2)
            new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
    return new_index
示例#7
0
    def _indexing_func_meta(self,
                            pd_indexing_func,
                            index=None,
                            columns=None,
                            column_only_select=None,
                            group_select=None,
                            group_by=None):
        """Perform indexing on `ArrayWrapper` and also return indexing metadata.

        Takes into account column grouping.

        The selection is discovered by applying `pd_indexing_func` to helper pandas objects
        filled with integer positions and reading the surviving positions back.

        Set `column_only_select` to True to index the array wrapper as a Series of columns.
        This way, selection of index (axis 0) can be avoided. Set `group_select` to True
        to select groups rather than columns. Takes effect only if grouping is enabled.

        Args:
            pd_indexing_func (callable): Function that takes a pandas object and returns
                the result of indexing it.
            index: Index to use instead of the wrapper's index.
            columns: Columns to use instead of the wrapper's columns (or group labels
                if groups are being selected).
            column_only_select (bool): If None, taken from the instance and then from
                `vectorbt.settings.array_wrapper`.
            group_select (bool): If None, taken from the instance and then from
                `vectorbt.settings.array_wrapper`.
            group_by: Passed to `regroup` before indexing.

        Returns:
            A tuple of four elements: the new wrapper, the selected row positions, the selected
            column positions (group positions if groups were selected), and the selected
            positions of ungrouped columns (equal to the third element unless groups were selected).

        Raises:
            IndexingError: If a scalar would be selected, if a column is selected on a Series
                in column-only mode, if the selection would flip index and columns while
                grouping is enabled, or if the selection cannot be mapped to rows and columns.

        !!! note
            If `column_only_select` is True, make sure to index the array wrapper
            as a Series of columns rather than a DataFrame. For example, the operation
            `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
            object is already a Series and thus has only one column/group."""
        from vectorbt import settings

        # Resolve flags: explicit argument > instance attribute > global settings
        if column_only_select is None:
            column_only_select = self.column_only_select
        if column_only_select is None:
            column_only_select = settings.array_wrapper['column_only_select']
        if group_select is None:
            group_select = self.group_select
        if group_select is None:
            group_select = settings.array_wrapper['group_select']
        _self = self.regroup(group_by)
        # Selecting groups makes sense only if grouping is enabled
        group_select = group_select and _self.grouper.is_grouped()
        if index is None:
            index = _self.index
        if columns is None:
            if group_select:
                columns = _self.grouper.get_columns()
            else:
                columns = _self.columns
        if group_select:
            # Groups as columns
            i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
        else:
            # Columns as columns
            i_wrapper = ArrayWrapper(index, columns, _self.ndim)
        n_rows = len(index)
        n_cols = len(columns)

        if column_only_select:
            if i_wrapper.ndim == 1:
                raise IndexingError(
                    "Columns only: Attempting to select a column on a Series")
            # Series of column positions labeled by columns
            col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols),
                                                columns=columns)
            try:
                col_mapper = pd_indexing_func(col_mapper)
            except pd.core.indexing.IndexingError as e:
                warnings.warn(
                    "Columns only: Make sure to treat this object "
                    "as a Series of columns rather than a DataFrame",
                    stacklevel=2)
                raise e
            if checks.is_series(col_mapper):
                # Multiple columns selected
                new_columns = col_mapper.index
                col_idxs = col_mapper.values
                new_ndim = 2
            else:
                # A single column position selected
                new_columns = columns[[col_mapper]]
                col_idxs = col_mapper
                new_ndim = 1
            # Rows are left untouched in column-only mode
            new_index = index
            idx_idxs = np.arange(len(index))
        else:
            # Each cell holds the position of its row
            idx_mapper = i_wrapper.wrap(np.broadcast_to(
                np.arange(n_rows)[:, None], (n_rows, n_cols)),
                                        index=index,
                                        columns=columns)
            idx_mapper = pd_indexing_func(idx_mapper)
            if i_wrapper.ndim == 1:
                if not checks.is_series(idx_mapper):
                    raise IndexingError("Selection of a scalar is not allowed")
                idx_idxs = idx_mapper.values
                col_idxs = 0
            else:
                # Each cell holds the position of its column
                col_mapper = i_wrapper.wrap(np.broadcast_to(
                    np.arange(n_cols), (n_rows, n_cols)),
                                            index=index,
                                            columns=columns)
                col_mapper = pd_indexing_func(col_mapper)
                if checks.is_frame(idx_mapper):
                    idx_idxs = idx_mapper.values[:, 0]
                    col_idxs = col_mapper.values[0]
                elif checks.is_series(idx_mapper):
                    one_col = np.all(
                        col_mapper.values == col_mapper.values.item(0))
                    one_idx = np.all(
                        idx_mapper.values == idx_mapper.values.item(0))
                    if one_col and one_idx:
                        # One index and one column selected, multiple times
                        raise IndexingError(
                            "Must select at least two unique indices in one of both axes"
                        )
                    elif one_col:
                        # One column selected
                        idx_idxs = idx_mapper.values
                        col_idxs = col_mapper.values[0]
                    elif one_idx:
                        # One index selected
                        idx_idxs = idx_mapper.values[0]
                        col_idxs = col_mapper.values
                    else:
                        raise IndexingError
                else:
                    raise IndexingError("Selection of a scalar is not allowed")
            new_index = index_fns.get_index(idx_mapper, 0)
            if not isinstance(idx_idxs, np.ndarray):
                # One index selected
                new_columns = index[[idx_idxs]]
            elif not isinstance(col_idxs, np.ndarray):
                # One column selected
                new_columns = columns[[col_idxs]]
            else:
                new_columns = index_fns.get_index(idx_mapper, 1)
            new_ndim = idx_mapper.ndim

        if _self.grouper.is_grouped():
            # Grouping enabled
            if np.asarray(idx_idxs).ndim == 0:
                raise IndexingError(
                    "Flipping index and columns is not allowed")

            if group_select:
                # Selection based on groups
                # Get indices of columns corresponding to selected groups
                group_idxs = col_idxs
                group_idxs_arr = reshape_fns.to_1d(group_idxs)
                group_start_idxs = _self.grouper.get_group_start_idxs(
                )[group_idxs_arr]
                group_end_idxs = _self.grouper.get_group_end_idxs(
                )[group_idxs_arr]
                ungrouped_col_idxs = get_ranges_arr(group_start_idxs,
                                                    group_end_idxs)
                ungrouped_columns = _self.columns[ungrouped_col_idxs]
                if new_ndim == 1 and len(ungrouped_columns) == 1:
                    ungrouped_ndim = 1
                    ungrouped_col_idxs = ungrouped_col_idxs[0]
                else:
                    ungrouped_ndim = 2

                # Get indices of selected groups corresponding to the new columns
                # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
                group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
                ungrouped_group_idxs = np.full(len(ungrouped_columns), 0)
                # Mark the position where each subsequent group starts. Those positions are the
                # cumulative group lengths, not the lengths themselves: for lengths [2, 3, 1]
                # the starts are 2 and 5, which yields [0, 0, 1, 1, 1, 2] after the cumulative sum
                ungrouped_group_idxs[np.cumsum(group_lens)[:-1]] = 1
                ungrouped_group_idxs = np.cumsum(ungrouped_group_idxs)

                return _self.copy(index=new_index,
                                  columns=ungrouped_columns,
                                  ndim=ungrouped_ndim,
                                  grouped_ndim=new_ndim,
                                  group_by=new_columns[ungrouped_group_idxs]
                                  ), idx_idxs, group_idxs, ungrouped_col_idxs

            # Selection based on columns
            col_idxs_arr = reshape_fns.to_1d(col_idxs)
            return _self.copy(index=new_index,
                              columns=new_columns,
                              ndim=new_ndim,
                              grouped_ndim=None,
                              group_by=_self.grouper.group_by[col_idxs_arr]
                              ), idx_idxs, col_idxs, col_idxs

        # Grouping disabled
        return _self.copy(index=new_index,
                          columns=new_columns,
                          ndim=new_ndim,
                          grouped_ndim=None,
                          group_by=None), idx_idxs, col_idxs, col_idxs
示例#8
0
 def from_obj(cls, obj, *args, **kwargs):
     """Create an instance of `cls` from the metadata of an object.

     The index, columns and number of dimensions of `obj` become the first three
     positional arguments of `cls`, followed by `*args` and `**kwargs`."""
     meta = (
         index_fns.get_index(obj, 0),
         index_fns.get_index(obj, 1),
         obj.ndim,
     )
     return cls(*meta, *args, **kwargs)
示例#9
0
def broadcast_index(args, to_shape, index_from=None, axis=0, **kwargs):
    """Produce a broadcasted index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - use the original index/columns of the objects in `args` (returns None)
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * any other string raises `ValueError`
            * everything else is used as the new index/columns

        axis (int): Set to 0 for index and 1 for columns.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    For defaults, see `vectorbt.defaults.broadcasting`.
    """
    axis_label = 'columns' if axis == 1 else 'index'
    # For columns, a one-dimensional target shape counts as a single column
    shape = (to_shape[0], 1) if axis == 1 and len(to_shape) == 1 else to_shape
    # Target length along the requested axis
    target_len = shape[1] if axis == 1 else shape[0]

    if index_from is None:
        return None

    result = None
    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if not checks.is_pandas(args[index_from]):
            raise TypeError(
                f"Argument under index {index_from} must be a pandas object"
            )
        result = index_fns.get_index(args[index_from], axis)
    elif isinstance(index_from, str):
        if index_from not in ('stack', 'strict'):
            raise ValueError(
                f"Invalid value {index_from} for {'columns' if axis == 1 else 'index'}_from"
            )
        for obj in args:
            if not checks.is_pandas(obj):
                continue
            candidate = index_fns.get_index(obj, axis)
            if checks.is_default_index(candidate):
                # Simple ranges without name carry no information
                continue
            if result is None:
                result = candidate
                continue
            if index_from == 'strict' and not pd.Index.equals(candidate, result):
                # Strict mode tolerates no differences between pandas objects
                raise ValueError(
                    f"Broadcasting {axis_label} is not allowed for {axis_label}_from=strict"
                )
            if pd.Index.equals(candidate, result):
                continue
            # Stacking follows the regular broadcasting rules
            # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
            # 1. rule: indexes of the same length are simply stacked
            # 2. rule: an index with one element gets repeated and then stacked
            if len(candidate) != len(result):
                if len(candidate) > 1 and len(result) > 1:
                    raise ValueError(
                        "Indexes could not be broadcast together"
                    )
                if len(candidate) > len(result):
                    result = index_fns.repeat_index(result, len(candidate))
                else:
                    candidate = index_fns.repeat_index(candidate, len(result))
            result = index_fns.stack_indexes(result, candidate, **kwargs)
    else:
        result = index_from

    if result is not None and target_len > len(result):
        if index_from == 'strict':
            raise ValueError(
                f"Broadcasting {axis_label} is not allowed for {axis_label}_from=strict"
            )
        # Happens only when some numpy object is longer than the new pandas index:
        # the new index (one element) is then repeated to match the target length
        if target_len > 1 and len(result) > 1:
            raise ValueError("Indexes could not be broadcast together")
        result = index_fns.repeat_index(result, target_len)
    return result