Пример #1
0
def broadcast_to(arg1,
                 arg2,
                 index_from=1,
                 columns_from=1,
                 writeable=False,
                 copy_kwargs={},
                 raw=False,
                 **kwargs):
    """Broadcast `arg1` to the shape of `arg2`.

    Arguments that are not array-like are first converted with `np.asarray`.
    If at least one argument is a pandas object, the resulting index and columns
    are derived with `broadcast_index` and the broadcasted array is re-wrapped
    by `wrap_broadcasted`.

    Args:
        arg1: Object to broadcast.
        arg2: Object whose shape is the target.
        index_from: Forwarded to `broadcast_index` as `index_from` for axis 0.
        columns_from: Forwarded to `broadcast_index` as `index_from` for axis 1.
        writeable: Used as the `copy` argument of `np.array` for the broadcasted
            array. Has an effect only if broadcasting was needed.
        copy_kwargs: Keyword arguments passed to `np.array`. Not mutated here.
        raw: Not used by this function body.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        Whatever `wrap_broadcasted` returns for the broadcasted `arg1`.
    """
    if not checks.is_array_like(arg1):
        arg1 = np.asarray(arg1)
    if not checks.is_array_like(arg2):
        arg2 = np.asarray(arg2)

    is_2d = any(obj.ndim > 1 for obj in (arg1, arg2))
    is_pd = checks.is_pandas(arg1) or checks.is_pandas(arg2)

    # Index/columns stay unset unless pandas metadata is involved
    new_index = new_columns = None
    if is_pd:
        if is_2d:
            # Align dimensionality of pandas objects before broadcasting their labels
            if checks.is_series(arg1):
                arg1 = arg1.to_frame()
            if checks.is_series(arg2):
                arg2 = arg2.to_frame()

        new_index = broadcast_index(arg1, arg2, index_from=index_from,
                                    axis=0, is_2d=is_2d, **kwargs)
        new_columns = broadcast_index(arg1, arg2, index_from=columns_from,
                                      axis=1, is_2d=is_2d, **kwargs)

    # NOTE(review): `copy=False` is strict under NumPy >= 2.0 (raises if a copy
    # cannot be avoided) - confirm the supported NumPy range.
    if is_broadcasting_needed(arg1, arg2):
        expanded = np.broadcast_to(arg1, arg2.shape, subok=True)
        arg1_new = np.array(expanded, copy=writeable, **copy_kwargs)
    else:
        arg1_new = np.array(arg1, copy=False, **copy_kwargs)

    return wrap_broadcasted(arg1, arg1_new, is_pd=is_pd,
                            new_index=new_index, new_columns=new_columns)
Пример #2
0
def wrap_broadcasted(old_arg,
                     new_arg,
                     is_pd=False,
                     new_index=None,
                     new_columns=None):
    """Give the broadcasted array `new_arg` the container type of `old_arg`.

    Returns `new_arg` untouched when `is_pd` is False, or when `old_arg` is not
    a pandas object and neither `new_index` nor `new_columns` is provided.
    Otherwise wraps `new_arg` with `wrap_array`. For a pandas `old_arg`, any
    missing index/columns are taken from it, and repeated with `index_fns.repeat`
    when the length along that axis differs.
    """
    if not is_pd:
        return new_arg

    if not checks.is_pandas(old_arg):
        if new_index is None and new_columns is None:
            # Not pandas and no labels requested: keep the plain array
            return new_arg
        return wrap_array(new_arg, index=new_index, columns=new_columns)

    if new_index is None:
        # Fall back to the index of the original pandas object
        n_rows = new_arg.shape[0]
        if old_arg.shape[0] == n_rows:
            new_index = old_arg.index
        else:
            new_index = index_fns.repeat(old_arg.index, n_rows)

    if new_columns is None and new_arg.ndim == 2:
        # Fall back to the columns of the original pandas object
        if checks.is_series(old_arg):
            old_arg = old_arg.to_frame()
        n_cols = new_arg.shape[1]
        if old_arg.shape[1] == n_cols:
            new_columns = old_arg.columns
        else:
            new_columns = index_fns.repeat(old_arg.columns, n_cols)

    return wrap_array(new_arg, index=new_index, columns=new_columns)
Пример #3
0
def wrap_broadcasted(old_arg,
                     new_arg,
                     is_pd=False,
                     new_index=None,
                     new_columns=None):
    """Turn the newly broadcasted array back into a pandas object where applicable.

    With `is_pd` False the array is returned as-is. If `old_arg` is a pandas
    object, a missing `new_index`/`new_columns` is taken from it (repeated via
    `index_fns.repeat_index` when the size along that axis changed). If `old_arg`
    is not pandas and no index/columns are given, the plain array is returned.
    In all remaining cases the array is wrapped with `ArrayWrapper`.
    """
    if not is_pd:
        return new_arg

    if checks.is_pandas(old_arg):
        if new_index is None:
            # Reuse or repeat the original index
            same_rows = old_arg.shape[0] == new_arg.shape[0]
            new_index = (old_arg.index if same_rows
                         else index_fns.repeat_index(old_arg.index, new_arg.shape[0]))
        if new_columns is None and new_arg.ndim == 2:
            # Reuse or repeat the original columns
            if checks.is_series(old_arg):
                old_arg = old_arg.to_frame()
            same_cols = old_arg.shape[1] == new_arg.shape[1]
            new_columns = (old_arg.columns if same_cols
                           else index_fns.repeat_index(old_arg.columns, new_arg.shape[1]))
    elif new_index is None and new_columns is None:
        # Not pandas and no rules set: hand back the plain NumPy array
        return new_arg

    wrapper = ArrayWrapper(index=new_index, columns=new_columns)
    return wrapper.wrap(new_arg)
Пример #4
0
def broadcast_ts(ts, params_len, new_columns):
    """Wrap time series `ts` under `new_columns`, tiling its values if needed.

    If `ts` is a Series or has fewer columns than `new_columns` has labels, its
    values are tiled `params_len` times along axis 1 before wrapping; otherwise
    `ts` itself is wrapped.
    """
    needs_tiling = checks.is_series(ts) or len(new_columns) > ts.shape[1]
    if needs_tiling:
        data = reshape_fns.tile(ts.values, params_len, axis=1)
    else:
        data = ts
    return ts.vbt.wrap(data, columns=new_columns)
Пример #5
0
 def __call__(self, trace_names=None, **kwargs):
     """Build a `widgets.Histogram` from the values of the wrapped object.

     If `trace_names` is not given, it defaults to the columns of the object
     brought to two dimensions, but only for a DataFrame or a named Series;
     otherwise it stays `None`. `**kwargs` are forwarded to `widgets.Histogram`.
     """
     obj = self._obj
     if trace_names is None:
         has_labels = checks.is_frame(obj) or (
             checks.is_series(obj) and obj.name is not None)
         if has_labels:
             trace_names = reshape_fns.to_2d(obj).columns
     return widgets.Histogram(trace_names=trace_names, data=obj.values, **kwargs)
Пример #6
0
 def __call__(self, x_labels=None, trace_names=None, **kwargs):
     """Build a `widgets.Scatter` from the values of the wrapped object.

     `x_labels` defaults to the index of the object. `trace_names` defaults to
     the columns of the object brought to two dimensions, but only for a
     DataFrame or a named Series; otherwise it stays `None`. `**kwargs` are
     forwarded to `widgets.Scatter`.
     """
     obj = self._obj
     if x_labels is None:
         x_labels = obj.index
     if trace_names is None:
         has_labels = checks.is_frame(obj) or (
             checks.is_series(obj) and obj.name is not None)
         if has_labels:
             trace_names = reshape_fns.to_2d(obj).columns
     return widgets.Scatter(x_labels, trace_names=trace_names, data=obj.values, **kwargs)
Пример #7
0
 def empty_like(cls, other, fill_value=np.nan):
     """Create an empty Series/DataFrame shaped and labeled like `other`.

     Delegates to `cls.empty` with the shape and index of `other`, plus its
     `name` (for a Series) or its `columns` (otherwise), filled with `fill_value`.
     """
     is_sr = checks.is_series(other)
     # A Series carries a name, anything else is expected to carry columns
     label_kwargs = {'name': other.name} if is_sr else {'columns': other.columns}
     return cls.empty(other.shape,
                      fill_value=fill_value,
                      index=other.index,
                      **label_kwargs)
Пример #8
0
def mapper_indexing_func(mapper, ref_obj, pd_indexing_func):
    """Index the `mapper` Series the same way `pd_indexing_func` indexes `ref_obj`.

    A range of positions `0..len(mapper)-1` is broadcast to `ref_obj`, indexed
    with `pd_indexing_func`, and the first row of the outcome is used to select
    entries of `mapper` by position.

    Returns a Series named after `mapper`, indexed by the selected columns (if
    the indexed object is a DataFrame) or by the name of the selected Series.
    Returns `None` if the indexed object is neither.
    """
    checks.assert_type(mapper, pd.Series)
    checks.assert_type(ref_obj, (pd.Series, pd.DataFrame))

    positions = np.arange(len(mapper.index))
    selected = pd_indexing_func(reshape_fns.broadcast_to(positions, ref_obj))
    picked = mapper.iloc[selected.values[0]]

    if checks.is_frame(selected):
        return pd.Series(picked.values, index=selected.columns, name=mapper.name)
    if checks.is_series(selected):
        return pd.Series([picked], index=[selected.name], name=mapper.name)
    return None
Пример #9
0
def get_index(arg, axis):
    """Return the labels of pandas object `arg` along `axis` (0 or 1).

    For axis 0 this is `arg.index`. For axis 1 it is `arg.columns` for a
    DataFrame; for a Series it is a one-element index holding the Series name,
    or `0` if the Series has no name.
    """
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    checks.assert_in(axis, (0, 1))

    if axis == 0:
        return arg.index
    if not checks.is_series(arg):
        return arg.columns
    # An unnamed Series gets label 0, same as how pandas does it
    label = 0 if arg.name is None else arg.name
    return pd.Index([label])
Пример #10
0
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame,
                       pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Apply `pd_indexing_func` to `mapper` as if it were laid out like `ref_obj`.

    Positions `0..len(mapper)-1` are broadcast to `ref_obj` and indexed with
    `pd_indexing_func`; the first row of the outcome selects entries of `mapper`
    by position.

    Returns a Series named after `mapper`, indexed by the resulting columns
    (DataFrame outcome) or by the resulting Series name (Series outcome), and
    `None` for any other outcome.
    """
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    range_mapper = reshape_fns.broadcast_to(np.arange(len(mapper.index)), ref_obj)
    indexed = pd_indexing_func(range_mapper)
    chosen = mapper.iloc[indexed.values[0]]

    if checks.is_frame(indexed):
        values, labels = chosen.values, indexed.columns
    elif checks.is_series(indexed):
        values, labels = [chosen], [indexed.name]
    else:
        return None
    return pd.Series(values, index=labels, name=mapper.name)
Пример #11
0
 def indexing_func(self: BaseAccessorT, pd_indexing_func: tp.PandasIndexingFunc, **kwargs) -> BaseAccessorT:
     """Perform indexing on `BaseAccessor`.

     Row and column positions come from `self.wrapper.indexing_func_meta`, are
     applied to the 2-dim array of this accessor, and the selection is wrapped
     with the new wrapper. The returned copy uses the Series accessor class if
     the wrapped result is a Series, and the DataFrame accessor class otherwise.
     """
     meta = self.wrapper.indexing_func_meta(pd_indexing_func, **kwargs)
     new_wrapper, idx_idxs, _, col_idxs = meta
     # Select rows first, then columns
     selected = self.to_2d_array()[idx_idxs, :][:, col_idxs]
     new_obj = new_wrapper.wrap(selected, group_by=False)
     if checks.is_series(new_obj):
         accessor_cls = self.sr_accessor_cls
     else:
         accessor_cls = self.df_accessor_cls
     return self.replace(cls_=accessor_cls, obj=new_obj, wrapper=new_wrapper)
Пример #12
0
def soft_to_ndim(arg, ndim):
    """Softly bring `arg` to `ndim` dimensions (1 or 2), leaving it alone if impossible.

    Downgrades a 2-dim object with exactly one column to 1-dim, upgrades a
    1-dim object to 2-dim by adding a column axis, and otherwise returns `arg`
    unchanged (after conversion with `np.asarray` if it was not an array).
    """
    if not checks.is_array(arg):
        arg = np.asarray(arg)

    if ndim == 1 and arg.ndim == 2 and arg.shape[1] == 1:
        # Downgrade: a single column becomes 1-dim
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]

    if ndim == 2 and arg.ndim == 1:
        # Upgrade: 1-dim becomes a single column
        return arg.to_frame() if checks.is_series(arg) else arg[:, None]

    return arg
Пример #13
0
 def empty_like(cls,
                other: tp.SeriesFrame,
                fill_value: tp.Scalar = np.nan,
                **kwargs) -> tp.SeriesFrame:
     """Create an empty Series/DataFrame shaped and labeled like `other`.

     Delegates to `cls.empty` with the shape and index of `other`, plus its
     `name` (for a Series) or its `columns` (otherwise), filled with
     `fill_value`. `**kwargs` are forwarded to `cls.empty`.
     """
     if checks.is_series(other):
         label_kwargs = {'name': other.name}
     else:
         label_kwargs = {'columns': other.columns}
     return cls.empty(other.shape,
                      fill_value=fill_value,
                      index=other.index,
                      **label_kwargs,
                      **kwargs)
Пример #14
0
def soft_to_ndim(arg: tp.ArrayLike,
                 ndim: int,
                 raw: bool = False) -> tp.AnyArray:
    """Softly bring `arg` to `ndim` dimensions (1 or 2), leaving it alone if impossible.

    `arg` is first converted with `to_any_array` (passing `raw` along). A 2-dim
    object with exactly one column is downgraded to 1-dim, a 1-dim object is
    upgraded to 2-dim by adding a column axis; anything else is returned as is.
    """
    arg = to_any_array(arg, raw=raw)
    current_ndim = arg.ndim

    if ndim == 1 and current_ndim == 2:
        if arg.shape[1] == 1:
            # Downgrade: a single column becomes 1-dim
            if checks.is_frame(arg):
                return arg.iloc[:, 0]
            return arg[:, 0]
    elif ndim == 2 and current_ndim == 1:
        # Upgrade: 1-dim becomes a single column
        if checks.is_series(arg):
            return arg.to_frame()
        return arg[:, None]

    return arg
Пример #15
0
def to_2d(arg, raw=False, expand_axis=1):
    """Reshape `arg` to two dimensions.

    If `raw` is True, or `arg` is not an array, it is converted with `np.asarray`
    first. A 2-dim input is returned unchanged and a 0-dim input becomes shape
    `(1, 1)`. A 1-dim input is expanded along `expand_axis`: a Series becomes a
    one-row DataFrame whose columns are its index (axis 0) or a one-column
    DataFrame (axis 1); other inputs go through `np.expand_dims`.

    Raises:
        ValueError: If `arg` has more than two dimensions.
    """
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)

    if arg.ndim == 2:
        return arg
    if arg.ndim == 0:
        return arg.reshape((1, 1))
    if arg.ndim != 1:
        raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 2 dimensions")

    if checks.is_series(arg):
        if expand_axis == 0:
            return pd.DataFrame(arg.values[None, :], columns=arg.index)
        if expand_axis == 1:
            return arg.to_frame()
    return np.expand_dims(arg, expand_axis)
Пример #16
0
def to_2d(arg, raw=False, expand_axis=1):
    """Reshape `arg` to two dimensions.

    `arg` is converted with `np.asarray` if `raw` is True or if it is not
    array-like. A 2-dim input is returned unchanged and a 0-dim input becomes
    shape `(1, 1)`. A 1-dim Series becomes a DataFrame (one row for
    `expand_axis=0`, one column for `expand_axis=1`); any other 1-dim input is
    expanded with `np.expand_dims`.

    Raises:
        ValueError: If `arg` has more than two dimensions.
    """
    if raw or not checks.is_array_like(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2:
        return arg
    if n_dims == 0:
        return arg.reshape((1, 1))
    if n_dims == 1:
        if checks.is_series(arg):
            if expand_axis == 0:
                # Series values become a single row, its index the columns
                return pd.DataFrame(arg.values[None, :], columns=arg.index)
            if expand_axis == 1:
                return arg.to_frame()
        return np.expand_dims(arg, expand_axis)
    raise ValueError(
        f"Cannot reshape a {arg.ndim}-dimensional array to 2 dimensions")
Пример #17
0
def to_mapping(mapping_like: tp.MappingLike, reverse: bool = False) -> dict:
    """Convert a mapping-like object to a dict.

    * A named tuple maps its values to its field names; key `-1` is mapped to
      `None` if `-1` is not among the values.
    * A mapping is copied into a dict.
    * An index is first turned into a Series with a fresh integer index; a
      Series is converted with `to_dict`.
    * Anything else is enumerated, mapping position to element.

    Enable `reverse` to apply `reverse_mapping` on the resulting dict.
    """
    if checks.is_namedtuple(mapping_like):
        mapping = {value: field for field, value in mapping_like._asdict().items()}
        if -1 not in mapping_like:
            mapping[-1] = None
    elif checks.is_mapping(mapping_like):
        mapping = dict(mapping_like)
    else:
        if checks.is_index(mapping_like):
            mapping_like = mapping_like.to_series().reset_index(drop=True)
        if checks.is_series(mapping_like):
            mapping = mapping_like.to_dict()
        else:
            mapping = dict(enumerate(mapping_like))

    return reverse_mapping(mapping) if reverse else mapping
Пример #18
0
    def from_orders(cls,
                    main_price,
                    order_size,
                    size_type=SizeType.Shares,
                    order_price=None,
                    init_capital=None,
                    fees=None,
                    fixed_fees=None,
                    slippage=None,
                    broadcast_kwargs={},
                    freq=None,
                    **kwargs):
        """Build portfolio from orders.

        Starting with initial capital `init_capital`, at each time step, orders the number
        of shares specified in `order_size` for `order_price`.

        For more details, see `vectorbt.portfolio.nb.simulate_from_orders_nb`.

        Args:
            main_price (pandas_like): Main price of the asset, such as close. Will broadcast.
            order_size (float or array_like): The amount of shares to order. Will broadcast.

                If the size is positive, this is the number of shares to buy.
                If the size is negative, this is the number of shares to sell.
                To buy/sell everything, set the size to `np.inf`.
            size_type (int or array_like): See `vectorbt.portfolio.enums.SizeType`.
            order_price (array_like): Order price. Defaults to `main_price`. Will broadcast.
            init_capital (float or array_like): The initial capital. Will broadcast.

                Allowed is either a single value or value per column.
            fees (float or array_like): Fees in percentage of the order value. Will broadcast.
            fixed_fees (float or array_like): Fixed amount of fees to pay per order. Will broadcast.
            slippage (float or array_like): Slippage in percentage of price. Will broadcast.
            broadcast_kwargs: Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
            freq (any): Index frequency in case `main_price.index` is not datetime-like.
            **kwargs: Keyword arguments passed to the `__init__` method.

        Arguments left as `None` (`init_capital`, `fees`, `fixed_fees`, `slippage`) are
        read from `vectorbt.defaults.portfolio`.

        All time series will be broadcasted together using `vectorbt.base.reshape_fns.broadcast`.
        At the end, they will have the same metadata.

        Example:
            Portfolio from various order sequences:
            ```python-repl
            >>> portfolio = vbt.Portfolio.from_orders(price, orders,
            ...     init_capital=100, fees=0.0025, fixed_fees=1., slippage=0.001)

            >>> portfolio.orders.records
                col  idx        size  price      fees  side
            0     0    0   98.654463  1.001  1.246883     0
            1     1    0    1.000000  1.001  1.002502     0
            2     1    1    1.000000  2.002  1.005005     0
            3     1    2    1.000000  3.003  1.007507     0
            4     1    3    1.000000  2.002  1.005005     0
            5     1    4    4.000000  0.999  1.009990     1
            6     2    0   98.654463  1.001  1.246883     0
            7     2    1   98.654463  1.998  1.492779     1
            8     2    2   64.646521  3.003  1.485334     0
            9     2    3   64.646521  1.998  1.322909     1
            10    2    4  126.398131  1.001  1.316311     0
            >>> portfolio.equity
                             a          b           c
            2020-01-01   98.654463  98.996498   98.654463
            2020-01-02  197.308925  98.989493  195.618838
            2020-01-03  295.963388  99.978985  193.939564
            2020-01-04  197.308925  95.971980  127.840840
            2020-01-05   98.654463  90.957990  126.398131
            ```
        """
        # Fill in whatever the caller left unset
        if order_price is None:
            order_price = main_price
        if init_capital is None:
            init_capital = defaults.portfolio['init_capital']
        if fees is None:
            fees = defaults.portfolio['fees']
        if fixed_fees is None:
            fixed_fees = defaults.portfolio['fixed_fees']
        if slippage is None:
            slippage = defaults.portfolio['slippage']

        # The main price must be a pandas object
        checks.assert_type(main_price, (pd.Series, pd.DataFrame))

        # Only main_price (first position) is broadcasted; the remaining seven
        # inputs are kept raw and can remain unchanged thanks to flexible indexing
        keep_raw = (False,) + (True,) * 7
        broadcasted = reshape_fns.broadcast(
            main_price, order_size, size_type, order_price, fees, fixed_fees, slippage, init_capital,
            **broadcast_kwargs, writeable=True, keep_raw=keep_raw)
        (main_price, order_size, size_type, order_price,
         fees, fixed_fees, slippage, init_capital) = broadcasted

        # A Series counts as a single column
        n_cols = main_price.shape[1] if main_price.ndim > 1 else 1
        target_shape = (main_price.shape[0], n_cols)

        # Run the simulation
        order_records, cash, shares = nb.simulate_from_orders_nb(
            target_shape,
            init_capital,
            order_size,
            size_type,
            order_price,
            fees,
            fixed_fees,
            slippage,
            is_2d=main_price.ndim == 2)

        # Give the outputs the metadata of the main price
        cash = main_price.vbt.wrap(cash)
        shares = main_price.vbt.wrap(shares)
        orders = Orders(order_records, main_price, freq=freq)
        if checks.is_series(main_price):
            init_capital = init_capital.item(0)
        else:
            capital_per_column = np.broadcast_to(init_capital, (n_cols, ))
            init_capital = main_price.vbt.wrap_reduced(capital_per_column)

        return cls(main_price, init_capital, orders, cash, shares, freq=freq, **kwargs)
Пример #19
0
    def _indexing_func_meta(self,
                            pd_indexing_func,
                            index=None,
                            columns=None,
                            column_only_select=None,
                            group_select=None,
                            group_by=None):
        """Perform indexing on `ArrayWrapper` and also return indexing metadata.

        Takes into account column grouping.

        Set `column_only_select` to True to index the array wrapper as a Series of columns.
        This way, selection of index (axis 0) can be avoided. Set `group_select` to True
        to select groups rather than columns. Takes effect only if grouping is enabled.

        !!! note
            If `column_only_select` is True, make sure to index the array wrapper
            as a Series of columns rather than a DataFrame. For example, the operation
            `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
            object is already a Series and thus has only one column/group.

        Args:
            pd_indexing_func: Function that applies a pandas indexing operation to
                the pandas object it is given.
            index: Index to use instead of the index of the regrouped wrapper.
            columns: Columns to use instead of the columns (or group labels, if
                groups are being selected) of the regrouped wrapper.
            column_only_select: Falls back to `self.column_only_select`, then to
                `settings.array_wrapper['column_only_select']`.
            group_select: Falls back to `self.group_select`, then to
                `settings.array_wrapper['group_select']`.
            group_by: Passed to `self.regroup`.

        Returns:
            tuple: The new wrapper, the selected row positions, the selected
            column (or group) positions, and the selected ungrouped column
            positions. The last two are the same object unless groups are selected.

        Raises:
            IndexingError: If the operation selects a scalar, selects a column on
                a Series in column-only mode, selects a single row while grouping
                is enabled, or cannot be mapped onto rows and columns.
        """
        from vectorbt import settings

        # Resolve flags: explicit argument -> instance attribute -> global settings
        if column_only_select is None:
            column_only_select = self.column_only_select
        if column_only_select is None:
            column_only_select = settings.array_wrapper['column_only_select']
        if group_select is None:
            group_select = self.group_select
        if group_select is None:
            group_select = settings.array_wrapper['group_select']
        _self = self.regroup(group_by)
        # Selecting groups only makes sense if grouping is enabled
        group_select = group_select and _self.grouper.is_grouped()
        if index is None:
            index = _self.index
        if columns is None:
            if group_select:
                columns = _self.grouper.get_columns()
            else:
                columns = _self.columns
        if group_select:
            # Groups as columns
            i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
        else:
            # Columns as columns
            i_wrapper = ArrayWrapper(index, columns, _self.ndim)
        n_rows = len(index)
        n_cols = len(columns)

        if column_only_select:
            if i_wrapper.ndim == 1:
                raise IndexingError(
                    "Columns only: Attempting to select a column on a Series")
            # Index a Series of column positions to learn which columns get selected
            col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols),
                                                columns=columns)
            try:
                col_mapper = pd_indexing_func(col_mapper)
            except pd.core.indexing.IndexingError as e:
                warnings.warn(
                    "Columns only: Make sure to treat this object "
                    "as a Series of columns rather than a DataFrame",
                    stacklevel=2)
                raise e
            if checks.is_series(col_mapper):
                # Multiple columns selected
                new_columns = col_mapper.index
                col_idxs = col_mapper.values
                new_ndim = 2
            else:
                # A single column selected: the indexing returned its position
                new_columns = columns[[col_mapper]]
                col_idxs = col_mapper
                new_ndim = 1
            # Rows are left untouched in this mode
            new_index = index
            idx_idxs = np.arange(len(index))
        else:
            # Index an array holding the row position in every cell
            idx_mapper = i_wrapper.wrap(
                np.broadcast_to(np.arange(n_rows)[:, None], (n_rows, n_cols)),
                index=index,
                columns=columns)
            idx_mapper = pd_indexing_func(idx_mapper)
            if i_wrapper.ndim == 1:
                if not checks.is_series(idx_mapper):
                    raise IndexingError("Selection of a scalar is not allowed")
                idx_idxs = idx_mapper.values
                col_idxs = 0
            else:
                # Index an array holding the column position in every cell
                col_mapper = i_wrapper.wrap(
                    np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)),
                    index=index,
                    columns=columns)
                col_mapper = pd_indexing_func(col_mapper)
                if checks.is_frame(idx_mapper):
                    idx_idxs = idx_mapper.values[:, 0]
                    col_idxs = col_mapper.values[0]
                elif checks.is_series(idx_mapper):
                    # A Series came out of a DataFrame: either one column or one row was picked
                    one_col = np.all(
                        col_mapper.values == col_mapper.values.item(0))
                    one_idx = np.all(
                        idx_mapper.values == idx_mapper.values.item(0))
                    if one_col and one_idx:
                        # One index and one column selected, multiple times
                        raise IndexingError(
                            "Must select at least two unique indices in one of both axes"
                        )
                    elif one_col:
                        # One column selected
                        idx_idxs = idx_mapper.values
                        col_idxs = col_mapper.values[0]
                    elif one_idx:
                        # One index selected
                        idx_idxs = idx_mapper.values[0]
                        col_idxs = col_mapper.values
                    else:
                        raise IndexingError
                else:
                    raise IndexingError("Selection of a scalar is not allowed")
            new_index = index_fns.get_index(idx_mapper, 0)
            if not isinstance(idx_idxs, np.ndarray):
                # One index selected
                new_columns = index[[idx_idxs]]
            elif not isinstance(col_idxs, np.ndarray):
                # One column selected
                new_columns = columns[[col_idxs]]
            else:
                new_columns = index_fns.get_index(idx_mapper, 1)
            new_ndim = idx_mapper.ndim

        if _self.grouper.is_grouped():
            # Grouping enabled
            if np.asarray(idx_idxs).ndim == 0:
                raise IndexingError(
                    "Flipping index and columns is not allowed")

            if group_select:
                # Selection based on groups
                # Get indices of columns corresponding to selected groups
                group_idxs = col_idxs
                group_idxs_arr = reshape_fns.to_1d(group_idxs)
                group_start_idxs = _self.grouper.get_group_start_idxs()[group_idxs_arr]
                group_end_idxs = _self.grouper.get_group_end_idxs()[group_idxs_arr]
                ungrouped_col_idxs = get_ranges_arr(group_start_idxs,
                                                    group_end_idxs)
                ungrouped_columns = _self.columns[ungrouped_col_idxs]
                if new_ndim == 1 and len(ungrouped_columns) == 1:
                    ungrouped_ndim = 1
                    ungrouped_col_idxs = ungrouped_col_idxs[0]
                else:
                    ungrouped_ndim = 2

                # Get indices of selected groups corresponding to the new columns
                # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
                group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
                # Repeat the position of each selected group once per column of that group.
                # Marking boundaries at `group_lens[:-1]` and taking a cumulative sum
                # treats lengths as offsets and mislabels columns once more than
                # two groups are selected.
                ungrouped_group_idxs = np.repeat(np.arange(len(group_lens)), group_lens)

                return _self.copy(index=new_index,
                                  columns=ungrouped_columns,
                                  ndim=ungrouped_ndim,
                                  grouped_ndim=new_ndim,
                                  group_by=new_columns[ungrouped_group_idxs]
                                  ), idx_idxs, group_idxs, ungrouped_col_idxs

            # Selection based on columns
            col_idxs_arr = reshape_fns.to_1d(col_idxs)
            return _self.copy(index=new_index,
                              columns=new_columns,
                              ndim=new_ndim,
                              grouped_ndim=None,
                              group_by=_self.grouper.group_by[col_idxs_arr]
                              ), idx_idxs, col_idxs, col_idxs

        # Grouping disabled
        return _self.copy(index=new_index,
                          columns=new_columns,
                          ndim=new_ndim,
                          grouped_ndim=None,
                          group_by=None), idx_idxs, col_idxs, col_idxs
Пример #20
0
def broadcast(*args,
              to_shape=None,
              to_pd=None,
              to_2d=None,
              index_from='default',
              columns_from='default',
              writeable=False,
              copy_kwargs={},
              keep_raw=False,
              **kwargs):
    """Broadcast every array-like object in `args` to a common shape using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Inputs that `checks.is_array` does not recognize are first converted with `np.asarray`.
    When pandas output is requested or detected, the new index/columns come from `broadcast_index`.

    Args:
        *args (array_like): Objects to broadcast.
        to_shape (tuple): Target shape. If `None`, the common broadcast shape of `args` is used.
        to_pd (bool, tuple or list): Whether to return pandas objects. A single value applies to
            every output; a tuple or list is looked up by argument position. If `None`, pandas
            output is produced when at least one input is a pandas object.
        to_2d (bool): If not `None`, overrides the automatic decision on whether Series are
            converted to DataFrames before broadcasting.
        index_from (None, int, str or array_like): Broadcasting rule for index.
            `'default'` is replaced by `defaults.broadcasting['index_from']`.
        columns_from (None, int, str or array_like): Broadcasting rule for columns.
            `'default'` is replaced by `defaults.broadcasting['columns_from']`.
        writeable (bool): If `True`, makes broadcasted arrays writable, otherwise readonly.

            !!! note
                Has effect only if broadcasting was needed for that particular array.

                Making arrays writable is possible only through copying them, which is pretty expensive.

                Numba requires arrays to be writable.

        copy_kwargs (dict): Keyword arguments passed to `np.array`. For example, to specify `order`.

            !!! note
                Has effect on every array, independent from whether broadcasting was needed or not.

        keep_raw (bool, tuple or list): If truthy for an argument, that argument is neither
            broadcast to the target shape nor wrapped. A single value applies to every argument;
            a tuple or list is looked up by argument position.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        A tuple of outputs in argument order if more than one argument was passed,
        otherwise the single output.

    For defaults, see `vectorbt.defaults.broadcasting`.

    Example:
        Without broadcasting index and columns:

        ```python-repl
        >>> import numpy as np
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import broadcast

        >>> v = 0
        >>> a = np.array([1, 2, 3])
        >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...     index=pd.Index(['x2', 'y2', 'z2']),
        ...     columns=pd.Index(['a2', 'b2', 'c2']))

        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=None,
        ...     columns_from=None,
        ... ): print(i)
           0  1  2
        0  0  0  0
        1  0  0  0
        2  0  0  0
           0  1  2
        0  1  2  3
        1  1  2  3
        2  1  2  3
           a  a  a
        x  1  1  1
        y  2  2  2
        z  3  3  3
            a2  b2  c2
        x2   1   2   3
        y2   4   5   6
        z2   7   8   9
        ```

        Taking new index and columns from position:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=2,
        ...     columns_from=3
        ... ): print(i)
           a2  b2  c2
        x   0   0   0
        y   0   0   0
        z   0   0   0
           a2  b2  c2
        x   1   2   3
        y   1   2   3
        z   1   2   3
           a2  b2  c2
        x   1   1   1
        y   2   2   2
        z   3   3   3
           a2  b2  c2
        x   1   2   3
        y   4   5   6
        z   7   8   9
        ```

        Broadcasting index and columns through stacking:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='stack',
        ...     columns_from='stack'
        ... ): print(i)
              a2  b2  c2
        x x2   0   0   0
        y y2   0   0   0
        z z2   0   0   0
              a2  b2  c2
        x x2   1   2   3
        y y2   1   2   3
        z z2   1   2   3
              a2  b2  c2
        x x2   1   1   1
        y y2   2   2   2
        z z2   3   3   3
              a2  b2  c2
        x x2   1   2   3
        y y2   4   5   6
        z z2   7   8   9
        ```

        Setting index and columns manually:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=['a', 'b', 'c'],
        ...     columns_from=['d', 'e', 'f']
        ... ): print(i)
           d  e  f
        a  0  0  0
        b  0  0  0
        c  0  0  0
           d  e  f
        a  1  2  3
        b  1  2  3
        c  1  2  3
           d  e  f
        a  1  1  1
        b  2  2  2
        c  3  3  3
           d  e  f
        a  1  2  3
        b  4  5  6
        c  7  8  9
        ```
    """

    def _per_arg(option, pos, fallback):
        # A tuple/list carries one flag per argument; any other value means "use the fallback"
        return option[pos] if isinstance(option, (tuple, list)) else fallback

    # The isinstance guard keeps array-like rules away from the `==` comparison
    if isinstance(index_from, str) and index_from == 'default':
        index_from = defaults.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = defaults.broadcasting['columns_from']

    # Normalize inputs to NumPy/pandas and detect pandas and multi-dimensional inputs in one pass
    args = list(args)
    is_pd = False
    is_2d = False
    for pos, arg in enumerate(args):
        if not checks.is_array(arg):
            arg = np.asarray(arg)
            args[pos] = arg
        if arg.ndim > 1:
            is_2d = True
        if checks.is_pandas(arg):
            is_pd = True

    # An explicit target shape with more than one dimension also implies 2-dim data
    if to_shape is not None:
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    # Explicit overrides take precedence over what was detected above
    if to_2d is not None:
        is_2d = to_2d
    if to_pd is not None:
        is_pd = np.array(to_pd).any() if isinstance(to_pd, (tuple, list)) else to_pd

    # Series become single-column DataFrames whenever we operate on 2-dim data
    args_2d = []
    for arg in args:
        if is_2d and checks.is_series(arg):
            arg = arg.to_frame()
        args_2d.append(arg)

    if to_shape is None:
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # First pass: broadcast everything that is not kept raw
    broadcasted = [
        arg if _per_arg(keep_raw, pos, keep_raw)
        else np.broadcast_to(arg, to_shape, subok=True)
        for pos, arg in enumerate(args_2d)
    ]

    # Second pass: broadcasting yields readonly objects while Numba requires writable ones.
    # Copying is the only way to make them writable, so copy only where broadcasting
    # actually changed the shape; otherwise fall back to the original object.
    new_args = []
    for original, candidate in zip(args_2d, broadcasted):
        if candidate.shape == original.shape:
            new_args.append(np.array(original, copy=False, **copy_kwargs))
        else:
            new_args.append(np.array(candidate, copy=writeable, **copy_kwargs))

    new_index = None
    new_columns = None
    if is_pd:
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args,
                                    to_shape,
                                    index_from=index_from,
                                    axis=0,
                                    **kwargs)
        new_columns = broadcast_index(args,
                                      to_shape,
                                      index_from=columns_from,
                                      axis=1,
                                      **kwargs)

    # Bring arrays back to their old types (e.g. array -> pandas), skipping raw ones
    for pos, new_arg in enumerate(new_args):
        if _per_arg(keep_raw, pos, keep_raw):
            continue
        new_args[pos] = wrap_broadcasted(args[pos],
                                         new_arg,
                                         is_pd=_per_arg(to_pd, pos, is_pd),
                                         new_index=new_index,
                                         new_columns=new_columns)

    return tuple(new_args) if len(new_args) > 1 else new_args[0]
Пример #21
0
 def columns(self):
     """Column labels of the wrapped object.

     For a Series this is a one-element `pd.Index` holding its `name`;
     for anything else the object's own `columns` attribute is returned."""
     if not checks.is_series(self._obj):
         return self._obj.columns
     return pd.Index([self._obj.name])
Пример #22
0
    def from_order_func(cls,
                        main_price,
                        order_func_nb,
                        *args,
                        init_capital=None,
                        row_wise=False,
                        row_prep_func_nb=None,
                        broadcast_kwargs={},
                        freq=None,
                        **kwargs):
        """Build portfolio from a custom order function.

        Starting with initial capital `init_capital`, iterates over shape `main_price.shape`, and for
        each data point, generates an order using `order_func_nb`. This way, you can specify order
        size, price and transaction costs dynamically (for example, based on the current balance).

        If `row_wise` is `True`, the simulation is run by `vectorbt.portfolio.nb.simulate_row_wise_nb`,
        otherwise by `vectorbt.portfolio.nb.simulate_nb`.

        Args:
            main_price (pandas_like): Main price of the asset, such as close. Will broadcast.

                Must be a `pd.Series` or `pd.DataFrame`.
            order_func_nb (function): Function that returns an order.

                See `vectorbt.portfolio.enums.Order`.
            *args: Arguments passed to `order_func_nb`.
            init_capital (float or array_like): The initial capital. Will broadcast.

                Allowed is either a single value or value per column.
                If `None`, taken from `vectorbt.defaults.portfolio`.
            row_wise (bool): If `True`, iterates over rows, otherwise over columns.

                Set to `True` if columns depend upon each other.
            row_prep_func_nb (function): Function to call before iterating over the next row.

                Can be used to do preprocessing, such as to calculate past returns.
                Only allowed together with `row_wise=True`.
            broadcast_kwargs: Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
            freq (any): Index frequency in case `main_price.index` is not datetime-like.
            **kwargs: Keyword arguments passed to the `__init__` method.

        Raises:
            ValueError: If `row_prep_func_nb` is given while `row_wise` is falsy.

        For defaults, see `vectorbt.defaults.portfolio`.

        All time series will be broadcasted together using `vectorbt.base.reshape_fns.broadcast`.
        At the end, they will have the same metadata.

        !!! note
            `order_func_nb` must be Numba-compiled.

        Example:
            Placing a buy order each day:
            ```python-repl
            >>> from vectorbt.portfolio import Order, SizeType

            >>> @njit
            ... def order_func_nb(oc, price):
            ...     return Order(10, SizeType.Shares, price[oc.i],
            ...         fees=0.01, fixed_fees=1., slippage=0.01)

            >>> portfolio = vbt.Portfolio.from_order_func(
            ...     price, order_func_nb, price.values, init_capital=100)

            >>> portfolio.orders.records
               col  idx  size  price   fees  side
            0    0    0  10.0   1.01  1.101     0
            1    0    1  10.0   2.02  1.202     0
            2    0    2  10.0   3.03  1.303     0
            3    0    3  10.0   2.02  1.202     0
            4    0    4  10.0   1.01  1.101     0
            >>> portfolio.equity
            2020-01-01     98.799
            2020-01-02    107.397
            2020-01-03    125.794
            2020-01-04     94.392
            2020-01-05     53.191
            Name: a, dtype: float64
            ```
        """
        # Fall back to the configured initial capital
        if init_capital is None:
            init_capital = defaults.portfolio['init_capital']

        # Validate inputs
        checks.assert_type(main_price, (pd.Series, pd.DataFrame))
        checks.assert_numba_func(order_func_nb)

        # Only main_price is broadcasted; init_capital is kept raw and can remain
        # unchanged thanks to flexible indexing
        main_price, init_capital = reshape_fns.broadcast(main_price,
                                                         init_capital,
                                                         **broadcast_kwargs,
                                                         writeable=True,
                                                         keep_raw=(False, True))

        # The simulation works on a 2-dim shape; a Series counts as one column
        n_rows = main_price.shape[0]
        n_cols = main_price.shape[1] if main_price.ndim > 1 else 1
        target_shape = (n_rows, n_cols)

        # Run the simulation
        if not row_wise:
            if row_prep_func_nb is not None:
                raise ValueError(
                    "Function row_prep_func_nb can be only called when row_wise=True"
                )
            order_records, cash, shares = nb.simulate_nb(
                target_shape, init_capital, order_func_nb, *args)
        else:
            if row_prep_func_nb is None:
                row_prep_func_nb = nb.none_row_prep_func_nb
            order_records, cash, shares = nb.simulate_row_wise_nb(
                target_shape, init_capital, row_prep_func_nb, order_func_nb,
                *args)

        # Give the outputs the same metadata as main_price
        cash = main_price.vbt.wrap(cash)
        shares = main_price.vbt.wrap(shares)
        orders = Orders(order_records, main_price, freq=freq)
        if not checks.is_series(main_price):
            # One capital value per column
            init_capital = np.broadcast_to(init_capital, (n_cols, ))
            init_capital = main_price.vbt.wrap_reduced(init_capital)
        else:
            # A Series has a single column, so a plain scalar is enough
            init_capital = init_capital.item(0)

        return cls(main_price,
                   init_capital,
                   orders,
                   cash,
                   shares,
                   freq=freq,
                   **kwargs)
Пример #23
0
def broadcast_index(*args,
                    index_from=None,
                    axis=0,
                    is_2d=False,
                    ignore_single='default',
                    drop_duplicates='default',
                    keep='default'):
    """Broadcast index/columns of all arguments.

    Args:
        *args (array_like): Objects whose index (`axis=0`) or columns (`axis=1`) are combined.
        index_from (None, int or str): Broadcasting rule.

            * `None` - return `None`
            * `int` - take the index/columns of the object at that position in `args`
            * `'strict'` - require all pandas objects to have equal index/columns
            * `'stack'` - stack differing indexes/columns using `index_fns.stack`

            Any other value raises `ValueError`.
        axis (int): Set to 1 for columns, anything else is treated as index.
        is_2d (bool): Accepted for call compatibility; not used by this function.
        ignore_single (bool): If truthy, an index/columns of different length that has to be
            broadcast is ignored instead of being repeated and stacked.
            `'default'` is replaced by `defaults.broadcast['ignore_single']`.
        drop_duplicates (bool): If truthy, `index_fns.drop_duplicate_levels` is applied after
            each stacking. `'default'` is replaced by `defaults.broadcast['drop_duplicates']`.
        keep: Passed as `keep` to `index_fns.drop_duplicate_levels`.
            `'default'` is replaced by `defaults.broadcast['keep']`.

    Returns:
        The new index/columns, or `None` if `index_from` is `None` or if a `'stack'`/`'strict'`
        rule was requested but no pandas object is among `args`.

    Raises:
        ValueError: If `index_from` is invalid, if indexes cannot be broadcast together,
            or if broadcasting would be required under `'strict'`.
    """
    if ignore_single == 'default':
        ignore_single = defaults.broadcast['ignore_single']
    if drop_duplicates == 'default':
        drop_duplicates = defaults.broadcast['drop_duplicates']
    if keep == 'default':
        keep = defaults.broadcast['keep']
    index_str = 'columns' if axis == 1 else 'index'

    if index_from is None:
        return None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if axis == 1:
            return to_2d(args[index_from]).columns
        return args[index_from].index

    if not (isinstance(index_from, str) and index_from in ('stack', 'strict')):
        raise ValueError(f"Invalid value {index_from} for {index_str}_from")

    # maxlen is the length along `axis` that all arguments broadcast to
    max_shape = np.lib.stride_tricks._broadcast_shape(*args)
    if axis == 1 and len(max_shape) == 1:
        max_shape = (max_shape[0], 1)
    maxlen = max_shape[1] if axis == 1 else max_shape[0]

    new_index = None
    for arg in args:
        if not checks.is_pandas(arg):
            continue
        if checks.is_series(arg):
            arg = arg.to_frame()  # series name counts as a column
        index = arg.columns if axis == 1 else arg.index

        if new_index is None:
            # First pandas object seen: its index is the starting point
            new_index = index
            continue
        if pd.Index.equals(index, new_index):
            continue
        if index_from == 'strict':
            # If pandas objects have different index/columns, raise an exception
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )

        # Broadcasting index must follow the rules of a regular broadcasting operation
        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
        # 1. rule: if indexes are of the same length, they are simply stacked
        # 2. rule: if index has one element, it gets repeated and then stacked
        if len(index) != len(new_index):
            if len(index) > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            if ignore_single:
                # Columns of length 1 should be simply ignored
                if len(index) > len(new_index):
                    new_index = index
                continue
            if len(index) > len(new_index):
                new_index = index_fns.repeat(new_index, len(index))
            else:
                index = index_fns.repeat(index, len(new_index))
        new_index = index_fns.stack(new_index, index)
        if drop_duplicates:
            new_index = index_fns.drop_duplicate_levels(new_index, keep=keep)

    # Without any pandas object there is no index to extend: new_index is still None and
    # calling len() on it would raise TypeError, so it is returned as is.
    if new_index is not None and maxlen > len(new_index):
        if index_from == 'strict':
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat(new_index, maxlen)
    return new_index
Пример #24
0
def broadcast_index(*args,
                    to_shape=None,
                    index_from=None,
                    axis=0,
                    ignore_single='default',
                    drop_duplicates='default',
                    keep='default'):
    """Produce a broadcasted index/columns.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. Optional; if `None`, the common broadcast shape
            of `args` is used.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - return `None`
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `index_fns.stack_indexes`
            * any other string raises `ValueError`; note that `'default'` is not resolved here
              and must be replaced by the caller beforehand
            * everything else is returned unchanged by this function

        axis (int): Set to 1 for columns, anything else is treated as index.
        ignore_single (bool): If `True`, ignores indexes/columns with one value, otherwise they will be repeated
            to match the length of the longest index/columns (can lead to pollution of levels).
        drop_duplicates (bool): See `vectorbt.utils.index_fns.drop_duplicate_levels`.
        keep: Passed as `keep` to `vectorbt.utils.index_fns.drop_duplicate_levels`.

    Returns:
        The new index/columns, or `None` if `index_from` is `None` or if a `'stack'`/`'strict'`
        rule was requested but no pandas object is among `args`.

    Raises:
        ValueError: If `index_from` is an unsupported string, if indexes cannot be broadcast
            together, or if broadcasting would be required under `'strict'`.

    For defaults, see `vectorbt.defaults.broadcasting`.
    """
    if ignore_single == 'default':
        ignore_single = defaults.broadcasting['ignore_single']
    if drop_duplicates == 'default':
        drop_duplicates = defaults.broadcasting['drop_duplicates']
    if keep == 'default':
        keep = defaults.broadcasting['keep']
    index_str = 'columns' if axis == 1 else 'index'

    if index_from is None:
        return None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if axis == 1:
            return to_2d(args[index_from]).columns
        return args[index_from].index

    if not isinstance(index_from, str):
        # Anything that is neither a position nor a named rule is handed back as given
        return index_from

    if index_from not in ('stack', 'strict'):
        raise ValueError(f"Invalid value {index_from} for {index_str}_from")

    # maxlen is the length along `axis` that all arguments broadcast to
    if to_shape is None:
        # Simulate broadcasting
        to_shape = np.lib.stride_tricks._broadcast_shape(*args)
    if axis == 1 and len(to_shape) == 1:
        to_shape = (to_shape[0], 1)
    maxlen = to_shape[1] if axis == 1 else to_shape[0]

    new_index = None
    for arg in args:
        if not checks.is_pandas(arg):
            continue
        if checks.is_series(arg):
            arg = arg.to_frame()  # series name counts as a column
        index = arg.columns if axis == 1 else arg.index

        if new_index is None:
            # First pandas object seen: its index is the starting point
            new_index = index
            continue
        if pd.Index.equals(index, new_index):
            continue
        if index_from == 'strict':
            # If pandas objects have different index/columns, raise an exception
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )

        # Broadcasting index must follow the rules of a regular broadcasting operation
        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
        # 1. rule: if indexes are of the same length, they are simply stacked
        # 2. rule: if index has one element, it gets repeated and then stacked
        if len(index) != len(new_index):
            if len(index) > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            if ignore_single:
                # Columns of length 1 should be simply ignored
                if len(index) > len(new_index):
                    new_index = index
                continue
            if len(index) > len(new_index):
                new_index = index_fns.repeat_index(new_index, len(index))
            else:
                index = index_fns.repeat_index(index, len(new_index))
        new_index = index_fns.stack_indexes(new_index, index)
        if drop_duplicates:
            new_index = index_fns.drop_duplicate_levels(new_index, keep=keep)

    # Without any pandas object there is no index to extend: new_index is still None and
    # calling len() on it would raise TypeError, so it is returned as is.
    if new_index is not None and maxlen > len(new_index):
        if index_from == 'strict':
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
Пример #25
0
    def wrap_reduced(
            self,
            arr: tp.ArrayLike,
            name_or_index: tp.NameIndex = None,
            columns: tp.Optional[tp.IndexLike] = None,
            fillna: tp.Optional[tp.Scalar] = None,
            dtype: tp.Optional[tp.PandasDTypeLike] = None,
            group_by: tp.GroupByLike = None,
            to_timedelta: bool = False,
            to_index: bool = False,
            silence_warnings: tp.Optional[bool] = None) -> tp.MaybeSeriesFrame:
        """Wrap the result of a reduction using the stored metadata.

        `name_or_index` is used as the name of the resulting series when the reduction yields
        one scalar per column, and as the index of the resulting series/dataframe when it yields
        an array per column (a string is discarded in the latter case).
        `columns` can be set to override object's default columns.

        If `to_index` is `True`, `dtype` defaults to `np.int_` and `fillna` to -1, and every value
        is afterwards replaced by the index label at that position (-1 becomes NaN).
        If `to_timedelta` is `True`, the output is passed through `ArrayWrapper.to_timedelta`.

        See `ArrayWrapper.wrap` for the pipeline."""
        from vectorbt._settings import settings
        cfg = settings['array_wrapper']

        if silence_warnings is None:
            silence_warnings = cfg['silence_warnings']

        checks.assert_not_none(self.ndim)
        wrapper = self.resolve(group_by=group_by)

        if columns is None:
            columns = wrapper.columns
        if not isinstance(columns, pd.Index):
            columns = pd.Index(columns)

        if to_index:
            # Positions are integers, with -1 marking a missing position
            dtype = np.int_ if dtype is None else dtype
            fillna = -1 if fillna is None else fillna

        def _array_per_series(values):
            # One array for a Series-like wrapper: name it after the only column
            sr_name = columns[0]
            if sr_name == 0:  # was arr Series before
                sr_name = None
            # A string cannot serve as an index here, so it is dropped
            index = None if isinstance(name_or_index, str) else name_or_index
            return pd.Series(values, index=index, name=sr_name, dtype=dtype)

        def _wrap_reduced(values):
            values = np.asarray(values)
            if fillna is not None:
                values[pd.isnull(values)] = fillna

            if values.ndim == 0:
                # Scalar per Series/DataFrame
                return pd.Series(values, dtype=dtype)[0]

            if values.ndim == 1:
                if wrapper.ndim != 1:
                    # Scalar per column in arr DataFrame
                    return pd.Series(values,
                                     index=columns,
                                     name=name_or_index,
                                     dtype=dtype)
                if values.shape[0] == 1:
                    # Scalar per Series/DataFrame with one column
                    return pd.Series(values, dtype=dtype)[0]
                # Array per Series
                return _array_per_series(values)

            if values.ndim == 2:
                if values.shape[1] == 1 and wrapper.ndim == 1:
                    # Array per Series
                    return _array_per_series(reshape_fns.soft_to_ndim(values, 1))
                # Array per column in DataFrame
                index = None if isinstance(name_or_index, str) else name_or_index
                return pd.DataFrame(values,
                                    index=index,
                                    columns=columns,
                                    dtype=dtype)

            raise ValueError(f"{values.ndim}-d input is not supported")

        out = _wrap_reduced(arr)

        if to_index:
            # Convert positions to index labels
            def _to_label(pos):
                return self.index[pos] if pos != -1 else np.nan

            if checks.is_series(out):
                out = out.map(_to_label)
            elif checks.is_frame(out):
                out = out.applymap(_to_label)
            else:
                out = _to_label(out)

        if to_timedelta:
            # Convert to timedelta
            out = self.to_timedelta(out, silence_warnings=silence_warnings)
        return out
Пример #26
0
    def wrap(self,
             arr: tp.ArrayLike,
             index: tp.Optional[tp.IndexLike] = None,
             columns: tp.Optional[tp.IndexLike] = None,
             fillna: tp.Optional[tp.Scalar] = None,
             dtype: tp.Optional[tp.PandasDTypeLike] = None,
             group_by: tp.GroupByLike = None,
             to_timedelta: bool = False,
             to_index: bool = False,
             silence_warnings: tp.Optional[bool] = None) -> tp.SeriesFrame:
        """Wrap a NumPy array using the stored metadata.

        The pipeline consists of the following steps:

        1) Convert to NumPy array (must be one- or two-dimensional)
        2) Fill NaN with `fillna` (optional)
        3) Wrap into a Series or DataFrame using index, columns, and dtype;
            `index` and `columns` default to those of the wrapper resolved with `group_by`
        4) Replace positions by index labels, with -1 becoming NaN (optional, `to_index`)
        5) Convert to timedelta using `ArrayWrapper.to_timedelta` (optional, `to_timedelta`)"""
        from vectorbt._settings import settings
        cfg = settings['array_wrapper']

        if silence_warnings is None:
            silence_warnings = cfg['silence_warnings']

        wrapper = self.resolve(group_by=group_by)

        if index is None:
            index = wrapper.index
        if not isinstance(index, pd.Index):
            index = pd.Index(index)
        if columns is None:
            columns = wrapper.columns
        if not isinstance(columns, pd.Index):
            columns = pd.Index(columns)

        # A single column lends its label to the Series name
        name = None
        if len(columns) == 1:
            name = columns[0]
            if name == 0:  # was a Series before
                name = None

        def _wrap(values):
            values = np.asarray(values)
            checks.assert_ndim(values, (1, 2))
            if fillna is not None:
                values[pd.isnull(values)] = fillna
            # NOTE(review): this uses self.ndim while the Series check below uses the
            # resolved wrapper's ndim - confirm the difference is intended
            values = reshape_fns.soft_to_ndim(values, self.ndim)
            checks.assert_shape_equal(values, index, axis=(0, 0))

            if values.ndim == 2:
                checks.assert_shape_equal(values, columns, axis=(1, 0))
                if values.shape[1] == 1 and wrapper.ndim == 1:
                    # Single column of a Series-like wrapper
                    return pd.Series(values[:, 0],
                                     index=index,
                                     name=name,
                                     dtype=dtype)
                return pd.DataFrame(values,
                                    index=index,
                                    columns=columns,
                                    dtype=dtype)
            if values.ndim == 1:
                return pd.Series(values, index=index, name=name, dtype=dtype)
            raise ValueError(f"{values.ndim}-d input is not supported")

        out = _wrap(arr)

        if to_index:
            # Convert positions to index labels
            def _to_label(pos):
                return self.index[pos] if pos != -1 else np.nan

            if checks.is_series(out):
                out = out.map(_to_label)
            else:
                out = out.applymap(_to_label)

        if to_timedelta:
            # Convert to timedelta
            out = self.to_timedelta(out, silence_warnings=silence_warnings)
        return out
Пример #27
0
def broadcast(*args,
              index_from='default',
              columns_from='default',
              writeable=False,
              copy_kwargs={},
              **kwargs):
    """Broadcast all positional arguments against each other.

    Arguments that are not array-like are converted with `np.asarray` first.
    As soon as one argument is a pandas object, every output is wrapped back
    through `wrap_broadcasted`, using the index and columns produced by
    `broadcast_index`.

    Args:
        *args: Objects to broadcast.
        index_from: Rule for the resulting index. `'default'` is replaced by
            `defaults.broadcast['index_from']`.
        columns_from: Rule for the resulting columns. `'default'` is replaced by
            `defaults.broadcast['columns_from']`.
        writeable (bool): Passed as `copy` to `np.array` for arrays that went
            through an actual broadcasting operation.
        copy_kwargs (dict): Additional keyword arguments passed to `np.array`.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        tuple: One output per input, in the order the inputs were given.
    """
    objs = list(args)
    has_pandas = False
    has_2d = False

    # Anything that is neither NumPy nor pandas becomes an ndarray;
    # along the way record whether we deal with pandas and/or 2-dim data
    for pos, obj in enumerate(objs):
        if not checks.is_array_like(obj):
            obj = objs[pos] = np.asarray(obj)
        if obj.ndim > 1:
            has_2d = True
        if checks.is_pandas(obj):
            has_pandas = True

    new_index = new_columns = None
    if has_pandas:
        if has_2d:
            # Series cannot be broadcast against 2-dim data as they are
            objs = [
                obj.to_frame() if checks.is_series(obj) else obj
                for obj in objs
            ]

        # Resolve the rules and build the common index and columns
        if index_from == 'default':
            index_from = defaults.broadcast['index_from']
        if columns_from == 'default':
            columns_from = defaults.broadcast['columns_from']
        new_index = broadcast_index(*objs,
                                    index_from=index_from,
                                    axis=0,
                                    is_2d=has_2d,
                                    **kwargs)
        new_columns = broadcast_index(*objs,
                                      index_from=columns_from,
                                      axis=1,
                                      is_2d=has_2d,
                                      **kwargs)

    if is_broadcasting_needed(*objs):
        # Broadcasting yields readonly views while numba requires writable arrays,
        # hence the optional copy; it is only applied on this path to avoid
        # deprecation warnings.
        # NOTE: If copy=False, then the resulting arrays will be readonly in the future!
        raw_outputs = [
            np.array(view, copy=writeable, **copy_kwargs)
            for view in np.broadcast_arrays(*objs, subok=True)
        ]
    else:
        # Nothing to broadcast: just pandas -> numpy and any order to contiguous
        raw_outputs = [
            np.array(obj, copy=False, **copy_kwargs) for obj in objs
        ]

    # Bring arrays back to their old types (e.g. array -> pandas)
    return tuple(
        wrap_broadcasted(old,
                         new,
                         is_pd=has_pandas,
                         new_index=new_index,
                         new_columns=new_columns)
        for old, new in zip(objs, raw_outputs))
Пример #28
0
def broadcast_index(args,
                    to_shape,
                    index_from=None,
                    axis=0,
                    ignore_sr_names=None,
                    **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args`
            * everything else will be converted to `pd.Index`

            Any other string, including 'default', is rejected here: `broadcast` resolves
            'default' from `vectorbt.settings.broadcasting` before calling this function.

        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            If None, taken from `vectorbt.settings.broadcasting`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        None if `index_from` is None, otherwise the new index/columns.

    Raises:
        TypeError: If `index_from` is an integer pointing to a non-pandas object.
        ValueError: If `index_from` is an unknown string, if broadcasting is required
            under 'strict', or if the indexes cannot be broadcast together.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    if ignore_sr_names is None:
        # Settings are only needed to resolve the default
        from vectorbt import settings

        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]

    if index_from is None:
        # Keep the original index/columns of the objects
        return None

    # Comparing an array-like rule against a string yields an element-wise result,
    # so the rule has to be checked for being a string first
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if not checks.is_pandas(args[index_from]):
            raise TypeError(
                f"Argument under index {index_from} must be a pandas object"
            )
        new_index = index_fns.get_index(args[index_from], axis)
    elif isinstance(index_from, str):
        if index_from == 'ignore':
            # Ignore index/columns
            new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
        elif index_from in ('stack', 'strict'):
            # Check whether all indexes/columns are equal
            last_index = None  # of type pd.Index
            index_conflict = False
            for arg in args:
                if checks.is_pandas(arg):
                    index = index_fns.get_index(arg, axis)
                    if last_index is not None and not pd.Index.equals(
                            index, last_index):
                        index_conflict = True
                    last_index = index
            if not index_conflict:
                new_index = last_index
            else:
                # If pandas objects have different index/columns, stack them together
                for arg in args:
                    if not checks.is_pandas(arg):
                        continue
                    index = index_fns.get_index(arg, axis)
                    if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                        # ignore Series name
                        continue
                    if checks.is_default_index(index):
                        # ignore simple ranges without name
                        continue
                    if new_index is None:
                        new_index = index
                        continue
                    if is_strict and not pd.Index.equals(index, new_index):
                        # If pandas objects have different index/columns, raise an exception
                        raise ValueError(
                            f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                        )
                    # Broadcasting index must follow the rules of a regular broadcasting operation
                    # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                    # 1. rule: if indexes are of the same length, they are simply stacked
                    # 2. rule: if index has one element, it gets repeated and then stacked
                    if pd.Index.equals(index, new_index):
                        continue
                    if len(index) != len(new_index):
                        if len(index) > 1 and len(new_index) > 1:
                            raise ValueError(
                                "Indexes could not be broadcast together")
                        if len(index) > len(new_index):
                            new_index = index_fns.repeat_index(
                                new_index, len(index))
                        else:
                            index = index_fns.repeat_index(
                                index, len(new_index))
                    new_index = index_fns.stack_indexes(
                        new_index, index, **kwargs)
        else:
            raise ValueError(
                f"Invalid value {index_from} for {index_str}_from")
    else:
        # Everything else is treated as the new index/columns itself
        if isinstance(index_from, pd.Index):
            new_index = index_from
        else:
            new_index = pd.Index(index_from)

    if new_index is None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # Since index_from is not None at this point, we choose 2)
        return pd.RangeIndex(start=0, stop=maxlen, step=1)

    if maxlen > len(new_index):
        if is_strict:
            raise ValueError(
                f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
Пример #29
0
def broadcast(*args,
              to_shape=None,
              to_pd=None,
              to_frame=None,
              align_index=None,
              align_columns=None,
              index_from='default',
              columns_from='default',
              require_kwargs=None,
              keep_raw=False,
              return_meta=False,
              **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple or int): Target shape. If set, will broadcast every element in `args` to `to_shape`.

            An integer is treated as a one-element tuple.
        to_pd (bool, tuple or list): Whether to convert all output arrays to pandas, otherwise returns
            raw NumPy arrays. If None, converts only if there is at least one pandas object among them.

            A tuple or list is applied per argument.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.
        index_from (any): Broadcasting rule for index.
        columns_from (any): Broadcasting rule for columns.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.

            A tuple or list is applied per argument.
        keep_raw (bool, tuple or list): Whether to keep the unbroadcasted version of the array.

            Only makes sure that the array can be broadcast to the target shape.
            A tuple or list is applied per argument.
        return_meta (bool): If True, will also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        The broadcast object if a single argument was passed, otherwise a tuple of broadcast objects.
        If `return_meta` is True, a tuple of that result, the target shape, the new index
        and the new columns.

    For defaults, see `vectorbt.settings.broadcasting`.

    ## Example

    Without broadcasting index and columns:
    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast

    >>> v = 0
    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     index=pd.Index(['x2', 'y2', 'z2']),
    ...     columns=pd.Index(['a2', 'b2', 'c2']))

    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=None,
    ...     columns_from=None,
    ... ): print(i)
       0  1  2
    0  0  0  0
    1  0  0  0
    2  0  0  0
       0  1  2
    0  1  2  3
    1  1  2  3
    2  1  2  3
       a  a  a
    x  1  1  1
    y  2  2  2
    z  3  3  3
        a2  b2  c2
    x2   1   2   3
    y2   4   5   6
    z2   7   8   9
    ```

    Taking new index and columns from position:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=2,
    ...     columns_from=3
    ... ): print(i)
       a2  b2  c2
    x   0   0   0
    y   0   0   0
    z   0   0   0
       a2  b2  c2
    x   1   2   3
    y   1   2   3
    z   1   2   3
       a2  b2  c2
    x   1   1   1
    y   2   2   2
    z   3   3   3
       a2  b2  c2
    x   1   2   3
    y   4   5   6
    z   7   8   9
    ```

    Broadcasting index and columns through stacking:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from='stack',
    ...     columns_from='stack'
    ... ): print(i)
          a2  b2  c2
    x x2   0   0   0
    y y2   0   0   0
    z z2   0   0   0
          a2  b2  c2
    x x2   1   2   3
    y y2   1   2   3
    z z2   1   2   3
          a2  b2  c2
    x x2   1   1   1
    y y2   2   2   2
    z z2   3   3   3
          a2  b2  c2
    x x2   1   2   3
    y y2   4   5   6
    z z2   7   8   9
    ```

    Setting index and columns manually:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=['a', 'b', 'c'],
    ...     columns_from=['d', 'e', 'f']
    ... ): print(i)
       d  e  f
    a  0  0  0
    b  0  0  0
    c  0  0  0
       d  e  f
    a  1  2  3
    b  1  2  3
    c  1  2  3
       d  e  f
    a  1  1  1
    b  2  2  2
    c  3  3  3
       d  e  f
    a  1  2  3
    b  4  5  6
    c  7  8  9
    ```
    """
    from vectorbt import settings

    def _for_arg(option, i):
        # An option is either shared by all arguments or a tuple/list with one entry per argument
        return option[i] if isinstance(option, (tuple, list)) else option

    is_pd = False
    is_2d = False
    args = list(args)
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = settings.broadcasting['align_index']
    if align_columns is None:
        align_columns = settings.broadcasting['align_columns']
    # The rules may be array-like, so make sure they are strings before comparing
    if isinstance(index_from, str) and index_from == 'default':
        index_from = settings.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = settings.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether work on 2-dim data
    for i, arg in enumerate(args):
        if not checks.is_array(arg):
            arg = args[i] = np.asarray(arg)
        if arg.ndim > 1:
            is_2d = True
        if checks.is_pandas(arg):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape, )
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_frame is not None:
        # force either keeping Series or converting them to DataFrames
        is_2d = to_frame

    if to_pd is not None:
        # force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            is_pd = any(to_pd)
        else:
            is_pd = to_pd

    # Align pandas objects
    # Only objects with more than one label take part, and only if their lengths differ
    if align_index:
        index_to_align = [
            i for i, arg in enumerate(args)
            if checks.is_pandas(arg) and len(arg.index) > 1
        ]
        if len(index_to_align) > 1:
            indexes = [args[i].index for i in index_to_align]
            if len(set(map(len, indexes))) > 1:
                index_indices = index_fns.align_indexes(*indexes)
                for pos, i in enumerate(index_to_align):
                    args[i] = args[i].iloc[index_indices[pos]]
    if align_columns:
        cols_to_align = [
            i for i, arg in enumerate(args)
            if checks.is_frame(arg) and len(arg.columns) > 1
        ]
        if len(cols_to_align) > 1:
            indexes = [args[i].columns for i in cols_to_align]
            if len(set(map(len, indexes))) > 1:
                col_indices = index_fns.align_indexes(*indexes)
                for pos, i in enumerate(cols_to_align):
                    args[i] = args[i].iloc[:, col_indices[pos]]

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [
        arg.to_frame() if is_2d and checks.is_series(arg) else arg
        for arg in args
    ]

    # Get final shape
    if to_shape is None:
        if hasattr(np, 'broadcast_shapes'):
            # Public API, available in NumPy 1.20 and later
            to_shape = np.broadcast_shapes(
                *(np.shape(arg) for arg in args_2d))
        else:
            # Older NumPy: only the private helper is available
            to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        # Broadcast even if the raw version is kept to make sure that it can be broadcast
        bc_arg = np.broadcast_to(arg, to_shape)
        new_args.append(arg if _for_arg(keep_raw, i) else bc_arg)

    # Force to match requirements
    new_args = [
        np.require(new_arg, **_for_arg(require_kwargs, i))
        for i, new_arg in enumerate(new_args)
    ]

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args,
                                    to_shape,
                                    index_from=index_from,
                                    axis=0,
                                    **kwargs)
        new_columns = broadcast_index(args,
                                      to_shape,
                                      index_from=columns_from,
                                      axis=1,
                                      **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i, old_arg in enumerate(args):
        if _for_arg(keep_raw, i):
            continue
        _is_pd = to_pd[i] if isinstance(to_pd, (tuple, list)) else is_pd
        new_args[i] = wrap_broadcasted(old_arg,
                                       new_args[i],
                                       is_pd=_is_pd,
                                       new_index=new_index,
                                       new_columns=new_columns)

    # A single argument is returned as is, multiple arguments as a tuple
    result = tuple(new_args) if len(new_args) > 1 else new_args[0]
    if return_meta:
        return result, to_shape, new_index, new_columns
    return result
Пример #30
0
 def test_is_series(self):
     """Check that `checks.is_series` accepts a pandas Series only.

     A scalar, a NumPy array and a pandas DataFrame must all be rejected.
     """
     assert not checks.is_series(0)
     assert not checks.is_series(np.array([0]))
     assert checks.is_series(pd.Series([1, 2, 3]))
     assert not checks.is_series(pd.DataFrame([1, 2, 3]))