def broadcast_to(arg1, arg2, index_from=1, columns_from=1, writeable=False, copy_kwargs={}, raw=False, **kwargs):
    """Broadcast `arg1` to the shape of `arg2`.

    Works like the general broadcast function, but for exactly two objects
    where only the first one is reshaped.

    Args:
        arg1: Object to broadcast. Converted with `np.asarray` if it is not array-like.
        arg2: Object providing the target shape. Converted the same way.
        index_from: Rule passed to `broadcast_index` for axis 0.
        columns_from: Rule passed to `broadcast_index` for axis 1.
        writeable (bool): Used as `copy` argument of `np.array` when broadcasting was needed.
        copy_kwargs (dict): Keyword arguments passed to `np.array`. Only unpacked, never mutated.
        raw: Accepted but not used by this function.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        The result of `wrap_broadcasted` applied to the broadcasted array.
    """
    if not checks.is_array_like(arg1):
        arg1 = np.asarray(arg1)
    if not checks.is_array_like(arg2):
        arg2 = np.asarray(arg2)

    is_2d = arg1.ndim > 1 or arg2.ndim > 1
    is_pd = checks.is_pandas(arg1) or checks.is_pandas(arg2)

    # Index/columns are only decided when at least one side is a pandas object
    new_index = None
    new_columns = None
    if is_pd:
        if is_2d:
            # Working on 2-dim data: promote any Series to a one-column DataFrame
            if checks.is_series(arg1):
                arg1 = arg1.to_frame()
            if checks.is_series(arg2):
                arg2 = arg2.to_frame()
        new_index = broadcast_index(
            arg1, arg2, index_from=index_from, axis=0, is_2d=is_2d, **kwargs)
        new_columns = broadcast_index(
            arg1, arg2, index_from=columns_from, axis=1, is_2d=is_2d, **kwargs)

    # Copy only if broadcasting took place and a writeable result was requested
    if is_broadcasting_needed(arg1, arg2):
        source = np.broadcast_to(arg1, arg2.shape, subok=True)
        do_copy = writeable
    else:
        source = arg1
        do_copy = False
    arg1_new = np.array(source, copy=do_copy, **copy_kwargs)

    return wrap_broadcasted(
        arg1, arg1_new, is_pd=is_pd, new_index=new_index, new_columns=new_columns)
def wrap_broadcasted(old_arg, new_arg, is_pd=False, new_index=None, new_columns=None):
    """Give the broadcasted array `new_arg` the type of the original object `old_arg`.

    Args:
        old_arg: Object before broadcasting.
        new_arg: Array after broadcasting.
        is_pd (bool): Whether a pandas result is wanted at all.
        new_index: Index to assign. If None and `old_arg` is pandas, derived from `old_arg`.
        new_columns: Columns to assign. If None, `old_arg` is pandas and `new_arg`
            is 2-dim, derived from `old_arg`.

    Returns:
        `new_arg` unchanged, or the result of `wrap_array`.
    """
    if not is_pd:
        return new_arg

    if not checks.is_pandas(old_arg):
        # Non-pandas input without any index/columns rule stays a plain array
        if new_index is None and new_columns is None:
            return new_arg
        return wrap_array(new_arg, index=new_index, columns=new_columns)

    if new_index is None:
        # Fall back to the index of the original pandas object
        if old_arg.shape[0] == new_arg.shape[0]:
            new_index = old_arg.index
        else:
            new_index = index_fns.repeat(old_arg.index, new_arg.shape[0])

    if new_columns is None and new_arg.ndim == 2:
        # Fall back to the columns of the original pandas object
        frame = old_arg.to_frame() if checks.is_series(old_arg) else old_arg
        if frame.shape[1] == new_arg.shape[1]:
            new_columns = frame.columns
        else:
            new_columns = index_fns.repeat(frame.columns, new_arg.shape[1])

    return wrap_array(new_arg, index=new_index, columns=new_columns)
def wrap_broadcasted(old_arg, new_arg, is_pd=False, new_index=None, new_columns=None):
    """Turn the broadcasted array back into a pandas object if requested.

    If `is_pd` is False, `new_arg` is returned as is. Otherwise the array is wrapped
    with `ArrayWrapper` using `new_index`/`new_columns`; missing values are taken from
    `old_arg` when it is a pandas object. A non-pandas `old_arg` with neither index
    nor columns given is returned as a plain array.
    """
    if not is_pd:
        return new_arg

    if checks.is_pandas(old_arg):
        if new_index is None:
            # Keep the original index if the row count did not change, else repeat it
            same_rows = old_arg.shape[0] == new_arg.shape[0]
            if same_rows:
                new_index = old_arg.index
            else:
                new_index = index_fns.repeat_index(old_arg.index, new_arg.shape[0])
        if new_columns is None and new_arg.ndim == 2:
            # Keep the original columns if the column count did not change, else repeat them
            if checks.is_series(old_arg):
                old_arg = old_arg.to_frame()
            same_cols = old_arg.shape[1] == new_arg.shape[1]
            if same_cols:
                new_columns = old_arg.columns
            else:
                new_columns = index_fns.repeat_index(old_arg.columns, new_arg.shape[1])
    elif new_index is None and new_columns is None:
        # Not pandas and no rules set: nothing to wrap with
        return new_arg

    wrapper = ArrayWrapper(index=new_index, columns=new_columns)
    return wrapper.wrap(new_arg)
def broadcast_ts(ts, params_len, new_columns):
    """Wrap time series `ts` with `new_columns`, tiling its values if needed.

    The values are tiled `params_len` times along axis 1 when `ts` is a Series or
    has fewer columns than `new_columns`; otherwise `ts` is wrapped as is.
    """
    needs_tiling = checks.is_series(ts) or len(new_columns) > ts.shape[1]
    data = reshape_fns.tile(ts.values, params_len, axis=1) if needs_tiling else ts
    return ts.vbt.wrap(data, columns=new_columns)
def __call__(self, trace_names=None, **kwargs):
    """Create a `widgets.Histogram` from the values of the wrapped object.

    If `trace_names` is not given, the column labels are used for a DataFrame
    and for a Series that has a name; otherwise it stays None.
    """
    obj = self._obj
    if trace_names is None:
        has_labels = checks.is_frame(obj) or (checks.is_series(obj) and obj.name is not None)
        if has_labels:
            trace_names = reshape_fns.to_2d(obj).columns
    return widgets.Histogram(trace_names=trace_names, data=obj.values, **kwargs)
def __call__(self, x_labels=None, trace_names=None, **kwargs):
    """Create a `widgets.Scatter` from the values of the wrapped object.

    `x_labels` defaults to the object's index. If `trace_names` is not given,
    the column labels are used for a DataFrame and for a Series that has a name.
    """
    obj = self._obj
    if x_labels is None:
        x_labels = obj.index
    if trace_names is None:
        has_labels = checks.is_frame(obj) or (checks.is_series(obj) and obj.name is not None)
        if has_labels:
            trace_names = reshape_fns.to_2d(obj).columns
    return widgets.Scatter(x_labels, trace_names=trace_names, data=obj.values, **kwargs)
def empty_like(cls, other, fill_value=np.nan):
    """Create an object via `cls.empty` with the shape and labels of `other`.

    A Series passes on its `index` and `name`, anything else its `index` and `columns`.
    """
    if checks.is_series(other):
        labels = dict(index=other.index, name=other.name)
    else:
        labels = dict(index=other.index, columns=other.columns)
    return cls.empty(other.shape, fill_value=fill_value, **labels)
def mapper_indexing_func(mapper, ref_obj, pd_indexing_func):
    """Apply `pd_indexing_func` to `mapper` by way of `ref_obj`.

    Positions `0..len(mapper)-1` are broadcast to `ref_obj`, indexed with
    `pd_indexing_func`, and the first row of the outcome selects from `mapper`.

    Returns:
        A new Series named like `mapper`, or None if the indexing result is
        neither a DataFrame nor a Series.
    """
    checks.assert_type(mapper, pd.Series)
    checks.assert_type(ref_obj, (pd.Series, pd.DataFrame))

    positions = np.arange(len(mapper.index))
    selected = pd_indexing_func(reshape_fns.broadcast_to(positions, ref_obj))
    picked = mapper.iloc[selected.values[0]]

    if checks.is_frame(selected):
        return pd.Series(picked.values, index=selected.columns, name=mapper.name)
    if checks.is_series(selected):
        return pd.Series([picked], index=[selected.name], name=mapper.name)
    return None
def get_index(arg, axis):
    """Return the index (`axis=0`) or the columns (`axis=1`) of pandas object `arg`.

    For a Series, the columns are a one-element index holding its name,
    or 0 if the name is None.
    """
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    checks.assert_in(axis, (0, 1))

    if axis == 0:
        return arg.index
    if not checks.is_series(arg):
        return arg.columns
    # A nameless Series gets label 0, same as how pandas does it
    label = 0 if arg.name is None else arg.name
    return pd.Index([label])
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame, pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Apply `pd_indexing_func` to `mapper` by way of `ref_obj`.

    Positions `0..len(mapper)-1` are broadcast to `ref_obj`, indexed with
    `pd_indexing_func`, and the first row of the outcome selects from `mapper`.

    Returns:
        A new Series named like `mapper`, or None if the indexing result is
        neither a DataFrame nor a Series.
    """
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    range_mapper = reshape_fns.broadcast_to(np.arange(len(mapper.index)), ref_obj)
    selection = pd_indexing_func(range_mapper)
    picked = mapper.iloc[selection.values[0]]

    if checks.is_frame(selection):
        result = pd.Series(picked.values, index=selection.columns, name=mapper.name)
    elif checks.is_series(selection):
        result = pd.Series([picked], index=[selection.name], name=mapper.name)
    else:
        result = None
    return result
def indexing_func(self: BaseAccessorT, pd_indexing_func: tp.PandasIndexingFunc, **kwargs) -> BaseAccessorT:
    """Perform indexing on `BaseAccessor`.

    Row and column positions come from `self.wrapper.indexing_func_meta`; the selected
    part of the 2-dim array is wrapped with the new wrapper, and the accessor is
    replaced by its Series or DataFrame variant depending on the wrapped result.
    """
    meta = self.wrapper.indexing_func_meta(pd_indexing_func, **kwargs)
    new_wrapper, row_idxs, _, column_idxs = meta

    selected = self.to_2d_array()[row_idxs, :][:, column_idxs]
    new_obj = new_wrapper.wrap(selected, group_by=False)

    accessor_cls = self.sr_accessor_cls if checks.is_series(new_obj) else self.df_accessor_cls
    return self.replace(
        cls_=accessor_cls,
        obj=new_obj,
        wrapper=new_wrapper
    )
def soft_to_ndim(arg, ndim):
    """Bring `arg` to `ndim` dimensions (max 2) if that is possible without losing data.

    A 2-dim object with a single column is reduced to 1-dim, a 1-dim object is
    expanded to one column. In every other case `arg` is returned unchanged.
    """
    if not checks.is_array(arg):
        arg = np.asarray(arg)

    if ndim == 1 and arg.ndim == 2 and arg.shape[1] == 1:
        # downgrade
        return arg.iloc[:, 0] if checks.is_frame(arg) else arg[:, 0]

    if ndim == 2 and arg.ndim == 1:
        # upgrade
        return arg.to_frame() if checks.is_series(arg) else arg[:, None]

    return arg  # do nothing
def empty_like(cls, other: tp.SeriesFrame, fill_value: tp.Scalar = np.nan, **kwargs) -> tp.SeriesFrame:
    """Create an object via `cls.empty` with the shape and labels of `other`.

    A Series passes on its `index` and `name`, anything else its `index` and `columns`.
    `**kwargs` are forwarded to `cls.empty`.
    """
    if checks.is_series(other):
        labels = dict(index=other.index, name=other.name)
    else:
        labels = dict(index=other.index, columns=other.columns)
    return cls.empty(other.shape, fill_value=fill_value, **labels, **kwargs)
def soft_to_ndim(arg: tp.ArrayLike, ndim: int, raw: bool = False) -> tp.AnyArray:
    """Bring `arg` to `ndim` dimensions (max 2) if that is possible without losing data.

    `arg` is first converted with `to_any_array(arg, raw=raw)`. A 2-dim object with a
    single column is reduced to 1-dim, a 1-dim object is expanded to one column.
    In every other case the converted object is returned unchanged.
    """
    arg = to_any_array(arg, raw=raw)

    if ndim == 1 and arg.ndim == 2 and arg.shape[1] == 1:
        # downgrade
        if checks.is_frame(arg):
            return arg.iloc[:, 0]
        return arg[:, 0]

    if ndim == 2 and arg.ndim == 1:
        # upgrade
        if checks.is_series(arg):
            return arg.to_frame()
        return arg[:, None]

    return arg  # do nothing
def to_2d(arg, raw=False, expand_axis=1):
    """Reshape argument to two dimensions.

    If `raw` is True, returns NumPy array.
    If 1-dim, will expand along axis 1 (i.e., Series to DataFrame with one column).

    Raises:
        ValueError: If the object has more than two dimensions.
    """
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2:
        return arg
    if n_dims == 0:
        return arg.reshape((1, 1))
    if n_dims != 1:
        raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 2 dimensions")

    if checks.is_series(arg):
        if expand_axis == 0:
            # Series becomes a single row, its index becomes the columns
            return pd.DataFrame(arg.values[None, :], columns=arg.index)
        if expand_axis == 1:
            return arg.to_frame()
    return np.expand_dims(arg, expand_axis)
def to_2d(arg, raw=False, expand_axis=1):
    """Reshape argument to two dimensions.

    With `raw=True` the argument is converted to a NumPy array first. A 1-dim object
    is expanded along `expand_axis`, a 0-dim object becomes shape `(1, 1)`.

    Raises:
        ValueError: If the object has more than two dimensions.
    """
    if raw or not checks.is_array_like(arg):
        arg = np.asarray(arg)

    n_dims = arg.ndim
    if n_dims == 2:
        return arg
    if n_dims == 0:
        return arg.reshape((1, 1))
    if n_dims == 1:
        if checks.is_series(arg):
            if expand_axis == 0:
                # Series becomes a single row, its index becomes the columns
                return pd.DataFrame(arg.values[None, :], columns=arg.index)
            if expand_axis == 1:
                return arg.to_frame()
        return np.expand_dims(arg, expand_axis)
    raise ValueError(
        f"Cannot reshape a {arg.ndim}-dimensional array to 2 dimensions")
def to_mapping(mapping_like: tp.MappingLike, reverse: bool = False) -> dict:
    """Convert mapping-like object to a mapping.

    * Named tuple: field values become keys and field names become values;
      key -1 is added with value None if -1 is not among the values.
    * Mapping: copied into a dict.
    * Index: converted to a Series with a fresh integer index, then treated as a Series.
    * Series: converted with `to_dict`.
    * Anything else: enumerated.

    Enable `reverse` to apply `reverse_mapping` on the result dict.
    """
    if checks.is_namedtuple(mapping_like):
        mapping = {value: field for field, value in mapping_like._asdict().items()}
        if -1 not in mapping_like:
            mapping[-1] = None
    elif checks.is_mapping(mapping_like):
        mapping = dict(mapping_like)
    else:
        if checks.is_index(mapping_like):
            mapping_like = mapping_like.to_series().reset_index(drop=True)
        if checks.is_series(mapping_like):
            mapping = mapping_like.to_dict()
        else:
            mapping = dict(enumerate(mapping_like))

    return reverse_mapping(mapping) if reverse else mapping
def from_orders(cls, main_price, order_size, size_type=SizeType.Shares, order_price=None,
                init_capital=None, fees=None, fixed_fees=None, slippage=None,
                broadcast_kwargs={}, freq=None, **kwargs):
    """Build portfolio from orders.

    Starting with initial capital `init_capital`, at each time step, orders the number
    of shares specified in `order_size` for `order_price`.

    For more details, see `vectorbt.portfolio.nb.simulate_from_orders_nb`.

    Args:
        main_price (pandas_like): Main price of the asset, such as close. Will broadcast.
        order_size (float or array_like): The amount of shares to order. Will broadcast.

            If the size is positive, this is the number of shares to buy.
            If the size is negative, this is the number of shares to sell.
            To buy/sell everything, set the size to `np.inf`.
        size_type (int or array_like): See `vectorbt.portfolio.enums.SizeType`.
        order_price (array_like): Order price. Defaults to `main_price`. Will broadcast.
        init_capital (float or array_like): The initial capital. Will broadcast.

            Allowed is either a single value or value per column.
        fees (float or array_like): Fees in percentage of the order value. Will broadcast.
        fixed_fees (float or array_like): Fixed amount of fees to pay per order. Will broadcast.
        slippage (float or array_like): Slippage in percentage of price. Will broadcast.
        broadcast_kwargs: Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.

            `writeable` and `keep_raw` are set by this method and must not be included.
        freq (any): Index frequency in case `main_price.index` is not datetime-like.
        **kwargs: Keyword arguments passed to the `__init__` method.

    Returns:
        The object created by `cls(main_price, init_capital, orders, cash, shares, ...)`.

    For defaults, see `vectorbt.defaults.portfolio`.

    All time series will be broadcasted together using `vectorbt.base.reshape_fns.broadcast`.
    At the end, they will have the same metadata.

    Example:
        Portfolio from various order sequences:
        ```python-repl
        >>> portfolio = vbt.Portfolio.from_orders(price, orders,
        ...     init_capital=100, fees=0.0025, fixed_fees=1., slippage=0.001)

        >>> portfolio.orders.records
            col  idx        size  price      fees  side
        0     0    0   98.654463  1.001  1.246883     0
        1     1    0    1.000000  1.001  1.002502     0
        2     1    1    1.000000  2.002  1.005005     0
        3     1    2    1.000000  3.003  1.007507     0
        4     1    3    1.000000  2.002  1.005005     0
        5     1    4    4.000000  0.999  1.009990     1
        6     2    0   98.654463  1.001  1.246883     0
        7     2    1   98.654463  1.998  1.492779     1
        8     2    2   64.646521  3.003  1.485334     0
        9     2    3   64.646521  1.998  1.322909     1
        10    2    4  126.398131  1.001  1.316311     0

        >>> portfolio.equity
                             a          b           c
        2020-01-01   98.654463  98.996498   98.654463
        2020-01-02  197.308925  98.989493  195.618838
        2020-01-03  295.963388  99.978985  193.939564
        2020-01-04  197.308925  95.971980  127.840840
        2020-01-05   98.654463  90.957990  126.398131
        ```
    """
    # Get defaults
    if order_price is None:
        order_price = main_price
    if init_capital is None:
        init_capital = defaults.portfolio['init_capital']
    if fees is None:
        fees = defaults.portfolio['fees']
    if fixed_fees is None:
        fixed_fees = defaults.portfolio['fixed_fees']
    if slippage is None:
        slippage = defaults.portfolio['slippage']

    # Perform checks
    checks.assert_type(main_price, (pd.Series, pd.DataFrame))

    # Broadcast inputs
    # Only main_price is broadcasted, others can remain unchanged thanks to flexible indexing
    # keep_raw has one flag per positional argument below, in the same order
    keep_raw = (False, True, True, True, True, True, True, True)
    main_price, order_size, size_type, order_price, fees, fixed_fees, slippage, init_capital = \
        reshape_fns.broadcast(
            main_price, order_size, size_type, order_price, fees, fixed_fees, slippage, init_capital,
            **broadcast_kwargs, writeable=True, keep_raw=keep_raw)
    # Simulation always gets a 2-dim shape; a Series counts as one column
    target_shape = (main_price.shape[0], main_price.shape[1] if main_price.ndim > 1 else 1)

    # Perform calculation
    order_records, cash, shares = nb.simulate_from_orders_nb(
        target_shape, init_capital, order_size, size_type, order_price,
        fees, fixed_fees, slippage, is_2d=main_price.ndim == 2)

    # Bring to the same meta
    cash = main_price.vbt.wrap(cash)
    shares = main_price.vbt.wrap(shares)
    orders = Orders(order_records, main_price, freq=freq)
    if checks.is_series(main_price):
        # Single column: reduce the initial capital to a scalar
        # NOTE(review): relies on the raw `init_capital` being a NumPy array here - confirm
        init_capital = init_capital.item(0)
    else:
        # One initial capital value per column
        init_capital = np.broadcast_to(init_capital, (target_shape[1], ))
        init_capital = main_price.vbt.wrap_reduced(init_capital)
    return cls(main_price, init_capital, orders, cash, shares, freq=freq, **kwargs)
def _indexing_func_meta(self, pd_indexing_func, index=None, columns=None,
                        column_only_select=None, group_select=None, group_by=None):
    """Perform indexing on `ArrayWrapper` and also return indexing metadata.

    Takes into account column grouping.

    Set `column_only_select` to True to index the array wrapper as a Series of columns.
    This way, selection of index (axis 0) can be avoided. Set `group_select` to True
    to select groups rather than columns. Takes effect only if grouping is enabled.

    !!! note
        If `column_only_select` is True, make sure to index the array wrapper
        as a Series of columns rather than a DataFrame. For example, the operation
        `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
        object is already a Series and thus has only one column/group.

    Args:
        pd_indexing_func: Function that applies a pandas indexing operation to a
            Series/DataFrame and returns the result.
        index: Index to use instead of the wrapper's index.
        columns: Columns to use instead of the wrapper's columns (or group labels
            when selecting groups).
        column_only_select: Defaults to `self.column_only_select`, then to
            `settings.array_wrapper['column_only_select']`.
        group_select: Defaults to `self.group_select`, then to
            `settings.array_wrapper['group_select']`.
        group_by: Passed to `self.regroup`.

    Returns:
        Tuple of four elements: the new wrapper, the selected row positions,
        the selected column (or group) positions, and the selected ungrouped
        column positions.

    Raises:
        IndexingError: If the operation selects a scalar, selects a column on a
            Series in column-only mode, flips index and columns on a grouped
            wrapper, or cannot be mapped to rows and columns.
    """
    from vectorbt import settings

    if column_only_select is None:
        column_only_select = self.column_only_select
    if column_only_select is None:
        column_only_select = settings.array_wrapper['column_only_select']
    if group_select is None:
        group_select = self.group_select
    if group_select is None:
        group_select = settings.array_wrapper['group_select']
    _self = self.regroup(group_by)
    group_select = group_select and _self.grouper.is_grouped()
    if index is None:
        index = _self.index
    if columns is None:
        if group_select:
            columns = _self.grouper.get_columns()
        else:
            columns = _self.columns
    if group_select:
        # Groups as columns
        i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
    else:
        # Columns as columns
        i_wrapper = ArrayWrapper(index, columns, _self.ndim)
    n_rows = len(index)
    n_cols = len(columns)

    if column_only_select:
        if i_wrapper.ndim == 1:
            raise IndexingError(
                "Columns only: Attempting to select a column on a Series")
        # Index a Series of column positions to learn which columns were selected
        col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols), columns=columns)
        try:
            col_mapper = pd_indexing_func(col_mapper)
        except pd.core.indexing.IndexingError as e:
            warnings.warn(
                "Columns only: Make sure to treat this object "
                "as a Series of columns rather than a DataFrame",
                stacklevel=2)
            raise e
        if checks.is_series(col_mapper):
            new_columns = col_mapper.index
            col_idxs = col_mapper.values
            new_ndim = 2
        else:
            # A single position was returned: one column selected
            new_columns = columns[[col_mapper]]
            col_idxs = col_mapper
            new_ndim = 1
        new_index = index
        idx_idxs = np.arange(len(index))
    else:
        # Index an array of row positions to learn which rows were selected
        idx_mapper = i_wrapper.wrap(
            np.broadcast_to(np.arange(n_rows)[:, None], (n_rows, n_cols)),
            index=index, columns=columns)
        idx_mapper = pd_indexing_func(idx_mapper)
        if i_wrapper.ndim == 1:
            if not checks.is_series(idx_mapper):
                raise IndexingError("Selection of a scalar is not allowed")
            idx_idxs = idx_mapper.values
            col_idxs = 0
        else:
            # Same trick with column positions
            col_mapper = i_wrapper.wrap(
                np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)),
                index=index, columns=columns)
            col_mapper = pd_indexing_func(col_mapper)
            if checks.is_frame(idx_mapper):
                idx_idxs = idx_mapper.values[:, 0]
                col_idxs = col_mapper.values[0]
            elif checks.is_series(idx_mapper):
                one_col = np.all(
                    col_mapper.values == col_mapper.values.item(0))
                one_idx = np.all(
                    idx_mapper.values == idx_mapper.values.item(0))
                if one_col and one_idx:
                    # One index and one column selected, multiple times
                    raise IndexingError(
                        "Must select at least two unique indices in one of both axes"
                    )
                elif one_col:
                    # One column selected
                    idx_idxs = idx_mapper.values
                    col_idxs = col_mapper.values[0]
                elif one_idx:
                    # One index selected
                    idx_idxs = idx_mapper.values[0]
                    col_idxs = col_mapper.values
                else:
                    raise IndexingError
            else:
                raise IndexingError("Selection of a scalar is not allowed")
        new_index = index_fns.get_index(idx_mapper, 0)
        if not isinstance(idx_idxs, np.ndarray):
            # One index selected
            new_columns = index[[idx_idxs]]
        elif not isinstance(col_idxs, np.ndarray):
            # One column selected
            new_columns = columns[[col_idxs]]
        else:
            new_columns = index_fns.get_index(idx_mapper, 1)
        new_ndim = idx_mapper.ndim

    if _self.grouper.is_grouped():
        # Grouping enabled
        if np.asarray(idx_idxs).ndim == 0:
            raise IndexingError(
                "Flipping index and columns is not allowed")

        if group_select:
            # Selection based on groups
            # Get indices of columns corresponding to selected groups
            group_idxs = col_idxs
            group_idxs_arr = reshape_fns.to_1d(group_idxs)
            group_start_idxs = _self.grouper.get_group_start_idxs()[group_idxs_arr]
            group_end_idxs = _self.grouper.get_group_end_idxs()[group_idxs_arr]
            ungrouped_col_idxs = get_ranges_arr(group_start_idxs, group_end_idxs)
            ungrouped_columns = _self.columns[ungrouped_col_idxs]
            if new_ndim == 1 and len(ungrouped_columns) == 1:
                ungrouped_ndim = 1
                ungrouped_col_idxs = ungrouped_col_idxs[0]
            else:
                ungrouped_ndim = 2

            # Get indices of selected groups corresponding to the new columns
            # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
            # Each selected group position is repeated once per column of that group.
            # Group lengths must not be used as positions directly: the start of a
            # group is the cumulative length of all groups selected before it.
            group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
            ungrouped_group_idxs = np.repeat(np.arange(len(group_lens)), group_lens)

            return _self.copy(
                index=new_index,
                columns=ungrouped_columns,
                ndim=ungrouped_ndim,
                grouped_ndim=new_ndim,
                group_by=new_columns[ungrouped_group_idxs]
            ), idx_idxs, group_idxs, ungrouped_col_idxs

        # Selection based on columns
        col_idxs_arr = reshape_fns.to_1d(col_idxs)
        return _self.copy(
            index=new_index,
            columns=new_columns,
            ndim=new_ndim,
            grouped_ndim=None,
            group_by=_self.grouper.group_by[col_idxs_arr]
        ), idx_idxs, col_idxs, col_idxs

    # Grouping disabled
    return _self.copy(
        index=new_index,
        columns=new_columns,
        ndim=new_ndim,
        grouped_ndim=None,
        group_by=None
    ), idx_idxs, col_idxs, col_idxs
def broadcast(*args, to_shape=None, to_pd=None, to_2d=None, index_from='default', columns_from='default',
              writeable=False, copy_kwargs={}, keep_raw=False, **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.
        to_pd (bool, tuple or list): If `True`, converts all output arrays to pandas, otherwise
            returns raw NumPy arrays. If `None`, converts only if there is at least one pandas
            object among them.

            A tuple or list provides one flag per argument.
        to_2d (bool): If `True`, converts all Series to DataFrames.
        index_from (None, int, str or array_like): Broadcasting rule for index.
        columns_from (None, int, str or array_like): Broadcasting rule for columns.
        writeable (bool): If `True`, makes broadcasted arrays writable, otherwise readonly.

            !!! note
                Has effect only if broadcasting was needed for that particular array.

                Making arrays writable is possible only through copying them, which is pretty expensive.

                Numba requires arrays to be writable.
        copy_kwargs (dict): Keyword arguments passed to `np.array`. For example, to specify `order`.

            !!! note
                Has effect on every array, independent from whether broadcasting was needed or not.
        keep_raw (bool, tuple or list): If `True`, will keep the unbroadcasted version of the array.

            A tuple or list provides one flag per argument.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        A tuple of the processed objects, or the single object if only one was passed.

    For defaults, see `vectorbt.defaults.broadcasting`.

    Example:
        Without broadcasting index and columns:

        ```python-repl
        >>> import numpy as np
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import broadcast

        >>> v = 0
        >>> a = np.array([1, 2, 3])
        >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...     index=pd.Index(['x2', 'y2', 'z2']),
        ...     columns=pd.Index(['a2', 'b2', 'c2']))

        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=None,
        ...     columns_from=None,
        ... ): print(i)
           0  1  2
        0  0  0  0
        1  0  0  0
        2  0  0  0
           0  1  2
        0  1  2  3
        1  1  2  3
        2  1  2  3
           a  a  a
        x  1  1  1
        y  2  2  2
        z  3  3  3
            a2  b2  c2
        x2   1   2   3
        y2   4   5   6
        z2   7   8   9
        ```

        Taking new index and columns from position:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=2,
        ...     columns_from=3
        ... ): print(i)
           a2  b2  c2
        x   0   0   0
        y   0   0   0
        z   0   0   0
           a2  b2  c2
        x   1   2   3
        y   1   2   3
        z   1   2   3
           a2  b2  c2
        x   1   1   1
        y   2   2   2
        z   3   3   3
           a2  b2  c2
        x   1   2   3
        y   4   5   6
        z   7   8   9
        ```

        Broadcasting index and columns through stacking:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='stack',
        ...     columns_from='stack'
        ... ): print(i)
              a2  b2  c2
        x x2   0   0   0
        y y2   0   0   0
        z z2   0   0   0
              a2  b2  c2
        x x2   1   2   3
        y y2   1   2   3
        z z2   1   2   3
              a2  b2  c2
        x x2   1   1   1
        y y2   2   2   2
        z z2   3   3   3
              a2  b2  c2
        x x2   1   2   3
        y y2   4   5   6
        z z2   7   8   9
        ```

        Setting index and columns manually:

        ```python-repl
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=['a', 'b', 'c'],
        ...     columns_from=['d', 'e', 'f']
        ... ): print(i)
           d  e  f
        a  0  0  0
        b  0  0  0
        c  0  0  0
           d  e  f
        a  1  2  3
        b  1  2  3
        c  1  2  3
           d  e  f
        a  1  1  1
        b  2  2  2
        c  3  3  3
           d  e  f
        a  1  2  3
        b  4  5  6
        c  7  8  9
        ```
    """
    is_pd = False
    is_2d = False
    args = list(args)
    # Resolve the 'default' placeholders from the package defaults
    if isinstance(index_from, str) and index_from == 'default':
        index_from = defaults.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = defaults.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether work on 2-dim data
    for i in range(len(args)):
        if not checks.is_array(args[i]):
            args[i] = np.asarray(args[i])
        if args[i].ndim > 1:
            is_2d = True
        if checks.is_pandas(args[i]):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_2d is not None:
        # force either keeping Series or converting them to DataFrames
        is_2d = to_2d

    if to_pd is not None:
        # force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            # With per-argument flags, index/columns are needed if any flag is set
            is_pd = np.array(to_pd).any()
        else:
            is_pd = to_pd

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [
        arg.to_frame() if is_2d and checks.is_series(arg) else arg
        for arg in args
    ]

    # Get final shape
    if to_shape is None:
        # NOTE(review): `_broadcast_shape` is a private NumPy helper
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            # Raw arguments are taken over without broadcasting
            new_args.append(arg)
            continue
        new_args.append(np.broadcast_to(arg, to_shape, subok=True))

    # The problem is that broadcasting creates readonly objects and Numba requires writable ones.
    # To make them writable we must copy, which is ok for small-sized arrays and not ok for large ones.
    # Thus check if broadcasting was needed in the first place, and if so, copy
    # This loop also runs for raw arguments: their shape is unchanged, so they take the first branch
    for i in range(len(new_args)):
        if new_args[i].shape == args_2d[i].shape:
            # Broadcasting was not needed, take old array
            new_args[i] = np.array(args_2d[i], copy=False, **copy_kwargs)
        else:
            # Broadcasting was needed, take new array
            new_args[i] = np.array(new_args[i], copy=writeable, **copy_kwargs)

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args, to_shape, index_from=index_from, axis=0, **kwargs)
        new_columns = broadcast_index(args, to_shape, index_from=columns_from, axis=1, **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i in range(len(new_args)):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            # Raw arguments are never wrapped
            continue
        if isinstance(to_pd, (tuple, list)):
            _is_pd = to_pd[i]
        else:
            _is_pd = is_pd
        new_args[i] = wrap_broadcasted(
            args[i], new_args[i], is_pd=_is_pd, new_index=new_index, new_columns=new_columns)

    if len(new_args) > 1:
        return tuple(new_args)
    return new_args[0]
def columns(self):
    """Return the columns of the wrapped object.

    For a Series this is a one-element index holding its name,
    for anything else the object's `columns`.
    """
    obj = self._obj
    return pd.Index([obj.name]) if checks.is_series(obj) else obj.columns
def from_order_func(cls, main_price, order_func_nb, *args, init_capital=None, row_wise=False, row_prep_func_nb=None, broadcast_kwargs={}, freq=None, **kwargs): """Build portfolio from a custom order function. Starting with initial capital `init_capital`, iterates over shape `main_price.shape`, and for each data point, generates an order using `order_func_nb`. This way, you can specify order size, price and transaction costs dynamically (for example, based on the current balance). if `row_wise` is `True`, see `vectorbt.portfolio.nb.simulate_row_wise_nb`. Otherwise, see `vectorbt.portfolio.nb.simulate_nb`. Args: main_price (pandas_like): Main price of the asset, such as close. Will broadcast. order_func_nb (function): Function that returns an order. See `vectorbt.portfolio.enums.Order`. *args: Arguments passed to `order_func_nb`. init_capital (float or array_like): The initial capital. Will broadcast. Allowed is either a single value or value per column. row_wise (bool): If `True`, iterates over rows, otherwise over columns. Set to `True` if columns depend upon each other. row_prep_func_nb (function): Function to call before iterating over the next row. Can be used to do preprocessing, such as to calculate past returns. broadcast_kwargs: Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`. freq (any): Index frequency in case `main_price.index` is not datetime-like. **kwargs: Keyword arguments passed to the `__init__` method. For defaults, see `vectorbt.defaults.portfolio`. All time series will be broadcasted together using `vectorbt.base.reshape_fns.broadcast`. At the end, they will have the same metadata. !!! note `order_func_nb` must be Numba-compiled. Example: Placing a buy order each day: ```python-repl >>> from vectorbt.portfolio import Order, SizeType >>> @njit ... def order_func_nb(oc, price): ... return Order(10, SizeType.Shares, price[oc.i], ... fees=0.01, fixed_fees=1., slippage=0.01) >>> portfolio = vbt.Portfolio.from_order_func( ... 
price, order_func_nb, price.values, init_capital=100) >>> portfolio.orders.records col idx size price fees side 0 0 0 10.0 1.01 1.101 0 1 0 1 10.0 2.02 1.202 0 2 0 2 10.0 3.03 1.303 0 3 0 3 10.0 2.02 1.202 0 4 0 4 10.0 1.01 1.101 0 >>> portfolio.equity 2020-01-01 98.799 2020-01-02 107.397 2020-01-03 125.794 2020-01-04 94.392 2020-01-05 53.191 Name: a, dtype: float64 ``` """ # Get defaults if init_capital is None: init_capital = defaults.portfolio['init_capital'] # Perform checks checks.assert_type(main_price, (pd.Series, pd.DataFrame)) checks.assert_numba_func(order_func_nb) # Broadcast inputs # Only main_price is broadcasted, others can remain unchanged thanks to flexible indexing keep_raw = (False, True) main_price, init_capital = reshape_fns.broadcast(main_price, init_capital, **broadcast_kwargs, writeable=True, keep_raw=keep_raw) target_shape = (main_price.shape[0], main_price.shape[1] if main_price.ndim > 1 else 1) # Perform calculation if row_wise: if row_prep_func_nb is None: row_prep_func_nb = nb.none_row_prep_func_nb order_records, cash, shares = nb.simulate_row_wise_nb( target_shape, init_capital, row_prep_func_nb, order_func_nb, *args) else: if row_prep_func_nb is not None: raise ValueError( "Function row_prep_func_nb can be only called when row_wise=True" ) order_records, cash, shares = nb.simulate_nb( target_shape, init_capital, order_func_nb, *args) # Bring to the same meta cash = main_price.vbt.wrap(cash) shares = main_price.vbt.wrap(shares) orders = Orders(order_records, main_price, freq=freq) if checks.is_series(main_price): init_capital = init_capital.item(0) else: init_capital = np.broadcast_to(init_capital, (target_shape[1], )) init_capital = main_price.vbt.wrap_reduced(init_capital) return cls(main_price, init_capital, orders, cash, shares, freq=freq, **kwargs)
def broadcast_index(*args, index_from=None, axis=0, is_2d=False, ignore_single='default',
                    drop_duplicates='default', keep='default'):
    """Broadcast index/columns of all arguments.

    Args:
        *args: Objects whose index/columns take part in broadcasting.
            Only pandas objects contribute labels.
        index_from (None, int or str): Broadcasting rule.

            * `None` - no new index is produced and None is returned
            * `int` - take the index/columns of the object at this position in `args`
            * `'strict'` - all pandas objects must have equal index/columns
            * `'stack'` - stack differing indexes/columns together
        axis (int): 0 for index, 1 for columns.
        is_2d: Accepted but not used in this function.
        ignore_single: If true, an index of different length that has a single element
            is not stacked; the longer one is kept. `'default'` reads
            `defaults.broadcast['ignore_single']`.
        drop_duplicates: If true, `index_fns.drop_duplicate_levels` is applied to the result.
            `'default'` reads `defaults.broadcast['drop_duplicates']`.
        keep: Passed to `index_fns.drop_duplicate_levels`.
            `'default'` reads `defaults.broadcast['keep']`.

    Returns:
        The new index, or None if `index_from` is None.

    Raises:
        ValueError: If `index_from` is not a supported value, if indexes differ under
            `'strict'`, or if indexes cannot be broadcast together.
    """
    if ignore_single == 'default':
        ignore_single = defaults.broadcast['ignore_single']
    if drop_duplicates == 'default':
        drop_duplicates = defaults.broadcast['drop_duplicates']
    if keep == 'default':
        keep = defaults.broadcast['keep']
    # Used in error messages only
    index_str = 'columns' if axis == 1 else 'index'

    new_index = None
    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if axis == 1:
                new_index = to_2d(args[index_from]).columns
            else:
                new_index = args[index_from].index
        elif isinstance(index_from, str) and index_from in ('stack', 'strict'):
            # If pandas objects have different index/columns, stack them together
            # maxlen stores the length of the longest index
            # NOTE(review): `_broadcast_shape` is a private NumPy helper
            max_shape = np.lib.stride_tricks._broadcast_shape(*args)
            if axis == 1 and len(max_shape) == 1:
                max_shape = (max_shape[0], 1)
            maxlen = max_shape[1] if axis == 1 else max_shape[0]
            for arg in args:
                if checks.is_pandas(arg):
                    if checks.is_series(arg):
                        arg = arg.to_frame()  # series name counts as a column
                    index = arg.columns if axis == 1 else arg.index
                    if new_index is None:
                        new_index = index
                    else:
                        if index_from == 'strict':
                            # If pandas objects have different index/columns, raise an exception
                            if not pd.Index.equals(index, new_index):
                                raise ValueError(
                                    f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                                )
                        # Broadcasting index must follow the rules of a regular broadcasting operation
                        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                        # 1. rule: if indexes are of the same length, they are simply stacked
                        # 2. rule: if index has one element, it gets repeated and then stacked
                        if pd.Index.equals(index, new_index):
                            # Nothing new to add
                            continue
                        if len(index) != len(new_index):
                            if len(index) > 1 and len(new_index) > 1:
                                raise ValueError(
                                    "Indexes could not be broadcast together")
                            if ignore_single:
                                # Columns of length 1 should be simply ignored
                                if len(index) > len(new_index):
                                    new_index = index
                                # Skip stacking in either case
                                continue
                            else:
                                if len(index) > len(new_index):
                                    new_index = index_fns.repeat(
                                        new_index, len(index))
                                elif len(index) < len(new_index):
                                    index = index_fns.repeat(
                                        index, len(new_index))
                        new_index = index_fns.stack(new_index, index)
            if drop_duplicates:
                new_index = index_fns.drop_duplicate_levels(
                    new_index, keep=keep)
            # NOTE(review): `new_index` is still None here if `args` holds no pandas object,
            # in which case `len(new_index)` fails - confirm callers always pass one
            if maxlen > len(new_index):
                if index_from == 'strict':
                    raise ValueError(
                        f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                    )
                # This happens only when some numpy object is longer than the new pandas index
                # In this case, new pandas index (one element) should be repeated to match this length.
                if maxlen > 1 and len(new_index) > 1:
                    raise ValueError("Indexes could not be broadcast together")
                new_index = index_fns.repeat(new_index, maxlen)
        else:
            raise ValueError(
                f"Invalid value {index_from} for {'columns' if axis == 1 else 'index'}_from"
            )
    return new_index
def broadcast_index(*args, to_shape=None, index_from=None, axis=0, ignore_single='default',
                    drop_duplicates='default', keep='default'):
    """Produce a broadcasted index/columns.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. Optional; if omitted in `'stack'`/`'strict'`
            mode, the shape is derived by simulating a broadcast of `args`.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - use the original index/columns of the objects in `args`
              (this function returns `None`)
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.utils.index_fns.stack_indexes`
            * everything else that is not a string will be converted to `pd.Index`

            Any other string raises `ValueError`; in particular `'default'` is not
            resolved by this function.
        axis (int): Set to 0 for index and 1 for columns.
        ignore_single (bool): If `True`, ignores indexes/columns with one value, otherwise
            they will be repeated to match the length of the longest index/columns (can lead to
            pollution of levels).
        drop_duplicates (bool): See `vectorbt.utils.index_fns.drop_duplicate_levels`.
        keep (bool): See `vectorbt.utils.index_fns.drop_duplicate_levels`.

    Returns:
        The new index/columns, or `None` if `index_from` is `None` or if no pandas
        object was found among `args` in `'stack'`/`'strict'` mode.

    Raises:
        ValueError: If `index_from` is an unsupported string, if indexes cannot be
            broadcast together, or if broadcasting is required in `'strict'` mode.

    For defaults, see `vectorbt.defaults.broadcasting`.
    """
    if ignore_single == 'default':
        ignore_single = defaults.broadcasting['ignore_single']
    if drop_duplicates == 'default':
        drop_duplicates = defaults.broadcasting['drop_duplicates']
    if keep == 'default':
        keep = defaults.broadcasting['keep']
    index_str = 'columns' if axis == 1 else 'index'

    if index_from is None:
        return None

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if axis == 1:
            return to_2d(args[index_from]).columns
        return args[index_from].index

    if not isinstance(index_from, str):
        # Explicit index/columns supplied by the caller: hand back a pd.Index,
        # as the contract above states, instead of the raw object.
        if isinstance(index_from, pd.Index):
            return index_from
        return pd.Index(index_from)

    if index_from not in ('stack', 'strict'):
        raise ValueError(f"Invalid value {index_from} for {index_str}_from")

    # maxlen stores the length of the longest index
    if to_shape is None:
        # Simulate broadcasting
        to_shape = np.lib.stride_tricks._broadcast_shape(*args)
    if axis == 1 and len(to_shape) == 1:
        to_shape = (to_shape[0], 1)
    maxlen = to_shape[1] if axis == 1 else to_shape[0]

    new_index = None
    for arg in args:
        if not checks.is_pandas(arg):
            continue
        if checks.is_series(arg):
            arg = arg.to_frame()  # series name counts as a column
        index = arg.columns if axis == 1 else arg.index
        if new_index is None:
            new_index = index
            continue
        if index_from == 'strict':
            # If pandas objects have different index/columns, raise an exception
            if not pd.Index.equals(index, new_index):
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                )
        # Broadcasting index must follow the rules of a regular broadcasting operation
        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
        # 1. rule: if indexes are of the same length, they are simply stacked
        # 2. rule: if index has one element, it gets repeated and then stacked
        if pd.Index.equals(index, new_index):
            continue
        if len(index) != len(new_index):
            if len(index) > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            if ignore_single:
                # Columns of length 1 should be simply ignored
                if len(index) > len(new_index):
                    new_index = index
                continue
            if len(index) > len(new_index):
                new_index = index_fns.repeat_index(new_index, len(index))
            elif len(index) < len(new_index):
                index = index_fns.repeat_index(index, len(new_index))
        new_index = index_fns.stack_indexes(new_index, index)

    if new_index is None:
        # No pandas object among args: there is nothing to deduplicate or repeat,
        # and len(None) below would fail with an unrelated TypeError.
        return None

    if drop_duplicates:
        new_index = index_fns.drop_duplicate_levels(new_index, keep=keep)
    if maxlen > len(new_index):
        if index_from == 'strict':
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
def wrap_reduced(self,
                 arr: tp.ArrayLike,
                 name_or_index: tp.NameIndex = None,
                 columns: tp.Optional[tp.IndexLike] = None,
                 fillna: tp.Optional[tp.Scalar] = None,
                 dtype: tp.Optional[tp.PandasDTypeLike] = None,
                 group_by: tp.GroupByLike = None,
                 to_timedelta: bool = False,
                 to_index: bool = False,
                 silence_warnings: tp.Optional[bool] = None) -> tp.MaybeSeriesFrame:
    """Wrap result of reduction.

    `name_or_index` can be the name of the resulting series if reducing to a scalar per column,
    or the index of the resulting series/dataframe if reducing to an array per column.
    `columns` can be set to override object's default columns.

    If `to_index` is set, `dtype` defaults to `np.int_` and `fillna` to -1, and every
    value other than -1 is then looked up in `self.index` (-1 becomes NaN).

    The input array is never modified: NaN filling is done on a copy.

    See `ArrayWrapper.wrap` for the pipeline.

    Raises:
        ValueError: If the array has more than two dimensions.
    """
    from vectorbt._settings import settings
    array_wrapper_cfg = settings['array_wrapper']

    if silence_warnings is None:
        silence_warnings = array_wrapper_cfg['silence_warnings']

    checks.assert_not_none(self.ndim)
    _self = self.resolve(group_by=group_by)

    if columns is None:
        columns = _self.columns
    if not isinstance(columns, pd.Index):
        columns = pd.Index(columns)

    if to_index:
        if dtype is None:
            dtype = np.int_
        if fillna is None:
            fillna = -1

    # A string can only be a name; wherever an index is expected it is dropped
    index_or_none = None if isinstance(name_or_index, str) else name_or_index

    def _array_per_series(values):
        # Array per Series
        sr_name = columns[0]
        if sr_name == 0:  # was a Series before
            sr_name = None
        return pd.Series(values, index=index_or_none, name=sr_name, dtype=dtype)

    def _wrap_reduced(arr):
        arr = np.asarray(arr)
        if fillna is not None:
            null_mask = pd.isnull(arr)
            if np.any(null_mask):
                # np.asarray returns the caller's own buffer for ndarray input, so
                # fill a private copy: keeps the input intact and also works when
                # the input is read-only.
                arr = arr.copy()
                arr[null_mask] = fillna
        if arr.ndim == 0:
            # Scalar per Series/DataFrame
            return pd.Series(arr, dtype=dtype)[0]
        if arr.ndim == 1:
            if _self.ndim == 1:
                if arr.shape[0] == 1:
                    # Scalar per Series/DataFrame with one column
                    return pd.Series(arr, dtype=dtype)[0]
                return _array_per_series(arr)
            # Scalar per column in a DataFrame
            return pd.Series(arr, index=columns, name=name_or_index, dtype=dtype)
        if arr.ndim == 2:
            if arr.shape[1] == 1 and _self.ndim == 1:
                return _array_per_series(reshape_fns.soft_to_ndim(arr, 1))
            # Array per column in DataFrame
            return pd.DataFrame(arr, index=index_or_none, columns=columns, dtype=dtype)
        raise ValueError(f"{arr.ndim}-d input is not supported")

    out = _wrap_reduced(arr)
    if to_index:
        # Convert to index
        if checks.is_series(out):
            out = out.map(lambda x: self.index[x] if x != -1 else np.nan)
        elif checks.is_frame(out):
            out = out.applymap(lambda x: self.index[x] if x != -1 else np.nan)
        else:
            out = self.index[out] if out != -1 else np.nan
    if to_timedelta:
        # Convert to timedelta
        out = self.to_timedelta(out, silence_warnings=silence_warnings)
    return out
def wrap(self,
         arr: tp.ArrayLike,
         index: tp.Optional[tp.IndexLike] = None,
         columns: tp.Optional[tp.IndexLike] = None,
         fillna: tp.Optional[tp.Scalar] = None,
         dtype: tp.Optional[tp.PandasDTypeLike] = None,
         group_by: tp.GroupByLike = None,
         to_timedelta: bool = False,
         to_index: bool = False,
         silence_warnings: tp.Optional[bool] = None) -> tp.SeriesFrame:
    """Wrap a NumPy array using the stored metadata.

    Runs the following pipeline:

    1) Converts to NumPy array
    2) Fills NaN (optional)
    3) Wraps using index, columns, and dtype (optional)
    4) Converts to index (optional)
    5) Converts to timedelta using `ArrayWrapper.to_timedelta` (optional)

    `index` and `columns` default to those of the wrapper resolved with `group_by`.
    The input array is never modified: NaN filling is done on a copy.

    Raises:
        ValueError: If the array is neither 1- nor 2-dimensional after reshaping.
    """
    from vectorbt._settings import settings
    array_wrapper_cfg = settings['array_wrapper']

    if silence_warnings is None:
        silence_warnings = array_wrapper_cfg['silence_warnings']

    _self = self.resolve(group_by=group_by)

    if index is None:
        index = _self.index
    if not isinstance(index, pd.Index):
        index = pd.Index(index)
    if columns is None:
        columns = _self.columns
    if not isinstance(columns, pd.Index):
        columns = pd.Index(columns)

    name = None
    if len(columns) == 1:
        name = columns[0]
        if name == 0:  # was a Series before
            name = None

    def _wrap(arr):
        arr = np.asarray(arr)
        checks.assert_ndim(arr, (1, 2))
        if fillna is not None:
            null_mask = pd.isnull(arr)
            if np.any(null_mask):
                # np.asarray returns the caller's own buffer for ndarray input, so
                # fill a private copy: keeps the input intact and also works when
                # the input is read-only.
                arr = arr.copy()
                arr[null_mask] = fillna
        arr = reshape_fns.soft_to_ndim(arr, self.ndim)
        checks.assert_shape_equal(arr, index, axis=(0, 0))
        if arr.ndim == 2:
            checks.assert_shape_equal(arr, columns, axis=(1, 0))
        if arr.ndim == 1:
            return pd.Series(arr, index=index, name=name, dtype=dtype)
        if arr.ndim == 2:
            if arr.shape[1] == 1 and _self.ndim == 1:
                return pd.Series(arr[:, 0], index=index, name=name, dtype=dtype)
            return pd.DataFrame(arr, index=index, columns=columns, dtype=dtype)
        raise ValueError(f"{arr.ndim}-d input is not supported")

    out = _wrap(arr)
    if to_index:
        # Convert to index: -1 marks a missing position
        if checks.is_series(out):
            out = out.map(lambda x: self.index[x] if x != -1 else np.nan)
        else:
            out = out.applymap(lambda x: self.index[x] if x != -1 else np.nan)
    if to_timedelta:
        # Convert to timedelta
        out = self.to_timedelta(out, silence_warnings=silence_warnings)
    return out
def broadcast(*args, index_from='default', columns_from='default', writeable=False, copy_kwargs={}, **kwargs):
    """Bring multiple arguments to the same shape.

    Anything that is not array-like is first converted with `np.asarray`. If at least
    one argument is a pandas object, a common index and common columns are built with
    `broadcast_index` (extra `**kwargs` are forwarded to it) and every result is wrapped
    with `wrap_broadcasted`.

    `index_from` and `columns_from` equal to `'default'` are read from `defaults.broadcast`.
    `writeable` is used as the `copy` flag of `np.array` after an actual broadcast, and
    `copy_kwargs` are additional keyword arguments for that `np.array` call.

    Returns a tuple with one broadcasted object per argument.
    """
    operands = list(args)

    # Convert to np.ndarray object if not numpy or pandas
    has_pandas = False
    has_2d = False
    for pos, operand in enumerate(operands):
        if not checks.is_array_like(operand):
            operand = operands[pos] = np.asarray(operand)
        if operand.ndim > 1:
            has_2d = True
        if checks.is_pandas(operand):
            has_pandas = True

    new_index = None
    new_columns = None
    if has_pandas:
        if has_2d:
            # Convert all pd.Series objects to pd.DataFrame
            for pos, operand in enumerate(operands):
                if checks.is_series(operand):
                    operands[pos] = operand.to_frame()

        # Decide on index and columns
        if index_from == 'default':
            index_from = defaults.broadcast['index_from']
        if columns_from == 'default':
            columns_from = defaults.broadcast['columns_from']
        new_index = broadcast_index(
            *operands, index_from=index_from, axis=0, is_2d=has_2d, **kwargs)
        new_columns = broadcast_index(
            *operands, index_from=columns_from, axis=1, is_2d=has_2d, **kwargs)

    # Perform broadcasting operation if needed
    if is_broadcasting_needed(*operands):
        # The problem is that broadcasting creates readonly objects and numba requires writable ones.
        # So we have to copy all of them, which is ok for small-sized arrays and not ok for large ones.
        # copy kwarg is only applied when broadcasting was done to avoid deprecation warnings
        # NOTE: If copy=False, then the resulting arrays will be readonly in the future!
        raw_results = [
            np.array(item, copy=writeable, **copy_kwargs)
            for item in np.broadcast_arrays(*operands, subok=True)
        ]
    else:
        # No copy here, just pandas -> numpy and any order to contiguous
        raw_results = [np.array(item, copy=False, **copy_kwargs) for item in operands]

    # Bring arrays to their old types (e.g. array -> pandas)
    return tuple(
        wrap_broadcasted(
            old, new, is_pd=has_pandas, new_index=new_index, new_columns=new_columns)
        for old, new in zip(operands, raw_results)
    )
def broadcast_index(args, to_shape, index_from=None, axis=0, ignore_sr_names=None, **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects, passed as a single sequence.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns
            * integer - use the index/columns of the i-nth object in `args`
            * None - use the original index/columns of the objects in `args`
              (this function returns None)
            * everything else that is not a string will be converted to `pd.Index`

            Any other string raises `ValueError`; in particular 'default' is not
            resolved by this function.
        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            If None, read from `vectorbt.settings.broadcasting`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if `index_from` is None.

    Raises:
        TypeError: If `index_from` is an integer pointing to a non-pandas object.
        ValueError: If `index_from` is an unsupported string, if indexes cannot be
            broadcast together, or if broadcasting is required when 'strict'.

    For defaults, see `vectorbt.settings.broadcasting`.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    if ignore_sr_names is None:
        # Settings are only needed to resolve the default
        from vectorbt import settings

        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]
    new_index = None

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from == 'ignore':
                # Ignore index/columns
                new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
            elif index_from in ('stack', 'strict'):
                # Check whether all indexes/columns are equal
                last_index = None  # of type pd.Index
                index_conflict = False
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if last_index is not None:
                            if not pd.Index.equals(index, last_index):
                                index_conflict = True
                        last_index = index
                if not index_conflict:
                    new_index = last_index
                else:
                    # If pandas objects have different index/columns, stack them together
                    for arg in args:
                        if not checks.is_pandas(arg):
                            continue
                        index = index_fns.get_index(arg, axis)
                        if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                            # ignore Series name
                            continue
                        if checks.is_default_index(index):
                            # ignore simple ranges without name
                            continue
                        if new_index is None:
                            new_index = index
                            continue
                        if index_from == 'strict':
                            # If pandas objects have different index/columns, raise an exception
                            if not pd.Index.equals(index, new_index):
                                raise ValueError(
                                    f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                                )
                        # Broadcasting index must follow the rules of a regular broadcasting operation
                        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                        # 1. rule: if indexes are of the same length, they are simply stacked
                        # 2. rule: if index has one element, it gets repeated and then stacked
                        if pd.Index.equals(index, new_index):
                            continue
                        if len(index) != len(new_index):
                            if len(index) > 1 and len(new_index) > 1:
                                raise ValueError("Indexes could not be broadcast together")
                            if len(index) > len(new_index):
                                new_index = index_fns.repeat_index(new_index, len(index))
                            elif len(index) < len(new_index):
                                index = index_fns.repeat_index(index, len(new_index))
                        new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {index_str}_from")
        else:
            # Explicit index/columns supplied by the caller: convert to pd.Index,
            # as the contract above states, instead of passing the raw object on.
            new_index = index_from if isinstance(index_from, pd.Index) else pd.Index(index_from)

    if new_index is not None:
        if maxlen > len(new_index):
            if index_from == 'strict':
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                )
            # This happens only when some numpy object is longer than the new pandas index
            # In this case, new pandas index (one element) should be repeated to match this length.
            if maxlen > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            new_index = index_fns.repeat_index(new_index, maxlen)
    elif index_from is not None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # In case when index_from is not None, we choose 2)
        new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
    return new_index
def broadcast(*args, to_shape=None, to_pd=None, to_frame=None, align_index=None, align_columns=None,
              index_from='default', columns_from='default', require_kwargs=None, keep_raw=False,
              return_meta=False, **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.
            An integer is treated as a one-element tuple.
        to_pd (bool, tuple or list): Whether to convert all output arrays to pandas, otherwise returns
            raw NumPy arrays. If None, converts only if there is at least one pandas object among them.
            A tuple or list is applied per argument.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.
        index_from (any): Broadcasting rule for index.
        columns_from (any): Broadcasting rule for columns.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.
            A tuple or list is applied per argument.
        keep_raw (bool, tuple or list): Whether to keep the unbroadcasted version of the array.

            Only makes sure that the array can be broadcast to the target shape.
            A tuple or list is applied per argument.
        return_meta (bool): If True, will also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        A tuple of broadcasted objects, or the single object if only one argument was
        passed. With `return_meta`, a tuple of that result, the final shape, the new
        index and the new columns.

    For defaults, see `vectorbt.settings.broadcasting`.

    ## Example

    Without broadcasting index and columns:

    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast

    >>> v = 0
    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     index=pd.Index(['x2', 'y2', 'z2']),
    ...     columns=pd.Index(['a2', 'b2', 'c2']))

    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=None,
    ...     columns_from=None,
    ... ): print(i)
       0  1  2
    0  0  0  0
    1  0  0  0
    2  0  0  0
       0  1  2
    0  1  2  3
    1  1  2  3
    2  1  2  3
       a  a  a
    x  1  1  1
    y  2  2  2
    z  3  3  3
        a2  b2  c2
    x2   1   2   3
    y2   4   5   6
    z2   7   8   9
    ```

    Taking new index and columns from position:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=2,
    ...     columns_from=3
    ... ): print(i)
       a2  b2  c2
    x   0   0   0
    y   0   0   0
    z   0   0   0
       a2  b2  c2
    x   1   2   3
    y   1   2   3
    z   1   2   3
       a2  b2  c2
    x   1   1   1
    y   2   2   2
    z   3   3   3
       a2  b2  c2
    x   1   2   3
    y   4   5   6
    z   7   8   9
    ```

    Broadcasting index and columns through stacking:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from='stack',
    ...     columns_from='stack'
    ... ): print(i)
          a2  b2  c2
    x x2   0   0   0
    y y2   0   0   0
    z z2   0   0   0
          a2  b2  c2
    x x2   1   2   3
    y y2   1   2   3
    z z2   1   2   3
          a2  b2  c2
    x x2   1   1   1
    y y2   2   2   2
    z z2   3   3   3
          a2  b2  c2
    x x2   1   2   3
    y y2   4   5   6
    z z2   7   8   9
    ```

    Setting index and columns manually:

    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=['a', 'b', 'c'],
    ...     columns_from=['d', 'e', 'f']
    ... ): print(i)
       d  e  f
    a  0  0  0
    b  0  0  0
    c  0  0  0
       d  e  f
    a  1  2  3
    b  1  2  3
    c  1  2  3
       d  e  f
    a  1  1  1
    b  2  2  2
    c  3  3  3
       d  e  f
    a  1  2  3
    b  4  5  6
    c  7  8  9
    ```
    """
    from vectorbt import settings

    def _option_for(option, pos):
        # Per-argument options come as tuple/list, shared ones as a single value
        if isinstance(option, (tuple, list)):
            return option[pos]
        return option

    objs = list(args)
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = settings.broadcasting['align_index']
    if align_columns is None:
        align_columns = settings.broadcasting['align_columns']
    if isinstance(index_from, str) and index_from == 'default':
        index_from = settings.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = settings.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether work on 2-dim data
    is_pd = False
    is_2d = False
    for pos, obj in enumerate(objs):
        if not checks.is_array(obj):
            obj = objs[pos] = np.asarray(obj)
        if obj.ndim > 1:
            is_2d = True
        if checks.is_pandas(obj):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape,)
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_frame is not None:
        # force either keeping Series or converting them to DataFrames
        is_2d = to_frame

    if to_pd is not None:
        # force either raw or pandas
        is_pd = any(to_pd) if isinstance(to_pd, (tuple, list)) else to_pd

    # Align pandas objects
    if align_index:
        index_to_align = [
            pos for pos, obj in enumerate(objs)
            if checks.is_pandas(obj) and len(obj.index) > 1
        ]
        if len(index_to_align) > 1:
            indexes = [objs[pos].index for pos in index_to_align]
            if len({len(index) for index in indexes}) > 1:
                index_indices = index_fns.align_indexes(*indexes)
                for k, pos in enumerate(index_to_align):
                    objs[pos] = objs[pos].iloc[index_indices[k]]
    if align_columns:
        cols_to_align = [
            pos for pos, obj in enumerate(objs)
            if checks.is_frame(obj) and len(obj.columns) > 1
        ]
        if len(cols_to_align) > 1:
            indexes = [objs[pos].columns for pos in cols_to_align]
            if len({len(index) for index in indexes}) > 1:
                col_indices = index_fns.align_indexes(*indexes)
                for k, pos in enumerate(cols_to_align):
                    objs[pos] = objs[pos].iloc[:, col_indices[k]]

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    objs_2d = []
    for obj in objs:
        if is_2d and checks.is_series(obj):
            obj = obj.to_frame()
        objs_2d.append(obj)

    # Get final shape
    if to_shape is None:
        to_shape = np.lib.stride_tricks._broadcast_shape(*objs_2d)

    # Perform broadcasting
    new_objs = []
    for pos, obj in enumerate(objs_2d):
        raw_wanted = _option_for(keep_raw, pos)
        # Always broadcast, even for raw outputs, to verify the shape is compatible
        broadcasted = np.broadcast_to(obj, to_shape)
        new_objs.append(obj if raw_wanted else broadcasted)

    # Force to match requirements
    for pos, new_obj in enumerate(new_objs):
        new_objs[pos] = np.require(new_obj, **_option_for(require_kwargs, pos))

    new_index = None
    new_columns = None
    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass the original objects, not their 2-dim versions,
        # to preserve original shape info
        new_index = broadcast_index(objs, to_shape, index_from=index_from, axis=0, **kwargs)
        new_columns = broadcast_index(objs, to_shape, index_from=columns_from, axis=1, **kwargs)

    # Bring arrays to their old types (e.g. array -> pandas)
    for pos, new_obj in enumerate(new_objs):
        if _option_for(keep_raw, pos):
            continue
        wrap_as_pd = to_pd[pos] if isinstance(to_pd, (tuple, list)) else is_pd
        new_objs[pos] = wrap_broadcasted(
            objs[pos],
            new_obj,
            is_pd=wrap_as_pd,
            new_index=new_index,
            new_columns=new_columns
        )

    result = tuple(new_objs) if len(new_objs) > 1 else new_objs[0]
    if return_meta:
        return result, to_shape, new_index, new_columns
    return result
def test_is_series(self):
    """`checks.is_series` accepts a pandas Series and rejects scalars, arrays and frames."""
    rejected = (0, np.array([0]))
    for candidate in rejected:
        assert not checks.is_series(candidate)
    series = pd.Series([1, 2, 3])
    assert checks.is_series(series)
    frame = pd.DataFrame([1, 2, 3])
    assert not checks.is_series(frame)