def wrap_metric(self, a):
    if checks.is_frame(self.ts):
        return pd.Series(a, index=self.ts.columns)
    # Single value
    if checks.is_array(a):
        return a[0]
    return a
def unstack_to_df(arg, index_levels=None, column_levels=None, symmetric=False, sort=True):
    """Reshape `arg` based on its multi-index into a DataFrame.

    Use `index_levels` to specify what index levels will form new index, and `column_levels`
    for new columns. Set `symmetric` to True to make DataFrame symmetric.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_df

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']],
    ...     names=['x', 'y', 'z'])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_df(sr, index_levels=(0, 1), column_levels=2)
    z      a    b    c    d
    x y
    1 3  1.0  NaN  NaN  NaN
    1 4  NaN  2.0  NaN  NaN
    2 3  NaN  NaN  3.0  NaN
    2 4  NaN  NaN  NaN  4.0
    ```
    """
    # Perform checks
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        if arg.shape[0] == 1:
            arg = arg.iloc[0, :]
        elif arg.shape[1] == 1:
            arg = arg.iloc[:, 0]
    checks.assert_type(arg.index, pd.MultiIndex)
    sr = to_1d(arg)

    if len(sr.index.levels) > 2:
        if index_levels is None:
            raise ValueError("index_levels must be specified")
        if column_levels is None:
            raise ValueError("column_levels must be specified")
    else:
        index_levels = 0
        column_levels = 1

    # Build new index and column hierarchies
    new_index = index_fns.select_levels(arg.index, index_levels).unique()
    new_columns = index_fns.select_levels(arg.index, column_levels).unique()

    # Unstack and post-process
    unstacked = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(unstacked, index=new_index, columns=new_columns)
    if symmetric:
        return make_symmetric(df, sort=sort)
    return df
def __call__(self, trace_names=None, **kwargs):
    if trace_names is None:
        if checks.is_frame(self._obj) or (checks.is_series(self._obj) and self._obj.name is not None):
            trace_names = reshape_fns.to_2d(self._obj).columns
    return widgets.Histogram(trace_names=trace_names, data=self._obj.values, **kwargs)
def __init__(self, main_price, init_capital, orders, cash, shares, freq=None, year_freq=None,
             levy_alpha=None, risk_free=None, required_return=None, cutoff=None,
             factor_returns=None, incl_unrealized_stats=False):
    # Perform checks
    checks.assert_type(main_price, (pd.Series, pd.DataFrame))
    if checks.is_frame(main_price):
        checks.assert_type(init_capital, pd.Series)
        checks.assert_same(main_price.columns, init_capital.index)
    else:
        checks.assert_ndim(init_capital, 0)
    checks.assert_same_meta(main_price, cash)
    checks.assert_same_meta(main_price, shares)

    # Store passed arguments
    self._main_price = main_price
    self._init_capital = init_capital
    self._orders = orders
    self._cash = cash
    self._shares = shares
    self._incl_unrealized_stats = incl_unrealized_stats

    freq = main_price.vbt(freq=freq).freq
    if freq is None:
        raise ValueError("Couldn't parse the frequency of index. You must set `freq`.")
    self._freq = freq

    year_freq = main_price.vbt.returns(year_freq=year_freq).year_freq
    if year_freq is None:
        raise ValueError("You must set `year_freq`.")
    self._year_freq = year_freq

    # Parameters
    self._levy_alpha = defaults.portfolio['levy_alpha'] if levy_alpha is None else levy_alpha
    self._risk_free = defaults.portfolio['risk_free'] if risk_free is None else risk_free
    self._required_return = defaults.portfolio['required_return'] if required_return is None else required_return
    self._cutoff = defaults.portfolio['cutoff'] if cutoff is None else cutoff
    self._factor_returns = defaults.portfolio['factor_returns'] if factor_returns is None else factor_returns

    # Supercharge
    PandasIndexer.__init__(self, _indexing_func)
    self.wrapper = ArrayWrapper.from_obj(main_price, freq=freq)
def from_obj(cls, obj):
    """Derive metadata from an object."""
    index = obj.index
    if checks.is_frame(obj):
        columns = obj.columns
    else:
        columns = [obj.name]
    ndim = obj.ndim
    return cls(index=index, columns=columns, ndim=ndim)
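A minimal usage sketch for `from_obj` above, assuming it is exposed as a classmethod on `ArrayWrapper` in `vectorbt.base.array_wrapper` (the import path is not shown in the snippet): a Series contributes its name as the single column label, while a DataFrame keeps its own columns.

```python-repl
>>> import pandas as pd
>>> from vectorbt.base.array_wrapper import ArrayWrapper  # path is an assumption

>>> sr = pd.Series([1, 2, 3], index=['x', 'y', 'z'], name='a')
>>> df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

>>> ArrayWrapper.from_obj(sr)  # expected: columns == ['a'], ndim == 1
>>> ArrayWrapper.from_obj(df)  # expected: columns == ['a', 'b'], ndim == 2
```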
def pd_indexing_func(obj):
    new_obj = obj.iloc[:, indices]
    if not is_multiple:
        # If we selected only one param, then remove its column levels to keep it clean
        if level_name is not None:
            if checks.is_frame(new_obj):
                if isinstance(new_obj.columns, pd.MultiIndex):
                    new_obj.columns = index_fns.drop_levels(new_obj.columns, level_name)
    return new_obj
def __call__(self, x_labels=None, trace_names=None, **kwargs):
    if x_labels is None:
        x_labels = self._obj.index
    if trace_names is None:
        if checks.is_frame(self._obj) or (checks.is_series(self._obj) and self._obj.name is not None):
            trace_names = reshape_fns.to_2d(self._obj).columns
    return widgets.Scatter(x_labels, trace_names=trace_names, data=self._obj.values, **kwargs)
def wrap_array_as(arg1, arg2, **kwargs):
    """Wrap array `arg1` to be as `arg2`."""
    default_index = arg2.index
    if checks.is_frame(arg2):
        default_columns = arg2.columns
    else:
        default_columns = [arg2.name]
    return wrap_array(arg1, default_index=default_index, default_columns=default_columns,
                      to_ndim=arg2.ndim, **kwargs)
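A hedged sketch of how `wrap_array_as` could be used: wrap a raw NumPy result so it reuses the index, columns/name, and dimensionality of a reference pandas object. `wrap_array` is assumed to live in the same module, and the exact handling of the Series name depends on it.

```python-repl
>>> import numpy as np
>>> import pandas as pd

>>> ref = pd.Series([1, 2, 3], index=['x', 'y', 'z'], name='a')
>>> wrap_array_as(np.array([10, 20, 30]), ref)  # expected: Series indexed by ['x', 'y', 'z']
```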
def mapper_indexing_func(mapper, ref_obj, pd_indexing_func):
    """Broadcast `mapper` Series to `ref_obj` and perform pandas indexing using `pd_indexing_func`."""
    checks.assert_type(mapper, pd.Series)
    checks.assert_type(ref_obj, (pd.Series, pd.DataFrame))

    df_range_mapper = reshape_fns.broadcast_to(np.arange(len(mapper.index)), ref_obj)
    loced_range_mapper = pd_indexing_func(df_range_mapper)
    new_mapper = mapper.iloc[loced_range_mapper.values[0]]
    if checks.is_frame(loced_range_mapper):
        return pd.Series(new_mapper.values, index=loced_range_mapper.columns, name=mapper.name)
    elif checks.is_series(loced_range_mapper):
        return pd.Series([new_mapper], index=[loced_range_mapper.name], name=mapper.name)
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame,
                       pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Broadcast `mapper` Series to `ref_obj` and perform pandas indexing using `pd_indexing_func`."""
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    df_range_mapper = reshape_fns.broadcast_to(np.arange(len(mapper.index)), ref_obj)
    loced_range_mapper = pd_indexing_func(df_range_mapper)
    new_mapper = mapper.iloc[loced_range_mapper.values[0]]
    if checks.is_frame(loced_range_mapper):
        return pd.Series(new_mapper.values, index=loced_range_mapper.columns, name=mapper.name)
    elif checks.is_series(loced_range_mapper):
        return pd.Series([new_mapper], index=[loced_range_mapper.name], name=mapper.name)
    return None
def get_multiindex_series(arg: tp.SeriesFrame) -> tp.Series:
    """Get a Series with a multi-index.

    If a DataFrame has been passed, it must have at most one row or column."""
    checks.assert_instance_of(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        if arg.shape[0] == 1:
            arg = arg.iloc[0, :]
        elif arg.shape[1] == 1:
            arg = arg.iloc[:, 0]
        else:
            raise ValueError("Supported are either Series or DataFrame with one column/row")
    checks.assert_instance_of(arg.index, pd.MultiIndex)
    return arg
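A short sketch of the intended behavior of `get_multiindex_series`: a one-column (or one-row) DataFrame with a multi-index collapses to a Series, while a wider DataFrame raises. The data below is illustrative only.

```python-repl
>>> import pandas as pd

>>> index = pd.MultiIndex.from_tuples([(1, 'a'), (1, 'b'), (2, 'a')], names=['x', 'y'])
>>> df = pd.DataFrame({'val': [10, 20, 30]}, index=index)

>>> get_multiindex_series(df)  # expected: Series with the same MultiIndex
>>> get_multiindex_series(pd.DataFrame([[1, 2], [3, 4]], index=index[:2]))
ValueError: Supported are either Series or DataFrame with one column/row
```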
def soft_to_ndim(arg, ndim):
    """Try to softly bring `arg` to the specified number of dimensions `ndim` (max 2)."""
    if not checks.is_array(arg):
        arg = np.asarray(arg)
    if ndim == 1:
        if arg.ndim == 2:
            if arg.shape[1] == 1:
                if checks.is_frame(arg):
                    return arg.iloc[:, 0]
                return arg[:, 0]  # downgrade
    if ndim == 2:
        if arg.ndim == 1:
            if checks.is_series(arg):
                return arg.to_frame()
            return arg[:, None]  # upgrade
    return arg  # do nothing
def to_1d(arg, raw=False):
    """Reshape argument to one dimension."""
    if raw:
        arg = np.asarray(arg)
    if not checks.is_array_like(arg):
        arg = np.asarray(arg)
    if arg.ndim == 2:
        if arg.shape[1] == 1:
            if checks.is_frame(arg):
                return arg.iloc[:, 0]
            return arg[:, 0]
    if arg.ndim == 1:
        return arg
    elif arg.ndim == 0:
        return arg.reshape((1,))
    raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
def soft_to_ndim(arg: tp.ArrayLike, ndim: int, raw: bool = False) -> tp.AnyArray:
    """Try to softly bring `arg` to the specified number of dimensions `ndim` (max 2)."""
    arg = to_any_array(arg, raw=raw)
    if ndim == 1:
        if arg.ndim == 2:
            if arg.shape[1] == 1:
                if checks.is_frame(arg):
                    return arg.iloc[:, 0]
                return arg[:, 0]  # downgrade
    if ndim == 2:
        if arg.ndim == 1:
            if checks.is_series(arg):
                return arg.to_frame()
            return arg[:, None]  # upgrade
    return arg  # do nothing
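A brief sketch of `soft_to_ndim` on the common cases, assuming it is importable from `vectorbt.base.reshape_fns`: it only reshapes when no data would be lost, otherwise it returns the input unchanged.

```python-repl
>>> import numpy as np
>>> import pandas as pd

>>> soft_to_ndim(pd.Series([1, 2, 3]), 2)         # expected: one-column DataFrame
>>> soft_to_ndim(pd.DataFrame({'a': [1, 2]}), 1)  # expected: Series taken from the single column
>>> soft_to_ndim(np.ones((3, 2)), 1)              # expected: unchanged, two columns cannot be collapsed
```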
def to_1d(arg: tp.ArrayLike, raw: bool = False) -> tp.AnyArray1d:
    """Reshape argument to one dimension.

    If `raw` is True, returns NumPy array.
    If 2-dim, will collapse along axis 1 (i.e., DataFrame with one column to Series)."""
    arg = to_any_array(arg, raw=raw)
    if arg.ndim == 2:
        if arg.shape[1] == 1:
            if checks.is_frame(arg):
                return arg.iloc[:, 0]
            return arg[:, 0]
    if arg.ndim == 1:
        return arg
    elif arg.ndim == 0:
        return arg.reshape((1,))
    raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
def to_1d(arg, raw=False):
    """Reshape argument to one dimension.

    If `raw` is `True`, returns NumPy array.
    If 2-dim, will collapse along axis 1 (i.e., DataFrame with one column to Series)."""
    if raw or not checks.is_array(arg):
        arg = np.asarray(arg)
    if arg.ndim == 2:
        if arg.shape[1] == 1:
            if checks.is_frame(arg):
                return arg.iloc[:, 0]
            return arg[:, 0]
    if arg.ndim == 1:
        return arg
    elif arg.ndim == 0:
        return arg.reshape((1,))
    raise ValueError(f"Cannot reshape a {arg.ndim}-dimensional array to 1 dimension")
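A brief sketch of `to_1d`, assuming it is importable from `vectorbt.base.reshape_fns`: one-column DataFrames collapse to Series, scalars become one-element arrays, and wider 2-dim inputs raise.

```python-repl
>>> import numpy as np
>>> import pandas as pd

>>> to_1d(pd.DataFrame({'a': [1, 2]}))  # expected: Series named 'a'
>>> to_1d(0)                            # expected: array([0])
>>> to_1d(np.array([[1, 2], [3, 4]]))
ValueError: Cannot reshape a 2-dimensional array to 1 dimension
```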
def _indexing_func(obj, pd_indexing_func):
    """Perform indexing on `Portfolio`."""
    if obj.wrapper.ndim == 1:
        raise TypeError("Indexing on Series is not supported")

    n_rows = len(obj.wrapper.index)
    n_cols = len(obj.wrapper.columns)
    col_mapper = obj.wrapper.wrap(np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)))
    col_mapper = pd_indexing_func(col_mapper)
    if not pd.Index.equals(col_mapper.index, obj.wrapper.index):
        raise NotImplementedError("Changing index (time axis) is not supported")
    new_cols = col_mapper.values[0]

    # Array-like params
    def index_arraylike_param(param):
        if np.asarray(param).ndim > 0:
            param = reshape_fns.broadcast_to_axis_of(param, obj.main_price, 1)
            param = param[new_cols]
        return param

    factor_returns = obj.factor_returns
    if factor_returns is not None:
        if checks.is_frame(factor_returns):
            factor_returns = reshape_fns.broadcast_to(factor_returns, obj.main_price)
        factor_returns = pd_indexing_func(factor_returns)

    # Create new Portfolio instance
    return obj.__class__(
        pd_indexing_func(obj.main_price),
        obj.init_capital.iloc[new_cols],
        pd_indexing_func(obj.orders),  # Orders class supports indexing
        pd_indexing_func(obj.cash),
        pd_indexing_func(obj.shares),
        freq=obj.freq,
        year_freq=obj.year_freq,
        levy_alpha=index_arraylike_param(obj.levy_alpha),
        risk_free=index_arraylike_param(obj.risk_free),
        required_return=index_arraylike_param(obj.required_return),
        cutoff=index_arraylike_param(obj.cutoff),
        factor_returns=factor_returns,
        incl_unrealized_stats=obj.incl_unrealized_stats)
def unstack_to_df(arg, index_levels=None, column_levels=None, symmetric=False):
    """Reshape object based on its multi-index into a DataFrame."""
    # Perform checks
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        if arg.shape[0] == 1:
            arg = arg.iloc[0, :]
        elif arg.shape[1] == 1:
            arg = arg.iloc[:, 0]
    checks.assert_type(arg.index, pd.MultiIndex)
    sr = to_1d(arg)

    if len(sr.index.levels) > 2:
        checks.assert_not_none(index_levels)
        checks.assert_not_none(column_levels)
    else:
        index_levels = 0
        column_levels = 1

    # Build new index and column hierarchies
    new_index = np.unique(index_fns.select_levels(arg.index, index_levels))
    new_columns = np.unique(index_fns.select_levels(arg.index, column_levels))
    if isinstance(index_levels, (list, tuple)):
        new_index = pd.MultiIndex.from_tuples(new_index, names=index_levels)
    else:
        new_index = pd.Index(new_index, name=index_levels)
    if isinstance(column_levels, (list, tuple)):
        new_columns = pd.MultiIndex.from_tuples(new_columns, names=column_levels)
    else:
        new_columns = pd.Index(new_columns, name=column_levels)

    # Unstack and post-process
    unstacked = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(unstacked, index=new_index, columns=new_columns)
    if symmetric:
        return make_symmetric(df)
    return df
def from_params_pipeline(ts_list, param_list, level_names, num_outputs, custom_func, *args,
                         pass_lists=False, pass_2d=True, param_product=False,
                         broadcast_kwargs={}, return_raw=False, **kwargs):
    """A pipeline for calculating an indicator, used by `IndicatorFactory`.

    Args:
        ts_list (list of array_like): A list of time series objects. At least one must be a pandas object.
        param_list (list of array_like): A list of parameters. Each element is either an array-like
            object or a single value of any type.
        level_names (list of str): A list of column level names corresponding to each parameter.
        num_outputs (int): The number of output arrays.
        custom_func (function): A custom calculation function. See `IndicatorFactory.from_custom_func`.
        *args: Arguments passed to the `custom_func`.
        pass_lists (bool): If `True`, arguments are passed to the `custom_func` as lists.
        pass_2d (bool): If `True`, time series arrays will be passed as two-dimensional, otherwise as is.
        param_product (bool): If `True`, builds a Cartesian product out of all parameters.
        broadcast_kwargs (dict): Keyword arguments passed to the `vectorbt.base.reshape_fns.broadcast`
            on time series objects.
        return_raw (bool): If `True`, returns the raw output without post-processing.
        **kwargs: Keyword arguments passed to the `custom_func`.

            Some common arguments include `return_cache` to return cache and `cache` to pass cache.
            Those are only applicable to `custom_func` that supports it (`custom_func` created using
            `IndicatorFactory.from_apply_func` are supported by default).

    Returns:
        A list of transformed inputs (`pandas_like`), a list of generated outputs (`pandas_like`),
        a list of parameter arrays (`np.ndarray`), a list of parameter mappers (`pd.Series`),
        a list of other generated outputs that are outside of `num_outputs`.

    Explanation:
        Does the following:

        * Takes one or multiple time series objects in `ts_list` and broadcasts them. For example:

        ```python-repl
        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> ts_list = [sr, df]

        >>> ts_list = vbt.base.reshape_fns.broadcast(*ts_list)
        >>> print(ts_list[0])
           a  b
        x  1  1
        y  2  2
        >>> print(ts_list[1])
           a  b
        x  3  4
        y  5  6
        ```

        * Takes one or multiple parameters in `param_list`, converts them to NumPy arrays and
          broadcasts them. For example:

        ```python-repl
        >>> p1, p2, p3 = 1, [2, 3, 4], [False]
        >>> param_list = [p1, p2, p3]

        >>> param_list = vbt.base.reshape_fns.broadcast(*param_list)
        >>> print(param_list[0])
        array([1, 1, 1])
        >>> print(param_list[1])
        array([2, 3, 4])
        >>> print(param_list[2])
        array([False, False, False])
        ```

        * Performs calculation using `custom_func` to build output arrays (`output_list`) and
          other objects (`other_list`, optionally). For example:

        ```python-repl
        >>> def custom_func(ts1, ts2, p1, p2, p3, *args, **kwargs):
        ...     return np.hstack((
        ...         ts1 + ts2 + p1[0] * p2[0],
        ...         ts1 + ts2 + p1[1] * p2[1],
        ...         ts1 + ts2 + p1[2] * p2[2],
        ...     ))

        >>> output = custom_func(*ts_list, *param_list)
        >>> print(output)
        array([[ 6,  7,  7,  8,  8,  9],
               [ 9, 10, 10, 11, 11, 12]])
        ```

        * Creates new column hierarchy based on parameters and level names. For example:

        ```python-repl
        >>> p1_columns = pd.Index(param_list[0], name='p1')
        >>> p2_columns = pd.Index(param_list[1], name='p2')
        >>> p3_columns = pd.Index(param_list[2], name='p3')
        >>> p_columns = vbt.base.index_fns.stack_indexes(p1_columns, p2_columns, p3_columns)
        >>> new_columns = vbt.base.index_fns.combine_indexes(p_columns, ts_list[0].columns)

        >>> output_df = pd.DataFrame(output, columns=new_columns)
        >>> print(output_df)
        p1                        1
        p2            2           3           4
        p3        False  False  False  False  False  False
                      a      b      a      b      a      b
        0             6      7      7      8      8      9
        1             9     10     10     11     11     12
        ```

        * Broadcasts objects in `ts_list` to match the shape of objects in `output_list` through tiling.
          This is done to be able to compare them and generate signals, since you cannot compare NumPy
          arrays that have totally different shapes, such as (2, 2) and (2, 6). For example:

        ```python-repl
        >>> new_ts_list = [
        ...     ts_list[0].vbt.tile(len(param_list[0]), keys=p_columns),
        ...     ts_list[1].vbt.tile(len(param_list[0]), keys=p_columns)
        ... ]
        >>> print(new_ts_list[0])
        p1                        1
        p2            2           3           4
        p3        False  False  False  False  False  False
                      a      b      a      b      a      b
        0             1      1      1      1      1      1
        1             2      2      2      2      2      2
        ```

        * Builds parameter mappers that will link parameters from `param_list` to columns in
          `ts_list` and `output_list`. This is done to enable column indexing using parameter values.
    """
    if len(ts_list) > 1:
        # Broadcast time series
        ts_list = reshape_fns.broadcast(*ts_list, **broadcast_kwargs, writeable=True)
    # Check time series objects
    checks.assert_type(ts_list[0], (pd.Series, pd.DataFrame))
    # Convert params to 1-dim arrays
    param_list = list(map(reshape_fns.to_1d, param_list))
    if len(param_list) > 1:
        # Check level names
        checks.assert_type(level_names, (list, tuple))
        checks.assert_same_len(param_list, level_names)
        for ts in ts_list:
            # Every time series object should be free of the specified level names in its columns
            for level_name in level_names:
                if level_name is not None:
                    if checks.is_frame(ts):
                        checks.assert_level_not_exists(ts.columns, level_name)
        if param_product:
            # Make Cartesian product out of all params
            param_list = create_param_product(param_list)
        else:
            # Broadcast such that each array has the same length
            param_list = reshape_fns.broadcast(*param_list, writeable=True)
    # Perform main calculation
    if pass_2d:
        array_list = tuple(map(lambda x: reshape_fns.to_2d(np.asarray(x)), ts_list))
    else:
        array_list = tuple(map(lambda x: np.asarray(x), ts_list))
    if pass_lists:
        output_list = custom_func(array_list, param_list, *args, **kwargs)
    else:
        output_list = custom_func(*array_list, *param_list, *args, **kwargs)

    if return_raw or kwargs.get('return_cache', False):
        return output_list  # return raw cache outputs
    if not isinstance(output_list, (tuple, list, List)):
        output_list = [output_list]
    else:
        output_list = list(output_list)
    # Other outputs should be returned without post-processing (for example cache_dict)
    if len(output_list) > num_outputs:
        other_list = output_list[num_outputs:]
    else:
        other_list = []
    # Process only the num_outputs outputs
    output_list = output_list[:num_outputs]

    if len(param_list) > 0:
        # Build new column levels on top of time series levels
        new_columns = build_column_hierarchy(param_list, level_names, ts_list[0].vbt.columns)
        # Wrap into new pandas objects both time series and output objects
        new_ts_list = list(map(lambda x: broadcast_ts(x, param_list[0].shape[0], new_columns), ts_list))
        # Build mappers to easily map between parameters and columns
        mapper_list = [build_mapper(x, ts_list[0], new_columns, level_names[i])
                       for i, x in enumerate(param_list)]
    else:
        # Some indicators don't have any params
        new_columns = ts_list[0].vbt.columns
        new_ts_list = list(ts_list)
        mapper_list = []
    output_list = list(map(lambda x: wrap_output(x, ts_list[0], new_columns), output_list))
    if len(mapper_list) > 1:
        # Tuple object is a mapper that accepts tuples of parameters
        tuple_mapper = build_tuple_mapper(mapper_list, new_columns, tuple(level_names))
        mapper_list.append(tuple_mapper)
    return new_ts_list, output_list, param_list, mapper_list, other_list
def broadcast(*args, to_shape=None, to_pd=None, to_frame=None, align_index=None, align_columns=None,
              index_from='default', columns_from='default', require_kwargs=None, keep_raw=False,
              return_meta=False, **kwargs):
    """Bring any array-like object in `args` to the same shape by using NumPy broadcasting.

    See [Broadcasting](https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html).

    Can broadcast pandas objects by broadcasting their index/columns with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple): Target shape. If set, will broadcast every element in `args` to `to_shape`.
        to_pd (bool, tuple or list): Whether to convert all output arrays to pandas, otherwise returns
            raw NumPy arrays. If None, converts only if there is at least one pandas object among them.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.
        index_from (any): Broadcasting rule for index.
        columns_from (any): Broadcasting rule for columns.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.
        keep_raw (bool, tuple or list): Whether to keep the unbroadcasted version of the array.

            Only makes sure that the array can be broadcast to the target shape.
        return_meta (bool): If True, will also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    For defaults, see `vectorbt.settings.broadcasting`.

    ## Example

    Without broadcasting index and columns:
    ```python-repl
    >>> import numpy as np
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import broadcast

    >>> v = 0
    >>> a = np.array([1, 2, 3])
    >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
    >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
    ...     index=pd.Index(['x2', 'y2', 'z2']),
    ...     columns=pd.Index(['a2', 'b2', 'c2']))

    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=None,
    ...     columns_from=None,
    ... ): print(i)
       0  1  2
    0  0  0  0
    1  0  0  0
    2  0  0  0
       0  1  2
    0  1  2  3
    1  1  2  3
    2  1  2  3
       a  a  a
    x  1  1  1
    y  2  2  2
    z  3  3  3
        a2  b2  c2
    x2   1   2   3
    y2   4   5   6
    z2   7   8   9
    ```

    Taking new index and columns from position:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=2,
    ...     columns_from=3
    ... ): print(i)
       a2  b2  c2
    x   0   0   0
    y   0   0   0
    z   0   0   0
       a2  b2  c2
    x   1   2   3
    y   1   2   3
    z   1   2   3
       a2  b2  c2
    x   1   1   1
    y   2   2   2
    z   3   3   3
       a2  b2  c2
    x   1   2   3
    y   4   5   6
    z   7   8   9
    ```

    Broadcasting index and columns through stacking:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from='stack',
    ...     columns_from='stack'
    ... ): print(i)
          a2  b2  c2
    x x2   0   0   0
    y y2   0   0   0
    z z2   0   0   0
          a2  b2  c2
    x x2   1   2   3
    y y2   1   2   3
    z z2   1   2   3
          a2  b2  c2
    x x2   1   1   1
    y y2   2   2   2
    z z2   3   3   3
          a2  b2  c2
    x x2   1   2   3
    y y2   4   5   6
    z z2   7   8   9
    ```

    Setting index and columns manually:
    ```python-repl
    >>> for i in broadcast(
    ...     v, a, sr, df,
    ...     index_from=['a', 'b', 'c'],
    ...     columns_from=['d', 'e', 'f']
    ... ): print(i)
       d  e  f
    a  0  0  0
    b  0  0  0
    c  0  0  0
       d  e  f
    a  1  2  3
    b  1  2  3
    c  1  2  3
       d  e  f
    a  1  1  1
    b  2  2  2
    c  3  3  3
       d  e  f
    a  1  2  3
    b  4  5  6
    c  7  8  9
    ```
    """
    from vectorbt import settings

    is_pd = False
    is_2d = False
    args = list(args)
    if require_kwargs is None:
        require_kwargs = {}
    if align_index is None:
        align_index = settings.broadcasting['align_index']
    if align_columns is None:
        align_columns = settings.broadcasting['align_columns']
    if isinstance(index_from, str) and index_from == 'default':
        index_from = settings.broadcasting['index_from']
    if isinstance(columns_from, str) and columns_from == 'default':
        columns_from = settings.broadcasting['columns_from']

    # Convert to np.ndarray object if not numpy or pandas
    # Also check whether we broadcast to pandas and whether we work on 2-dim data
    for i in range(len(args)):
        if not checks.is_array(args[i]):
            args[i] = np.asarray(args[i])
        if args[i].ndim > 1:
            is_2d = True
        if checks.is_pandas(args[i]):
            is_pd = True

    # If target shape specified, check again if we work on 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape,)
        checks.assert_type(to_shape, tuple)
        if len(to_shape) > 1:
            is_2d = True

    if to_frame is not None:
        # force either keeping Series or converting them to DataFrames
        is_2d = to_frame

    if to_pd is not None:
        # force either raw or pandas
        if isinstance(to_pd, (tuple, list)):
            is_pd = any(to_pd)
        else:
            is_pd = to_pd

    # Align pandas objects
    if align_index:
        index_to_align = []
        for i in range(len(args)):
            if checks.is_pandas(args[i]) and len(args[i].index) > 1:
                index_to_align.append(i)
        if len(index_to_align) > 1:
            indexes = [args[i].index for i in index_to_align]
            if len(set(map(len, indexes))) > 1:
                index_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in index_to_align:
                        args[i] = args[i].iloc[index_indices[index_to_align.index(i)]]
    if align_columns:
        cols_to_align = []
        for i in range(len(args)):
            if checks.is_frame(args[i]) and len(args[i].columns) > 1:
                cols_to_align.append(i)
        if len(cols_to_align) > 1:
            indexes = [args[i].columns for i in cols_to_align]
            if len(set(map(len, indexes))) > 1:
                col_indices = index_fns.align_indexes(*indexes)
                for i in range(len(args)):
                    if i in cols_to_align:
                        args[i] = args[i].iloc[:, col_indices[cols_to_align.index(i)]]

    # Convert all pd.Series objects to pd.DataFrame if we work on 2-dim data
    args_2d = [arg.to_frame() if is_2d and checks.is_series(arg) else arg for arg in args]

    # Get final shape
    if to_shape is None:
        to_shape = np.lib.stride_tricks._broadcast_shape(*args_2d)

    # Perform broadcasting
    new_args = []
    for i, arg in enumerate(args_2d):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        bc_arg = np.broadcast_to(arg, to_shape)
        if _keep_raw:
            new_args.append(arg)
            continue
        new_args.append(bc_arg)

    # Force to match requirements
    for i in range(len(new_args)):
        if isinstance(require_kwargs, (tuple, list)):
            _require_kwargs = require_kwargs[i]
        else:
            _require_kwargs = require_kwargs
        new_args[i] = np.require(new_args[i], **_require_kwargs)

    if is_pd:
        # Decide on index and columns
        # NOTE: Important to pass args, not args_2d, to preserve original shape info
        new_index = broadcast_index(args, to_shape, index_from=index_from, axis=0, **kwargs)
        new_columns = broadcast_index(args, to_shape, index_from=columns_from, axis=1, **kwargs)
    else:
        new_index, new_columns = None, None

    # Bring arrays to their old types (e.g. array -> pandas)
    for i in range(len(new_args)):
        if isinstance(keep_raw, (tuple, list)):
            _keep_raw = keep_raw[i]
        else:
            _keep_raw = keep_raw
        if _keep_raw:
            continue
        if isinstance(to_pd, (tuple, list)):
            _is_pd = to_pd[i]
        else:
            _is_pd = is_pd
        new_args[i] = wrap_broadcasted(args[i], new_args[i], is_pd=_is_pd,
                                       new_index=new_index, new_columns=new_columns)

    if len(new_args) > 1:
        if return_meta:
            return tuple(new_args), to_shape, new_index, new_columns
        return tuple(new_args)
    if return_meta:
        return new_args[0], to_shape, new_index, new_columns
    return new_args[0]
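A hedged sketch of the `return_meta` branch shown above: in addition to the broadcast outputs, the call is expected to also return the target shape and the new index/columns, which downstream wrappers can reuse. The inputs are illustrative only.

```python-repl
>>> import numpy as np
>>> import pandas as pd

>>> sr = pd.Series([1, 2, 3], index=['x', 'y', 'z'], name='a')
>>> (out1, out2), new_shape, new_index, new_columns = broadcast(
...     sr, np.array([[10], [20], [30]]), return_meta=True)
>>> new_shape  # expected: (3, 1)
```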
def _indexing_func_meta(self, pd_indexing_func, index=None, columns=None,
                        column_only_select=None, group_select=None, group_by=None):
    """Perform indexing on `ArrayWrapper` and also return indexing metadata.

    Takes into account column grouping.

    Set `column_only_select` to True to index the array wrapper as a Series of columns.
    This way, selection of index (axis 0) can be avoided. Set `group_select` to True
    to select groups rather than columns. Takes effect only if grouping is enabled.

    !!! note
        If `column_only_select` is True, make sure to index the array wrapper
        as a Series of columns rather than a DataFrame. For example, the operation
        `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
        object is already a Series and thus has only one column/group."""
    from vectorbt import settings

    if column_only_select is None:
        column_only_select = self.column_only_select
    if column_only_select is None:
        column_only_select = settings.array_wrapper['column_only_select']
    if group_select is None:
        group_select = self.group_select
    if group_select is None:
        group_select = settings.array_wrapper['group_select']
    _self = self.regroup(group_by)
    group_select = group_select and _self.grouper.is_grouped()
    if index is None:
        index = _self.index
    if columns is None:
        if group_select:
            columns = _self.grouper.get_columns()
        else:
            columns = _self.columns
    if group_select:
        # Groups as columns
        i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
    else:
        # Columns as columns
        i_wrapper = ArrayWrapper(index, columns, _self.ndim)
    n_rows = len(index)
    n_cols = len(columns)

    if column_only_select:
        if i_wrapper.ndim == 1:
            raise IndexingError("Columns only: Attempting to select a column on a Series")
        col_mapper = i_wrapper.wrap_reduced(np.arange(n_cols), columns=columns)
        try:
            col_mapper = pd_indexing_func(col_mapper)
        except pd.core.indexing.IndexingError as e:
            warnings.warn("Columns only: Make sure to treat this object "
                          "as a Series of columns rather than a DataFrame", stacklevel=2)
            raise e
        if checks.is_series(col_mapper):
            new_columns = col_mapper.index
            col_idxs = col_mapper.values
            new_ndim = 2
        else:
            new_columns = columns[[col_mapper]]
            col_idxs = col_mapper
            new_ndim = 1
        new_index = index
        idx_idxs = np.arange(len(index))
    else:
        idx_mapper = i_wrapper.wrap(
            np.broadcast_to(np.arange(n_rows)[:, None], (n_rows, n_cols)),
            index=index, columns=columns)
        idx_mapper = pd_indexing_func(idx_mapper)
        if i_wrapper.ndim == 1:
            if not checks.is_series(idx_mapper):
                raise IndexingError("Selection of a scalar is not allowed")
            idx_idxs = idx_mapper.values
            col_idxs = 0
        else:
            col_mapper = i_wrapper.wrap(
                np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)),
                index=index, columns=columns)
            col_mapper = pd_indexing_func(col_mapper)
            if checks.is_frame(idx_mapper):
                idx_idxs = idx_mapper.values[:, 0]
                col_idxs = col_mapper.values[0]
            elif checks.is_series(idx_mapper):
                one_col = np.all(col_mapper.values == col_mapper.values.item(0))
                one_idx = np.all(idx_mapper.values == idx_mapper.values.item(0))
                if one_col and one_idx:
                    # One index and one column selected, multiple times
                    raise IndexingError("Must select at least two unique indices in one of both axes")
                elif one_col:
                    # One column selected
                    idx_idxs = idx_mapper.values
                    col_idxs = col_mapper.values[0]
                elif one_idx:
                    # One index selected
                    idx_idxs = idx_mapper.values[0]
                    col_idxs = col_mapper.values
                else:
                    raise IndexingError
            else:
                raise IndexingError("Selection of a scalar is not allowed")
        new_index = index_fns.get_index(idx_mapper, 0)
        if not isinstance(idx_idxs, np.ndarray):
            # One index selected
            new_columns = index[[idx_idxs]]
        elif not isinstance(col_idxs, np.ndarray):
            # One column selected
            new_columns = columns[[col_idxs]]
        else:
            new_columns = index_fns.get_index(idx_mapper, 1)
        new_ndim = idx_mapper.ndim

    if _self.grouper.is_grouped():
        # Grouping enabled
        if np.asarray(idx_idxs).ndim == 0:
            raise IndexingError("Flipping index and columns is not allowed")

        if group_select:
            # Selection based on groups
            # Get indices of columns corresponding to selected groups
            group_idxs = col_idxs
            group_idxs_arr = reshape_fns.to_1d(group_idxs)
            group_start_idxs = _self.grouper.get_group_start_idxs()[group_idxs_arr]
            group_end_idxs = _self.grouper.get_group_end_idxs()[group_idxs_arr]
            ungrouped_col_idxs = get_ranges_arr(group_start_idxs, group_end_idxs)
            ungrouped_columns = _self.columns[ungrouped_col_idxs]
            if new_ndim == 1 and len(ungrouped_columns) == 1:
                ungrouped_ndim = 1
                ungrouped_col_idxs = ungrouped_col_idxs[0]
            else:
                ungrouped_ndim = 2

            # Get indices of selected groups corresponding to the new columns
            # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
            group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
            ungrouped_group_idxs = np.full(len(ungrouped_columns), 0)
            ungrouped_group_idxs[group_lens[:-1]] = 1
            ungrouped_group_idxs = np.cumsum(ungrouped_group_idxs)

            return _self.copy(
                index=new_index,
                columns=ungrouped_columns,
                ndim=ungrouped_ndim,
                grouped_ndim=new_ndim,
                group_by=new_columns[ungrouped_group_idxs]
            ), idx_idxs, group_idxs, ungrouped_col_idxs

        # Selection based on columns
        col_idxs_arr = reshape_fns.to_1d(col_idxs)
        return _self.copy(
            index=new_index,
            columns=new_columns,
            ndim=new_ndim,
            grouped_ndim=None,
            group_by=_self.grouper.group_by[col_idxs_arr]
        ), idx_idxs, col_idxs, col_idxs

    # Grouping disabled
    return _self.copy(
        index=new_index,
        columns=new_columns,
        ndim=new_ndim,
        grouped_ndim=None,
        group_by=None
    ), idx_idxs, col_idxs, col_idxs
def wrap_reduced(self,
                 arr: tp.ArrayLike,
                 name_or_index: tp.NameIndex = None,
                 columns: tp.Optional[tp.IndexLike] = None,
                 fillna: tp.Optional[tp.Scalar] = None,
                 dtype: tp.Optional[tp.PandasDTypeLike] = None,
                 group_by: tp.GroupByLike = None,
                 to_timedelta: bool = False,
                 to_index: bool = False,
                 silence_warnings: tp.Optional[bool] = None) -> tp.MaybeSeriesFrame:
    """Wrap result of reduction.

    `name_or_index` can be the name of the resulting series if reducing to a scalar per column,
    or the index of the resulting series/dataframe if reducing to an array per column.

    `columns` can be set to override object's default columns.

    See `ArrayWrapper.wrap` for the pipeline."""
    from vectorbt._settings import settings
    array_wrapper_cfg = settings['array_wrapper']

    if silence_warnings is None:
        silence_warnings = array_wrapper_cfg['silence_warnings']
    checks.assert_not_none(self.ndim)
    _self = self.resolve(group_by=group_by)
    if columns is None:
        columns = _self.columns
    if not isinstance(columns, pd.Index):
        columns = pd.Index(columns)
    if to_index:
        if dtype is None:
            dtype = np.int_
        if fillna is None:
            fillna = -1

    def _wrap_reduced(arr):
        nonlocal name_or_index

        arr = np.asarray(arr)
        if fillna is not None:
            arr[pd.isnull(arr)] = fillna
        if arr.ndim == 0:
            # Scalar per Series/DataFrame
            return pd.Series(arr, dtype=dtype)[0]
        if arr.ndim == 1:
            if _self.ndim == 1:
                if arr.shape[0] == 1:
                    # Scalar per Series/DataFrame with one column
                    return pd.Series(arr, dtype=dtype)[0]
                # Array per Series
                sr_name = columns[0]
                if sr_name == 0:  # was a Series before
                    sr_name = None
                if isinstance(name_or_index, str):
                    name_or_index = None
                return pd.Series(arr, index=name_or_index, name=sr_name, dtype=dtype)
            # Scalar per column in a DataFrame
            return pd.Series(arr, index=columns, name=name_or_index, dtype=dtype)
        if arr.ndim == 2:
            if arr.shape[1] == 1 and _self.ndim == 1:
                arr = reshape_fns.soft_to_ndim(arr, 1)
                # Array per Series
                sr_name = columns[0]
                if sr_name == 0:  # was a Series before
                    sr_name = None
                if isinstance(name_or_index, str):
                    name_or_index = None
                return pd.Series(arr, index=name_or_index, name=sr_name, dtype=dtype)
            # Array per column in a DataFrame
            if isinstance(name_or_index, str):
                name_or_index = None
            return pd.DataFrame(arr, index=name_or_index, columns=columns, dtype=dtype)
        raise ValueError(f"{arr.ndim}-d input is not supported")

    out = _wrap_reduced(arr)
    if to_index:
        # Convert to index
        if checks.is_series(out):
            out = out.map(lambda x: self.index[x] if x != -1 else np.nan)
        elif checks.is_frame(out):
            out = out.applymap(lambda x: self.index[x] if x != -1 else np.nan)
        else:
            out = self.index[out] if out != -1 else np.nan
    if to_timedelta:
        # Convert to timedelta
        out = self.to_timedelta(out, silence_warnings=silence_warnings)
    return out
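A hedged sketch of `wrap_reduced` for the two most common shapes, assuming an `ArrayWrapper` constructed positionally as `(index, columns, ndim)` (the constructor signature is an assumption, not shown above): reducing each column to a scalar yields a Series indexed by columns, while reducing to an array per column yields a DataFrame.

```python-repl
>>> import numpy as np
>>> import pandas as pd

>>> wrapper = ArrayWrapper(pd.Index(['x', 'y', 'z']), pd.Index(['a', 'b']), 2)  # signature assumed
>>> wrapper.wrap_reduced(np.array([1.5, 2.5]), name_or_index='mean')  # expected: Series indexed by ['a', 'b']
>>> wrapper.wrap_reduced(np.ones((3, 2)), name_or_index=['x', 'y', 'z'])  # expected: 3x2 DataFrame
```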
def test_is_frame(self):
    assert not checks.is_frame(0)
    assert not checks.is_frame(np.array([0]))
    assert not checks.is_frame(pd.Series([1, 2, 3]))
    assert checks.is_frame(pd.DataFrame([1, 2, 3]))