def concat(self_or_cls, *others, keys=None, broadcast_kwargs={}):
    """Concatenate with `others` along columns.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.

    Use `keys` as the outermost level.

    Example:
        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.concat(df, keys=['c', 'd'])
              c     d
           a  b  a  b
        x  1  1  3  4
        y  2  2  5  6
        ```"""
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    if isinstance(self_or_cls, type):
        objs = others
    else:
        objs = (self_or_cls._obj,) + others
    broadcasted = reshape_fns.broadcast(*objs, **broadcast_kwargs)
    broadcasted = tuple(map(reshape_fns.to_2d, broadcasted))
    concatenated = pd.concat(broadcasted, axis=1)
    if keys is not None:
        concatenated.columns = index_fns.combine_indexes(keys, broadcasted[0].columns)
    return concatenated
def generate_stop_loss_exits(self, ts, stops, trailing=False, first=True, keys=None, broadcast_kwargs={}):
    """See `vectorbt.signals.nb.generate_stop_loss_exits_nb`.

    Arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.
    Argument `stops` can be either a single number, an array of numbers, or a 3D array,
    where each matrix corresponds to a single configuration.

    Use `keys` as the outermost level.

    Example:
        For each entry in `sig`, set a stop loss at 10% and 50% below the entry price:

        ```python-repl
        >>> ts = pd.Series([1, 2, 3, 2, 1])

        >>> print(sig.vbt.signals.generate_stop_loss_exits(ts, [0.1, 0.5]))
        stop_loss                  0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02  False  False  False  False  False  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04  False   True   True  False  False  False
        2020-01-05  False  False  False  False  False   True

        >>> print(sig.vbt.signals.generate_stop_loss_exits(ts, [0.1, 0.5], trailing=True))
        trail_stop                 0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02  False  False  False  False  False  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04   True   True   True  False  False  False
        2020-01-05  False  False  False   True  False   True
        ```"""
    entries = self._obj
    checks.assert_type(ts, (pd.Series, pd.DataFrame))

    entries, ts = reshape_fns.broadcast(entries, ts, **broadcast_kwargs, writeable=True)
    stops = reshape_fns.broadcast_to_array_of(stops, entries.vbt.to_2d_array())
    exits = nb.generate_stop_loss_exits_nb(
        entries.vbt.to_2d_array(),
        ts.vbt.to_2d_array(),
        stops,
        trailing=trailing,
        first=first)

    # Build column hierarchy
    if keys is not None:
        param_columns = keys
    else:
        name = 'trail_stop' if trailing else 'stop_loss'
        param_columns = index_fns.index_from_values(stops, name=name)
    columns = index_fns.combine_indexes(param_columns, entries.vbt.columns)
    return entries.vbt.wrap(exits, columns=columns)
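Both the stop-loss example above and the take-profit example further below rely on an entry-signal frame `sig` defined elsewhere in the documentation. A minimal reconstruction that is consistent with the outputs shown (hypothetical; the exact original definition is not part of this section):

```python
import pandas as pd

# Hypothetical entry signals consistent with the example outputs:
# 'a' enters on the first bar, 'b' on the first, third and fifth bars,
# 'c' on the third bar.
sig = pd.DataFrame({
    'a': [True, False, False, False, False],
    'b': [True, False, True, False, True],
    'c': [False, False, True, False, False]
}, index=pd.date_range('2020-01-01', periods=5))
```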
def repeat(self, n, keys=None, axis=1):
    """See `vectorbt.base.reshape_fns.repeat`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self._obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes(self.wrapper.columns, keys)
            return repeated.vbt.wrapper.wrap(repeated.values, columns=new_columns)
        else:
            new_index = index_fns.combine_indexes(self.wrapper.index, keys)
            return repeated.vbt.wrapper.wrap(repeated.values, index=new_index)
    return repeated
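Since `reshape_fns.repeat` lays columns out as `a, a, b, b`, the original columns stay on top of the hierarchy and `keys` labels the repetitions beneath them. A minimal sketch (assuming `vectorbt` is installed and registers the `.vbt` accessor):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])

# Each column is repeated twice: a, a, b, b.
# With keys, the columns become a two-level index:
# (a, p1), (a, p2), (b, p1), (b, p2)
print(df.vbt.repeat(2, keys=['p1', 'p2']))
```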
def repeat(self, n: int, keys: tp.Optional[tp.IndexLike] = None, axis: int = 1,
           wrap_kwargs: tp.KwargsLike = None) -> tp.SeriesFrame:
    """See `vectorbt.base.reshape_fns.repeat`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self.obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes([self.wrapper.columns, keys])
            return ArrayWrapper.from_obj(repeated).wrap(
                repeated.values, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
        else:
            new_index = index_fns.combine_indexes([self.wrapper.index, keys])
            return ArrayWrapper.from_obj(repeated).wrap(
                repeated.values, **merge_dicts(dict(index=new_index), wrap_kwargs))
    return repeated
def repeat(self, n, keys=None):
    """See `vectorbt.base.reshape_fns.repeat`.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self._obj, n, axis=1)
    if keys is not None:
        new_columns = index_fns.combine_indexes(self.columns, keys)
        return self.wrap(repeated.values, columns=new_columns)
    return repeated
def tile(self, n, keys=None):
    """See `vectorbt.base.reshape_fns.tile`.

    Use `keys` as the outermost level."""
    tiled = reshape_fns.tile(self._obj, n, axis=1)
    if keys is not None:
        new_columns = index_fns.combine_indexes(keys, self.columns)
        return self.wrap(tiled.values, columns=new_columns)
    return tiled
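In contrast to `repeat`, `tile` lays columns out as `a, b, a, b`, so `keys` becomes the outermost level. A minimal sketch (same assumptions as above):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])

# The whole column set is tiled twice: a, b, a, b.
# With keys, the columns become a two-level index:
# (p1, a), (p1, b), (p2, a), (p2, b)
print(df.vbt.tile(2, keys=['p1', 'p2']))
```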
def apply_and_concat(self, ntimes, *args, apply_func=None, to_2d=False, keys=None, **kwargs):
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Arguments `*args` and `**kwargs` will be directly passed to `apply_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Use `keys` as the outermost level.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj_arr = reshape_fns.to_2d(self._obj, raw=True)
    else:
        obj_arr = np.asarray(self._obj)
    if checks.is_numba_func(apply_func):
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj_arr, *args, **kwargs)
    else:
        result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj_arr, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes(keys, self.wrapper.columns)
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes(top_columns, self.wrapper.columns)
    return self.wrapper.wrap(result, columns=new_columns, group_by=False)
def tile(self, n, keys=None, axis=1, wrap_kwargs=None):
    """See `vectorbt.base.reshape_fns.tile`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the outermost level."""
    tiled = reshape_fns.tile(self._obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes(keys, self.wrapper.columns)
            return tiled.vbt.wrapper.wrap(
                tiled.values, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
        else:
            new_index = index_fns.combine_indexes(keys, self.wrapper.index)
            return tiled.vbt.wrapper.wrap(
                tiled.values, **merge_dicts(dict(index=new_index), wrap_kwargs))
    return tiled
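This variant can also tile along the index. A minimal sketch, assuming the variant above (with an `axis` parameter) is the one registered on the accessor:

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], index=['x', 'y'], columns=['a', 'b'])

# Rows are tiled twice: x, y, x, y.
# With keys, the index becomes a two-level index:
# (p1, x), (p1, y), (p2, x), (p2, y)
print(df.vbt.tile(2, keys=['p1', 'p2'], axis=0))
```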
def generate_take_profit_exits(self, ts, stops, first=True, iteratively=False, keys=None, broadcast_kwargs={}):
    """Generate take profit exits.

    See `vectorbt.signals.nb.generate_tp_ex_iter_nb` if `iteratively` is `True`,
    otherwise see `vectorbt.signals.nb.generate_tp_ex_nb`.

    Arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.
    Argument `stops` can be either a single number, an array of numbers, or a 3D array,
    where each matrix corresponds to a single configuration.

    Use `keys` as the outermost level.

    Example:
        For each entry in `sig`, set a take profit at 10% and 50% above the entry price:

        ```python-repl
        >>> ts = pd.Series([1, 2, 3, 4, 5])

        >>> sig.vbt.signals.generate_take_profit_exits(ts, [0.1, 0.5])
        take_profit                0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02   True   True  False   True   True  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04  False   True   True  False  False  False
        2020-01-05  False  False  False  False  False   True
        ```"""
    entries = self._obj
    checks.assert_type(ts, (pd.Series, pd.DataFrame))

    broadcast_kwargs = merge_kwargs(dict(require_kwargs=dict(requirements='W')), broadcast_kwargs)
    entries, ts = reshape_fns.broadcast(entries, ts, **broadcast_kwargs)
    stops = reshape_fns.broadcast_to_array_of(stops, entries.vbt.to_2d_array())

    # Build column hierarchy
    if keys is not None:
        param_columns = keys
    else:
        param_columns = index_fns.index_from_values(stops, name='take_profit')
    columns = index_fns.combine_indexes(param_columns, entries.vbt.columns)

    # Perform generation
    if iteratively:
        new_entries, exits = nb.generate_tp_ex_iter_nb(
            entries.vbt.to_2d_array(),
            ts.vbt.to_2d_array(),
            stops)
        return entries.vbt.wrap(new_entries, columns=columns), entries.vbt.wrap(exits, columns=columns)
    else:
        exits = nb.generate_tp_ex_nb(
            entries.vbt.to_2d_array(),
            ts.vbt.to_2d_array(),
            stops,
            first=first)
        return entries.vbt.wrap(exits, columns=columns)
def split_into_ranges(self, n=None, range_len=None):
    """Split into `n` ranges, each `range_len` long.

    At least one of `range_len` and `n` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.

    !!! note
        The datetime-like format of the index will be lost as a result of this operation.
        Make sure to store index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0
        ```"""
    if range_len is None and n is None:
        raise ValueError("At least range_len or n must be set")
    if range_len is None:
        range_len = len(self.index) // n
    cube = nb.rolling_window_nb(self.to_2d_array(), range_len)
    if n is not None:
        if n > cube.shape[2]:
            raise ValueError(f"n cannot be bigger than the maximum number of ranges {cube.shape[2]}")
        idxs = np.round(np.linspace(0, cube.shape[2] - 1, n)).astype(int)
        cube = cube[:, :, idxs]
    else:
        idxs = np.arange(cube.shape[2])
    matrix = np.hstack(cube)
    range_starts = pd.Index(self.index[idxs], name='range_start')
    range_ends = pd.Index(self.index[idxs + range_len - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
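The note above is easy to trip over in practice: the result is a plain integer-indexed frame, so keep any index metadata you still need around before splitting. A minimal sketch (assuming the accessor is registered as `.vbt`):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame({'a': range(5)}, index=pd.date_range('2020-01-01', periods=5))

# Store index metadata (e.g. frequency) before it is lost
freq = df.index.freq

ranges = df.vbt.split_into_ranges(n=2)  # columns keep the range start/end labels
```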
def build_column_hierarchy(param_list, level_names, ts_columns):
    """For each parameter in `param_list`, create a new column level with parameter values.
    Combine this level with the columns `ts_columns` using a Cartesian product.

    Excludes level names that are `None`."""
    checks.assert_same_shape(param_list, level_names, axis=0)

    param_indexes = []
    for i in range(len(param_list)):
        if level_names[i] is not None:
            param_index = index_fns.index_from_values(param_list[i], name=level_names[i])
            param_indexes.append(param_index)
    if len(param_indexes) > 1:
        param_columns = index_fns.stack_indexes(*param_indexes)
    elif len(param_indexes) == 1:
        param_columns = param_indexes[0]
    else:
        param_columns = None
    if param_columns is not None:
        return index_fns.combine_indexes(param_columns, ts_columns)
    return ts_columns
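To make the construction concrete, a plain-pandas sketch of the intended result (hypothetical parameter values; `index_fns.stack_indexes` pairs same-length parameter lists into levels, and `index_fns.combine_indexes` takes the Cartesian product with `ts_columns`):

```python
import pandas as pd

# Two parameter lists of equal length are stacked into two levels...
windows = [2, 3]
alphas = [0.1, 0.5]
param_columns = pd.MultiIndex.from_arrays([windows, alphas], names=['window', 'alpha'])

# ...and then combined with the time-series columns via a Cartesian product
ts_columns = pd.Index(['a', 'b'], name='symbol')
new_columns = pd.MultiIndex.from_tuples(
    [(w, al, c) for (w, al) in param_columns for c in ts_columns],
    names=['window', 'alpha', 'symbol'])
print(new_columns)
```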
def combine(self, other: tp.MaybeTupleList[tp.Union[tp.ArrayLike, "BaseAccessor"]], *args,
            allow_multiple: bool = True, combine_func: tp.Optional[tp.Callable] = None,
            keep_pd: bool = False, to_2d: bool = False, concat: bool = False,
            numba_loop: bool = False, use_ray: bool = False, broadcast: bool = True,
            broadcast_kwargs: tp.KwargsLike = None, keys: tp.Optional[tp.IndexLike] = None,
            wrap_kwargs: tp.KwargsLike = None, **kwargs) -> tp.SeriesFrame:
    """Combine with `other` using `combine_func`.

    Args:
        other (array_like): Object to combine this array with.
        *args: Variable arguments passed to `combine_func`.
        allow_multiple (bool): Whether a tuple/list will be considered as multiple objects in `other`.
        combine_func (callable): Function to combine two arrays.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        concat (bool): Whether to concatenate the results along the column axis.
            Otherwise, pairwise combine into a Series/DataFrame of the same shape.

            If True, see `vectorbt.base.combine_fns.combine_and_concat`.
            If False, see `vectorbt.base.combine_fns.combine_multiple`.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `combine_func` in parallel.

            Only works with `numba_loop` set to False and `concat` set to True.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        broadcast (bool): Whether to broadcast all inputs.
        broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `combine_func`.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to expensive computation overhead if the passed objects are large
        and have different shape/memory order. You must also ensure that all objects have the same data type.

        Also remember to bring each object in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8

    >>> sr.vbt.combine([df, df * 2], combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine([df, df * 2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def combine_func(a, b):
    ...     time.sleep(1)
    ...     return a + b

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
    3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
    1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    if not allow_multiple or not isinstance(other, (tuple, list)):
        others = (other,)
    else:
        others = other
    others = tuple(map(lambda x: x.obj if isinstance(x, BaseAccessor) else x, others))
    checks.assert_not_none(combine_func)
    # Broadcast arguments
    if broadcast:
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
        new_obj, *new_others = reshape_fns.broadcast(self.obj, *others, **broadcast_kwargs)
    else:
        new_obj, new_others = self.obj, others
    if not checks.is_pandas(new_obj):
        new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)
    # Optionally cast to 2d array
    if to_2d:
        inputs = tuple(map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd), (new_obj, *new_others)))
    else:
        if not keep_pd:
            inputs = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
        else:
            inputs = new_obj, *new_others
    if len(inputs) == 2:
        result = combine_func(inputs[0], inputs[1], *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func) and numba_loop:
            if use_ray:
                raise ValueError("Ray cannot be used within Numba")
            for i in range(1, len(inputs)):
                checks.assert_meta_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_and_concat_nb(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        else:
            if use_ray:
                result = combine_fns.combine_and_concat_ray(inputs[0], inputs[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        columns = ArrayWrapper.from_obj(new_obj).columns
        if keys is not None:
            new_columns = index_fns.combine_indexes([keys, columns])
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes([top_columns, columns])
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
    else:
        # Combine arguments pairwise into one object
        if use_ray:
            raise ValueError("Ray cannot be used with concat=False")
        if checks.is_numba_func(combine_func) and numba_loop:
            for i in range(1, len(inputs)):
                checks.assert_dtype_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_multiple_nb(inputs, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(inputs, combine_func, *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
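The note about Numba above matters in practice. A minimal sketch with a Numba-compiled `combine_func` (assuming `numba` is installed; `add_nb` is a hypothetical example function). Both inputs share the same `int64` dtype, as the note requires:

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor
from numba import njit

@njit
def add_nb(a, b):  # hypothetical Numba-compiled combine function
    return a + b

sr = pd.Series([1, 2], index=['x', 'y'])
df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

# Inputs are broadcast with WRITEABLE and C_CONTIGUOUS requirements
# before being handed to the Numba function
print(sr.vbt.combine(df, combine_func=add_nb))
```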
def apply_and_concat(self, ntimes: int, *args, apply_func: tp.Optional[tp.Callable] = None,
                     keep_pd: bool = False, to_2d: bool = False, numba_loop: bool = False,
                     use_ray: bool = False, keys: tp.Optional[tp.IndexLike] = None,
                     wrap_kwargs: tp.KwargsLike = None, **kwargs) -> tp.Frame:
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Args:
        ntimes (int): Number of times to call `apply_func`.
        *args: Variable arguments passed to `apply_func`.
        apply_func (callable): Apply function.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `apply_func` in parallel.

            Only works with `numba_loop` set to False.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `apply_func`.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def apply_func(i, a):
    ...     time.sleep(1)
    ...     return a

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func)
    3.01 s ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func, use_ray=True)
    1.01 s ± 2.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj = reshape_fns.to_2d(self.obj, raw=not keep_pd)
    else:
        if not keep_pd:
            obj = np.asarray(self.obj)
        else:
            obj = self.obj
    if checks.is_numba_func(apply_func) and numba_loop:
        if use_ray:
            raise ValueError("Ray cannot be used within Numba")
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj, *args, **kwargs)
    else:
        if use_ray:
            result = combine_fns.apply_and_concat_one_ray(ntimes, apply_func, obj, *args, **kwargs)
        else:
            result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes([keys, self.wrapper.columns])
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes([top_columns, self.wrapper.columns])
    return self.wrapper.wrap(result, group_by=False, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
def split_into_ranges(self, n=None, range_len=None, start_idxs=None, end_idxs=None):
    """Either split into `n` ranges, each `range_len` long, or split into ranges between
    `start_idxs` and `end_idxs`.

    At least one of `range_len`, `n`, or `start_idxs` and `end_idxs` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.
    If `start_idxs` and `end_idxs` are set, will split into ranges between both arrays.
    Both index arrays must be either NumPy arrays with positions (last exclusive)
    or pandas indexes with labels (last inclusive).

    The created levels `range_start` and `range_end` will contain labels (last inclusive).

    !!! note
        All ranges must have the same length.

        The datetime-like format of the index will be lost as a result of this operation.
        Make sure to store index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(start_idxs=[0, 1], end_idxs=[4, 5]))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                    1          2          5          4          1          2
        1                    2          3          4          3          2          3
        2                    3          4          3          2          3          2
        3                    4          5          2          1          2          1

        >>> print(df.vbt.split_into_ranges(
        ...     start_idxs=pd.Index(['2020-01-01', '2020-01-03']),
        ...     end_idxs=pd.Index(['2020-01-02', '2020-01-04'])
        ... ))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-03 2020-01-01 2020-01-03 2020-01-01 2020-01-03
        range_end   2020-01-02 2020-01-04 2020-01-02 2020-01-04 2020-01-02 2020-01-04
        0                    1          3          5          3          1          3
        1                    2          4          4          2          2          2
        ```"""
    if start_idxs is None and end_idxs is None:
        if range_len is None and n is None:
            raise ValueError("At least range_len, n, or start_idxs and end_idxs must be set")
        if range_len is None:
            range_len = len(self.index) // n
        start_idxs = np.arange(len(self.index) - range_len + 1)
        end_idxs = np.arange(range_len, len(self.index) + 1)
    elif start_idxs is None or end_idxs is None:
        raise ValueError("Both start_idxs and end_idxs must be set")
    else:
        if isinstance(start_idxs, pd.Index):
            start_idxs = np.where(self.index.isin(start_idxs))[0]
        else:
            start_idxs = np.asarray(start_idxs)
        if isinstance(end_idxs, pd.Index):
            end_idxs = np.where(self.index.isin(end_idxs))[0] + 1
        else:
            end_idxs = np.asarray(end_idxs)

    if np.any((end_idxs - start_idxs) != (end_idxs - start_idxs).item(0)):
        raise ValueError("Ranges must have the same length")

    if n is not None:
        if n > len(start_idxs):
            raise ValueError(f"n cannot be bigger than the maximum number of ranges {len(start_idxs)}")
        idxs = np.round(np.linspace(0, len(start_idxs) - 1, n)).astype(int)
        start_idxs = start_idxs[idxs]
        end_idxs = end_idxs[idxs]
    matrix = nb.concat_ranges_nb(self.to_2d_array(), start_idxs, end_idxs)
    range_starts = pd.Index(self.index[start_idxs], name='range_start')
    range_ends = pd.Index(self.index[end_idxs - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                          concat=False, broadcast_kwargs={}, keys=None, **kwargs):
    """Combine with `others` using `combine_func`.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
    with `broadcast_kwargs`.

    If `concat` is True, concatenate the results along columns,
    see `vectorbt.base.combine_fns.combine_and_concat`.
    Otherwise, pairwise combine into a Series/DataFrame of the same shape,
    see `vectorbt.base.combine_fns.combine_multiple`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Use `keys` as the outermost level.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to expensive computation overhead if the passed objects are large
        and have different shape/memory order. You must also ensure that all objects have the same data type.

        Also remember to bring each object in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with_multiple([df, df * 2],
    ...     combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine_with_multiple([df, df * 2],
    ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```
    """
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writeable arrays
        # Plus all of our arrays must be in the same order
        broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
    # Optionally cast to 2d array
    if to_2d:
        bc_arrays = tuple(map(lambda x: reshape_fns.to_2d(x, raw=True), (new_obj, *new_others)))
    else:
        bc_arrays = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_meta_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        columns = new_obj.vbt.wrapper.columns
        if keys is not None:
            new_columns = index_fns.combine_indexes(keys, columns)
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes(top_columns, columns)
        return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_dtype_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
        return new_obj.vbt.wrapper.wrap(result)