def test_is_numba_func(self):
    def test_func(x):
        return x

    @njit
    def test_func_nb(x):
        return x

    assert not checks.is_numba_func(test_func)
    assert checks.is_numba_func(test_func_nb)
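# A minimal sketch of what such a check could look like (hypothetical helper;
# vectorbt's actual `checks.is_numba_func` may be implemented differently).
# Numba compilation wraps a function in a Dispatcher object, which keeps the
# original Python function under the `py_func` attribute.
from numba import njit

def is_numba_func_sketch(func) -> bool:
    # Dispatcher objects expose the wrapped Python function as `py_func`
    return hasattr(func, 'py_func')

@njit
def square_nb(x):
    return x ** 2

assert is_numba_func_sketch(square_nb)
assert not is_numba_func_sketch(lambda x: x)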
def apply_and_concat(self, ntimes, *args, apply_func=None, as_columns=None, **kwargs):
    """Apply a function n times and concatenate results into a single dataframe."""
    checks.assert_not_none(apply_func)
    if checks.is_numba_func(apply_func):
        # NOTE: apply_func must be a Numba-compiled function and its arguments must be Numba-compatible
        # NOTE: outputs of apply_func must always be 2-dimensional
        result = combine_fns.apply_and_concat_nb(np.asarray(self._obj), ntimes, apply_func, *args, **kwargs)
    else:
        result = combine_fns.apply_and_concat(np.asarray(self._obj), ntimes, apply_func, *args, **kwargs)
    # Build column hierarchy
    if as_columns is not None:
        new_columns = index_fns.combine(as_columns, reshape_fns.to_2d(self._obj).columns)
    else:
        new_columns = index_fns.tile(reshape_fns.to_2d(self._obj).columns, ntimes)
    return self.wrap_array(result, columns=new_columns)
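# A brief usage sketch for this early variant; it mirrors the example that the
# later revisions document, with `as_columns` supplying the outermost column level.
import pandas as pd

df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

# Calls apply_func with i = 0, 1, 2 and stacks the results along columns
df.vbt.apply_and_concat(3, [1, 2, 3],
                        apply_func=lambda i, a, b: a * b[i],
                        as_columns=['c', 'd', 'e'])
#       c       d       e
#    a  b   a   b   a   b
# x  3  4   6   8   9  12
# y  5  6  10  12  15  18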
def apply_and_concat(self, ntimes, *args, apply_func=None, to_2d=False, keys=None, wrap_kwargs=None, **kwargs):
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Arguments `*args` and `**kwargs` will be directly passed to `apply_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.
    Use `keys` as the outermost level.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj_arr = reshape_fns.to_2d(self._obj, raw=True)
    else:
        obj_arr = np.asarray(self._obj)
    if checks.is_numba_func(apply_func):
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj_arr, *args, **kwargs)
    else:
        result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj_arr, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes(keys, self.wrapper.columns)
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes(top_columns, self.wrapper.columns)
    return self.wrapper.wrap(result, group_by=False, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
def combine_with(self, other, *args, combine_func=None, to_2d=False, broadcast_kwargs=None, wrap_kwargs=None, **kwargs):
    """Combine both using `combine_func` into a Series/DataFrame of the same shape.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    !!! note
        The resulting array must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8
    ```
    """
    if isinstance(other, BaseAccessor):
        other = other._obj
    checks.assert_not_none(combine_func)
    if broadcast_kwargs is None:
        broadcast_kwargs = {}
    if checks.is_numba_func(combine_func):
        # Numba requires writable arrays
        broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements='W')), broadcast_kwargs)
    new_obj, new_other = reshape_fns.broadcast(self._obj, other, **broadcast_kwargs)
    # Optionally cast to 2d array
    if to_2d:
        new_obj_arr = reshape_fns.to_2d(new_obj, raw=True)
        new_other_arr = reshape_fns.to_2d(new_other, raw=True)
    else:
        new_obj_arr = np.asarray(new_obj)
        new_other_arr = np.asarray(new_other)
    result = combine_func(new_obj_arr, new_other_arr, *args, **kwargs)
    return new_obj.vbt.wrapper.wrap(result, **merge_dicts({}, wrap_kwargs))
def apply_and_concat(self, ntimes, *args, apply_func=None, pass_2d=False, as_columns=None, **kwargs):
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.utils.combine_fns.apply_and_concat`.

    Arguments `*args` and `**kwargs` will be directly passed to `apply_func`.
    If `pass_2d` is `True`, 2-dimensional NumPy arrays will be passed, otherwise as is.
    Use `as_columns` as a top-level column level.

    Example:
        ```python-repl
        >>> import pandas as pd

        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> print(df.vbt.apply_and_concat(3, [1, 2, 3],
        ...     apply_func=lambda i, a, b: a * b[i], as_columns=['c', 'd', 'e']))
              c       d       e
           a  b   a   b   a   b
        x  3  4   6   8   9  12
        y  5  6  10  12  15  18
        ```"""
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if pass_2d:
        obj_arr = reshape_fns.to_2d(np.asarray(self._obj))
    else:
        obj_arr = np.asarray(self._obj)
    if checks.is_numba_func(apply_func):
        result = combine_fns.apply_and_concat_nb(obj_arr, ntimes, apply_func, *args, **kwargs)
    else:
        result = combine_fns.apply_and_concat(obj_arr, ntimes, apply_func, *args, **kwargs)
    # Build column hierarchy
    if as_columns is not None:
        new_columns = index_fns.combine_indexes(as_columns, self.columns)
    else:
        new_columns = index_fns.tile_index(self.columns, ntimes)
    return self.wrap_array(result, columns=new_columns)
def combine_with(self, other, *args, combine_func=None, pass_2d=False, broadcast_kwargs={}, **kwargs):
    """Combine both using `combine_func` into a Series/DataFrame of the same shape.

    All arguments will be broadcast using `vectorbt.utils.reshape_fns.broadcast` with `broadcast_kwargs`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `pass_2d` is `True`, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Example:
        ```python-repl
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> print(sr.vbt.combine_with(df, combine_func=lambda x, y: x + y))
           a  b
        x  4  5
        y  7  8
        ```"""
    if isinstance(other, Base_Accessor):
        other = other._obj
    checks.assert_not_none(combine_func)
    if checks.is_numba_func(combine_func):
        # Numba requires writable arrays
        broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
    new_obj, new_other = reshape_fns.broadcast(self._obj, other, **broadcast_kwargs)
    # Optionally cast to 2d array
    if pass_2d:
        new_obj_arr = reshape_fns.to_2d(np.asarray(new_obj))
        new_other_arr = reshape_fns.to_2d(np.asarray(new_other))
    else:
        new_obj_arr = np.asarray(new_obj)
        new_other_arr = np.asarray(new_other)
    result = combine_func(new_obj_arr, new_other_arr, *args, **kwargs)
    return new_obj.vbt.wrap_array(result)
def combine_with(self, other, *args, combine_func=None, broadcast_kwargs={}, **kwargs):
    """Broadcast with `other` and combine.

    The returned shape is the same as the broadcast shape."""
    if isinstance(other, Base_Accessor):
        other = other._obj
    checks.assert_not_none(combine_func)
    if checks.is_numba_func(combine_func):
        # Numba requires writable arrays
        broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
    new_obj, new_other = reshape_fns.broadcast(self._obj, other, **broadcast_kwargs)
    return new_obj.vbt.wrap_array(
        combine_func(np.asarray(new_obj), np.asarray(new_other), *args, **kwargs))
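# A quick usage sketch for this early variant (same semantics as the documented
# later revisions): broadcast a Series against a DataFrame, then combine elementwise.
import pandas as pd

sr = pd.Series([1, 2], index=['x', 'y'])
df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

sr.vbt.combine_with(df, combine_func=lambda x, y: x + y)
#    a  b
# x  4  5
# y  7  8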
def custom_func(input_list, in_output_list, param_list, *args, input_shape=None,
                flex_2d=None, entry_args=None, exit_args=None, cache_args=None,
                entry_kwargs=None, exit_kwargs=None, cache_kwargs=None,
                return_cache=False, use_cache=None, **_kwargs):
    # Get arguments
    if len(input_list) == 0:
        if input_shape is None:
            raise ValueError("Pass input_shape if no input time series passed")
    else:
        input_shape = input_list[0].shape
    if entry_args is None:
        entry_args = ()
    if exit_args is None:
        exit_args = ()
    if cache_args is None:
        cache_args = ()
    if exit_only:
        if len(exit_args) > 0:
            raise ValueError("Use *args instead of exit_args when exit_only=True")
        exit_args = args
    else:
        if len(args) > 0:
            raise ValueError("*args can be only used when exit_only=True")
    if entry_kwargs is None:
        entry_kwargs = {}
    if exit_kwargs is None:
        exit_kwargs = {}
    if cache_kwargs is None:
        cache_kwargs = {}
    if exit_only:
        if len(exit_kwargs) > 0:
            raise ValueError("Use **kwargs instead of exit_kwargs when exit_only=True")
        exit_kwargs = _kwargs
    else:
        if len(_kwargs) > 0:
            raise ValueError("**kwargs can be only used when exit_only=True")
    kwargs_defaults = dict(
        input_shape=input_shape,
        wait=1,
        first=True,
        flex_2d=flex_2d,
    )
    entry_kwargs = merge_dicts(kwargs_defaults, entry_kwargs)
    exit_kwargs = merge_dicts(kwargs_defaults, exit_kwargs)
    cache_kwargs = merge_dicts(kwargs_defaults, cache_kwargs)
    entry_wait = entry_kwargs['wait']
    exit_wait = exit_kwargs['wait']

    # Distribute arguments across functions
    entry_input_tuple = ()
    exit_input_tuple = ()
    cache_input_tuple = ()
    for input_name in entry_input_names:
        entry_input_tuple += (input_list[input_names.index(input_name)],)
    for input_name in exit_input_names:
        exit_input_tuple += (input_list[input_names.index(input_name)],)
    for input_name in cache_input_names:
        cache_input_tuple += (input_list[input_names.index(input_name)],)
    entry_in_output_list = []
    exit_in_output_list = []
    cache_in_output_list = []
    for in_output_name in entry_in_output_names:
        entry_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    for in_output_name in exit_in_output_names:
        exit_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    for in_output_name in cache_in_output_names:
        cache_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    entry_param_list = []
    exit_param_list = []
    cache_param_list = []
    for param_name in entry_param_names:
        entry_param_list.append(param_list[param_names.index(param_name)])
    for param_name in exit_param_names:
        exit_param_list.append(param_list[param_names.index(param_name)])
    for param_name in cache_param_names:
        cache_param_list.append(param_list[param_names.index(param_name)])

    n_params = len(param_list[0]) if len(param_list) > 0 else 1
    entry_in_output_tuples = list(zip(*entry_in_output_list))
    exit_in_output_tuples = list(zip(*exit_in_output_list))
    entry_param_tuples = list(zip(*entry_param_list))
    exit_param_tuples = list(zip(*exit_param_list))

    def _build_more_args(func_settings, func_kwargs):
        pass_kwargs = func_settings.get('pass_kwargs', [])
        more_args = ()
        for key in pass_kwargs:
            value = None
            if isinstance(key, tuple):
                key, value = key
            else:
                if key.startswith('temp_idx_arr'):
                    value = np.empty((input_shape[0],), dtype=np.int_)
            value = func_kwargs.get(key, value)
            more_args += (value,)
        return more_args

    entry_more_args = _build_more_args(entry_settings, entry_kwargs)
    exit_more_args = _build_more_args(exit_settings, exit_kwargs)
    cache_more_args = _build_more_args(cache_settings, cache_kwargs)

    # Caching
    cache = use_cache
    if cache is None and cache_func is not None:
        _cache_in_output_list = cache_in_output_list
        _cache_param_list = cache_param_list
        if checks.is_numba_func(cache_func):
            if len(_cache_in_output_list) > 0:
                _cache_in_output_list = [to_typed_list(in_outputs) for in_outputs in _cache_in_output_list]
            if len(_cache_param_list) > 0:
                _cache_param_list = [to_typed_list(params) for params in _cache_param_list]
        cache = cache_func(
            *cache_input_tuple,
            *_cache_in_output_list,
            *_cache_param_list,
            *cache_args,
            *cache_more_args
        )
    if return_cache:
        return cache
    if cache is None:
        cache = ()
    if not isinstance(cache, (tuple, list, List)):
        cache = (cache,)
    entry_cache = ()
    exit_cache = ()
    if entry_settings.get('pass_cache', False):
        entry_cache = cache
    if exit_settings.get('pass_cache', False):
        exit_cache = cache

    # Apply and concatenate
    if exit_only and not iteratively:
        if len(exit_in_output_names) > 0:
            _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
        else:
            _exit_in_output_tuples = ()
        if len(exit_param_names) > 0:
            _exit_param_tuples = (to_typed_list(exit_param_tuples),)
        else:
            _exit_param_tuples = ()
        return combine_fns.apply_and_concat_one_nb(
            n_params, apply_func_nb, input_list[0], exit_wait,
            exit_input_tuple, *_exit_in_output_tuples, *_exit_param_tuples,
            exit_args + exit_more_args + exit_cache
        )
    else:
        if len(entry_in_output_names) > 0:
            _entry_in_output_tuples = (to_typed_list(entry_in_output_tuples),)
        else:
            _entry_in_output_tuples = ()
        if len(entry_param_names) > 0:
            _entry_param_tuples = (to_typed_list(entry_param_tuples),)
        else:
            _entry_param_tuples = ()
        if len(exit_in_output_names) > 0:
            _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
        else:
            _exit_in_output_tuples = ()
        if len(exit_param_names) > 0:
            _exit_param_tuples = (to_typed_list(exit_param_tuples),)
        else:
            _exit_param_tuples = ()
        return combine_fns.apply_and_concat_multiple_nb(
            n_params, apply_func_nb, input_shape, entry_wait, exit_wait,
            entry_input_tuple, exit_input_tuple,
            *_entry_in_output_tuples, *_exit_in_output_tuples,
            *_entry_param_tuples, *_exit_param_tuples,
            entry_args + entry_more_args + entry_cache,
            exit_args + exit_more_args + exit_cache
        )
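# `to_typed_list` above hands per-parameter tuples to Numba, which cannot iterate
# plain Python lists in nopython mode. A minimal sketch of such a helper, assuming
# it simply copies into a numba.typed.List (the actual vectorbt utility may differ):
from numba.typed import List

def to_typed_list_sketch(lst):
    nb_list = List()
    for item in lst:
        nb_list.append(item)
    return nb_list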
def combine(self, other: tp.MaybeTupleList[tp.Union[tp.ArrayLike, "BaseAccessor"]], *args,
            allow_multiple: bool = True,
            combine_func: tp.Optional[tp.Callable] = None,
            keep_pd: bool = False,
            to_2d: bool = False,
            concat: bool = False,
            numba_loop: bool = False,
            use_ray: bool = False,
            broadcast: bool = True,
            broadcast_kwargs: tp.KwargsLike = None,
            keys: tp.Optional[tp.IndexLike] = None,
            wrap_kwargs: tp.KwargsLike = None,
            **kwargs) -> tp.SeriesFrame:
    """Combine with `other` using `combine_func`.

    Args:
        other (array_like): Object to combine this array with.
        *args: Variable arguments passed to `combine_func`.
        allow_multiple (bool): Whether a tuple/list will be considered as multiple objects in `other`.
        combine_func (callable): Function to combine two arrays.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        concat (bool): Whether to concatenate the results along the column axis.
            Otherwise, pairwise combine into a Series/DataFrame of the same shape.

            If True, see `vectorbt.base.combine_fns.combine_and_concat`.
            If False, see `vectorbt.base.combine_fns.combine_multiple`.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `combine_func` in parallel.

            Only works with `numba_loop` set to False and `concat` set to True.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        broadcast (bool): Whether to broadcast all inputs.
        broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `combine_func`.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to an expensive computation overhead if passed objects are large
        and have different shape/memory order. You also must ensure that all objects have the
        same data type.

        Also remember to bring each in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8

    >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def combine_func(a, b):
    ...     time.sleep(1)
    ...     return a + b

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
    3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
    1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    if not allow_multiple or not isinstance(other, (tuple, list)):
        others = (other,)
    else:
        others = other
    others = tuple(map(lambda x: x.obj if isinstance(x, BaseAccessor) else x, others))
    checks.assert_not_none(combine_func)
    # Broadcast arguments
    if broadcast:
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
        new_obj, *new_others = reshape_fns.broadcast(self.obj, *others, **broadcast_kwargs)
    else:
        new_obj, new_others = self.obj, others
    if not checks.is_pandas(new_obj):
        new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)
    # Optionally cast to 2d array
    if to_2d:
        inputs = tuple(map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd), (new_obj, *new_others)))
    else:
        if not keep_pd:
            inputs = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
        else:
            inputs = new_obj, *new_others
    if len(inputs) == 2:
        result = combine_func(inputs[0], inputs[1], *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func) and numba_loop:
            if use_ray:
                raise ValueError("Ray cannot be used within Numba")
            for i in range(1, len(inputs)):
                checks.assert_meta_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_and_concat_nb(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        else:
            if use_ray:
                result = combine_fns.combine_and_concat_ray(inputs[0], inputs[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        columns = ArrayWrapper.from_obj(new_obj).columns
        if keys is not None:
            new_columns = index_fns.combine_indexes([keys, columns])
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes([top_columns, columns])
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
    else:
        # Combine arguments pairwise into one object
        if use_ray:
            raise ValueError("Ray cannot be used with concat=False")
        if checks.is_numba_func(combine_func) and numba_loop:
            for i in range(1, len(inputs)):
                checks.assert_dtype_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_multiple_nb(inputs, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(inputs, combine_func, *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
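# A hedged sketch of the Numba path of `combine`: with a Numba-compiled
# combine_func and numba_loop=True, broadcasting enforces writeable,
# C-contiguous arrays, and all inputs must share one dtype (hence the floats);
# keyword arguments to combine_func are not supported in this mode.
import pandas as pd
from numba import njit

@njit
def add_nb(x, y):
    return x + y

sr = pd.Series([1., 2.], index=['x', 'y'])
df = pd.DataFrame([[3., 4.], [5., 6.]], index=['x', 'y'], columns=['a', 'b'])

sr.vbt.combine([df, df * 2], combine_func=add_nb, numba_loop=True, concat=True)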
def apply_and_concat(self, ntimes: int, *args,
                     apply_func: tp.Optional[tp.Callable] = None,
                     keep_pd: bool = False,
                     to_2d: bool = False,
                     numba_loop: bool = False,
                     use_ray: bool = False,
                     keys: tp.Optional[tp.IndexLike] = None,
                     wrap_kwargs: tp.KwargsLike = None,
                     **kwargs) -> tp.Frame:
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Args:
        ntimes (int): Number of times to call `apply_func`.
        *args: Variable arguments passed to `apply_func`.
        apply_func (callable): Apply function.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `apply_func` in parallel.

            Only works with `numba_loop` set to False.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `apply_func`.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def apply_func(i, a):
    ...     time.sleep(1)
    ...     return a

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func)
    3.01 s ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func, use_ray=True)
    1.01 s ± 2.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj = reshape_fns.to_2d(self.obj, raw=not keep_pd)
    else:
        if not keep_pd:
            obj = np.asarray(self.obj)
        else:
            obj = self.obj
    if checks.is_numba_func(apply_func) and numba_loop:
        if use_ray:
            raise ValueError("Ray cannot be used within Numba")
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj, *args, **kwargs)
    else:
        if use_ray:
            result = combine_fns.apply_and_concat_one_ray(ntimes, apply_func, obj, *args, **kwargs)
        else:
            result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes([keys, self.wrapper.columns])
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes([top_columns, self.wrapper.columns])
    return self.wrapper.wrap(result, group_by=False, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
def custom_func(input_list: tp.List[tp.AnyArray],
                in_output_list: tp.List[tp.List[tp.AnyArray]],
                param_list: tp.List[tp.List[tp.Param]],
                *args,
                input_shape: tp.Optional[tp.Shape] = None,
                flex_2d: tp.Optional[bool] = None,
                entry_args: tp.Optional[tp.Args] = None,
                exit_args: tp.Optional[tp.Args] = None,
                cache_args: tp.Optional[tp.Args] = None,
                entry_kwargs: tp.KwargsLike = None,
                exit_kwargs: tp.KwargsLike = None,
                cache_kwargs: tp.KwargsLike = None,
                return_cache: bool = False,
                use_cache: tp.Optional[CacheOutputT] = None,
                **_kwargs) -> tp.Union[CacheOutputT, tp.Array2d, tp.List[tp.Array2d]]:
    # Get arguments
    if len(input_list) == 0:
        if input_shape is None:
            raise ValueError("Pass input_shape if no input time series were passed")
    else:
        input_shape = input_list[0].shape
    if entry_args is None:
        entry_args = ()
    if exit_args is None:
        exit_args = ()
    if cache_args is None:
        cache_args = ()
    if mode == FactoryMode.Entries:
        if len(entry_args) > 0:
            raise ValueError("Use *args instead of entry_args with FactoryMode.Entries")
        entry_args = args
    elif mode == FactoryMode.Exits or (mode == FactoryMode.Chain and entry_choice_func == first_choice_nb):
        if len(exit_args) > 0:
            raise ValueError("Use *args instead of exit_args with FactoryMode.Exits or FactoryMode.Chain")
        exit_args = args
    else:
        if len(args) > 0:
            raise ValueError("*args cannot be used with FactoryMode.Both")
    if entry_kwargs is None:
        entry_kwargs = {}
    if exit_kwargs is None:
        exit_kwargs = {}
    if cache_kwargs is None:
        cache_kwargs = {}
    if mode == FactoryMode.Entries:
        if len(entry_kwargs) > 0:
            raise ValueError("Use **kwargs instead of entry_kwargs with FactoryMode.Entries")
        entry_kwargs = _kwargs
    elif mode == FactoryMode.Exits or (mode == FactoryMode.Chain and entry_choice_func == first_choice_nb):
        if len(exit_kwargs) > 0:
            raise ValueError("Use **kwargs instead of exit_kwargs with FactoryMode.Exits or FactoryMode.Chain")
        exit_kwargs = _kwargs
    else:
        if len(_kwargs) > 0:
            raise ValueError("**kwargs cannot be used with FactoryMode.Both")
    kwargs_defaults = dict(
        input_shape=input_shape,
        wait=1,
        until_next=True,
        skip_until_exit=False,
        pick_first=True,
        flex_2d=flex_2d,
    )
    if mode == FactoryMode.Entries:
        kwargs_defaults['pick_first'] = False
    entry_kwargs = merge_dicts(kwargs_defaults, entry_kwargs)
    exit_kwargs = merge_dicts(kwargs_defaults, exit_kwargs)
    cache_kwargs = merge_dicts(kwargs_defaults, cache_kwargs)
    entry_wait = entry_kwargs['wait']
    exit_wait = exit_kwargs['wait']
    entry_pick_first = entry_kwargs['pick_first']
    exit_pick_first = exit_kwargs['pick_first']
    until_next = exit_kwargs['until_next']
    skip_until_exit = exit_kwargs['skip_until_exit']

    # Distribute arguments across functions
    entry_input_tuple = ()
    exit_input_tuple = ()
    cache_input_tuple = ()
    for input_name in entry_input_names:
        entry_input_tuple += (input_list[input_names.index(input_name)],)
    for input_name in exit_input_names:
        exit_input_tuple += (input_list[input_names.index(input_name)],)
    for input_name in cache_input_names:
        cache_input_tuple += (input_list[input_names.index(input_name)],)
    entry_in_output_list = []
    exit_in_output_list = []
    cache_in_output_list = []
    for in_output_name in entry_in_output_names:
        entry_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    for in_output_name in exit_in_output_names:
        exit_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    for in_output_name in cache_in_output_names:
        cache_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
    entry_param_list = []
    exit_param_list = []
    cache_param_list = []
    for param_name in entry_param_names:
        entry_param_list.append(param_list[param_names.index(param_name)])
    for param_name in exit_param_names:
        exit_param_list.append(param_list[param_names.index(param_name)])
    for param_name in cache_param_names:
        cache_param_list.append(param_list[param_names.index(param_name)])

    n_params = len(param_list[0]) if len(param_list) > 0 else 1
    entry_in_output_tuples = list(zip(*entry_in_output_list))
    exit_in_output_tuples = list(zip(*exit_in_output_list))
    entry_param_tuples = list(zip(*entry_param_list))
    exit_param_tuples = list(zip(*exit_param_list))

    def _build_more_args(func_settings: tp.Kwargs, func_kwargs: tp.Kwargs) -> tp.Args:
        pass_kwargs = func_settings.get('pass_kwargs', [])
        if isinstance(pass_kwargs, dict):
            pass_kwargs = list(pass_kwargs.items())
        more_args = ()
        for key in pass_kwargs:
            value = None
            if isinstance(key, tuple):
                key, value = key
            else:
                if key.startswith('temp_idx_arr'):
                    value = np.empty((input_shape[0],), dtype=np.int_)
            value = func_kwargs.get(key, value)
            more_args += (value,)
        return more_args

    entry_more_args = _build_more_args(entry_settings, entry_kwargs)
    exit_more_args = _build_more_args(exit_settings, exit_kwargs)
    cache_more_args = _build_more_args(cache_settings, cache_kwargs)

    # Caching
    cache = use_cache
    if cache is None and cache_func is not None:
        _cache_in_output_list = cache_in_output_list
        _cache_param_list = cache_param_list
        if checks.is_numba_func(cache_func):
            if len(_cache_in_output_list) > 0:
                _cache_in_output_list = [to_typed_list(in_outputs) for in_outputs in _cache_in_output_list]
            if len(_cache_param_list) > 0:
                _cache_param_list = [to_typed_list(params) for params in _cache_param_list]
        cache = cache_func(*cache_input_tuple, *_cache_in_output_list,
                           *_cache_param_list, *cache_args, *cache_more_args)
    if return_cache:
        return cache
    if cache is None:
        cache = ()
    if not isinstance(cache, tuple):
        cache = (cache,)
    entry_cache = ()
    exit_cache = ()
    if entry_settings.get('pass_cache', False):
        entry_cache = cache
    if exit_settings.get('pass_cache', False):
        exit_cache = cache

    # Apply and concatenate
    if mode == FactoryMode.Entries:
        if len(entry_in_output_names) > 0:
            if numba_loop:
                _entry_in_output_tuples = (to_typed_list(entry_in_output_tuples),)
            else:
                _entry_in_output_tuples = (entry_in_output_tuples,)
        else:
            _entry_in_output_tuples = ()
        if len(entry_param_names) > 0:
            if numba_loop:
                _entry_param_tuples = (to_typed_list(entry_param_tuples),)
            else:
                _entry_param_tuples = (entry_param_tuples,)
        else:
            _entry_param_tuples = ()
        return apply_and_concat_func(
            n_params, apply_func, input_shape, entry_pick_first, entry_input_tuple,
            *_entry_in_output_tuples, *_entry_param_tuples,
            entry_args + entry_more_args + entry_cache)
    elif mode == FactoryMode.Exits:
        if len(exit_in_output_names) > 0:
            if numba_loop:
                _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
            else:
                _exit_in_output_tuples = (exit_in_output_tuples,)
        else:
            _exit_in_output_tuples = ()
        if len(exit_param_names) > 0:
            if numba_loop:
                _exit_param_tuples = (to_typed_list(exit_param_tuples),)
            else:
                _exit_param_tuples = (exit_param_tuples,)
        else:
            _exit_param_tuples = ()
        return apply_and_concat_func(
            n_params, apply_func, input_list[0], exit_wait, until_next,
            skip_until_exit, exit_pick_first, exit_input_tuple,
            *_exit_in_output_tuples, *_exit_param_tuples,
            exit_args + exit_more_args + exit_cache)
    else:
        if len(entry_in_output_names) > 0:
            if numba_loop:
                _entry_in_output_tuples = (to_typed_list(entry_in_output_tuples),)
            else:
                _entry_in_output_tuples = (entry_in_output_tuples,)
        else:
            _entry_in_output_tuples = ()
        if len(entry_param_names) > 0:
            if numba_loop:
                _entry_param_tuples = (to_typed_list(entry_param_tuples),)
            else:
                _entry_param_tuples = (entry_param_tuples,)
        else:
            _entry_param_tuples = ()
        if len(exit_in_output_names) > 0:
            if numba_loop:
                _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
            else:
                _exit_in_output_tuples = (exit_in_output_tuples,)
        else:
            _exit_in_output_tuples = ()
        if len(exit_param_names) > 0:
            if numba_loop:
                _exit_param_tuples = (to_typed_list(exit_param_tuples),)
            else:
                _exit_param_tuples = (exit_param_tuples,)
        else:
            _exit_param_tuples = ()
        return apply_and_concat_func(
            n_params, apply_func, input_shape, entry_wait, exit_wait,
            entry_pick_first, exit_pick_first, entry_input_tuple, exit_input_tuple,
            *_entry_in_output_tuples, *_exit_in_output_tuples,
            *_entry_param_tuples, *_exit_param_tuples,
            entry_args + entry_more_args + entry_cache,
            exit_args + exit_more_args + exit_cache)
def from_apply_func(self, apply_func, caching_func=None):
    """Build indicator class around a custom apply function.

    In contrast to `IndicatorFactory.from_custom_func`, this method handles a lot of things
    for you, such as caching, parameter selection, and concatenation. All you have to do is
    to write `apply_func` that accepts a selection of parameters (single values as opposed
    to multiple values in `IndicatorFactory.from_custom_func`) and does the calculation.
    It then automatically concatenates the results into a single array per output.

    While this approach is much simpler, it is also less flexible, since you can only work
    with one parameter selection at a time and can't view all parameters.

    !!! note
        If `apply_func` is a Numba-compiled function:

        * All inputs are automatically converted to NumPy arrays
        * Each argument in `*args` must be of a Numba-compatible type
        * You cannot pass keyword arguments
        * Your outputs must be arrays of the same shape, data type and data order

    Args:
        apply_func (function): A function (can be Numba-compiled) that takes broadcasted time
            series arrays corresponding to `ts_names`, single parameter selection corresponding
            to `param_names`, and other arguments and keyword arguments, and returns outputs
            corresponding to `output_names`.
        caching_func (function): A caching function to preprocess data beforehand.
            All returned objects will be passed as additional arguments to `apply_func`.
    Returns:
        CustomIndicator
    Examples:
        ```python-repl
        >>> @njit
        ... def apply_func_nb(ts1, ts2, p1, p2, arg1):
        ...     return ts1 * p1 + arg1, ts2 * p2 + arg1

        >>> MyInd = vbt.IndicatorFactory(
        ...     ts_names=['ts1', 'ts2'],
        ...     param_names=['p1', 'p2'],
        ...     output_names=['o1', 'o2']
        ... ).from_apply_func(apply_func_nb)

        >>> myInd = MyInd.from_params(price_sm, price_sm * 2, [1, 2], [3, 4], 100)
        >>> print(myInd.o1)
        custom_p1       1      1      2      2
        custom_p2       3      3      4      4
                        a      b      a      b
        2018-01-01  101.0  105.0  102.0  110.0
        2018-01-02  102.0  104.0  104.0  108.0
        2018-01-03  103.0  103.0  106.0  106.0
        2018-01-04  104.0  102.0  108.0  104.0
        2018-01-05  105.0  101.0  110.0  102.0
        >>> print(myInd.o2)
        custom_p1       1      1      2      2
        custom_p2       3      3      4      4
                        a      b      a      b
        2018-01-01  106.0  130.0  108.0  140.0
        2018-01-02  112.0  124.0  116.0  132.0
        2018-01-03  118.0  118.0  124.0  124.0
        2018-01-04  124.0  112.0  132.0  116.0
        2018-01-05  130.0  106.0  140.0  108.0
        ```
    """
    output_names = self.output_names
    num_outputs = len(output_names)

    if checks.is_numba_func(apply_func):
        if num_outputs > 1:
            apply_and_concat_func = combine_fns.apply_and_concat_multiple_nb
        else:
            apply_and_concat_func = combine_fns.apply_and_concat_one_nb

        @njit
        def select_params_func_nb(i, apply_func, ts_list, param_tuples, *args):
            # Select the next tuple of parameters
            return apply_func(*ts_list, *param_tuples[i], *args)

        def custom_func(ts_list, param_list, *args, return_cache=False, cache=None):
            # Avoid deprecation warnings
            typed_ts_list = tuple(map(lambda x: x.vbt.to_2d_array(), ts_list))
            typed_param_tuples = List()
            for param_tuple in list(zip(*param_list)):
                typed_param_tuples.append(param_tuple)
            # Caching
            if cache is None and caching_func is not None:
                cache = caching_func(*typed_ts_list, *param_list, *args)
            if return_cache:
                return cache
            if cache is None:
                cache = ()
            if not isinstance(cache, (tuple, list, List)):
                cache = (cache,)
            return apply_and_concat_func(
                param_list[0].shape[0], select_params_func_nb, apply_func,
                typed_ts_list, typed_param_tuples, *args, *cache)
    else:
        if num_outputs > 1:
            apply_and_concat_func = combine_fns.apply_and_concat_multiple
        else:
            apply_and_concat_func = combine_fns.apply_and_concat_one

        def select_params_func(i, apply_func, ts_list, param_list, *args, **kwargs):
            # Select the next tuple of parameters
            param_is = list(map(lambda x: x[i], param_list))
            return apply_func(*ts_list, *param_is, *args, **kwargs)

        def custom_func(ts_list, param_list, *args, return_cache=False, cache=None, **kwargs):
            # Caching
            if cache is None and caching_func is not None:
                cache = caching_func(*ts_list, *param_list, *args, **kwargs)
            if return_cache:
                return cache
            if cache is None:
                cache = ()
            if not isinstance(cache, (tuple, list, List)):
                cache = (cache,)
            return apply_and_concat_func(
                param_list[0].shape[0], select_params_func, apply_func,
                ts_list, param_list, *args, *cache, **kwargs)

    return self.from_custom_func(custom_func, pass_lists=True)
def combine_with_multiple(self, others, *args, combine_func=None, pass_2d=False,
                          concat=False, broadcast_kwargs={}, as_columns=None, **kwargs):
    """Combine with `others` using `combine_func`.

    All arguments will be broadcast using `vectorbt.utils.reshape_fns.broadcast`
    with `broadcast_kwargs`.

    If `concat` is `True`, concatenate the results along columns,
    see `vectorbt.utils.combine_fns.combine_and_concat`.
    Otherwise, pairwise combine into a Series/DataFrame of the same shape,
    see `vectorbt.utils.combine_fns.combine_multiple`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `pass_2d` is `True`, 2-dimensional NumPy arrays will be passed, otherwise as is.
    Use `as_columns` as a top-level column level.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `writeable=True` and
        copy using `order='C'` flags, which can lead to an expensive computation overhead
        if passed objects are large and have different shape/memory order. You also must
        ensure that all objects have the same data type.

        Also remember to bring each in `*args` to a Numba-compatible format.

    Example:
        ```python-repl
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> print(sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y))
            a   b
        x  10  13
        y  17  20

        >>> print(sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y, concat=True, as_columns=['c', 'd']))
              c       d
           a  b   a   b
        x  4  5   7   9
        y  7  8  12  14
        ```"""
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writable arrays
        broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
        # Plus all of our arrays must be in the same order
        broadcast_kwargs['copy_kwargs'] = {**dict(order='C'), **broadcast_kwargs.get('copy_kwargs', {})}
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
    # Optionally cast to 2d array
    if pass_2d:
        bc_arrays = tuple(map(lambda x: reshape_fns.to_2d(np.asarray(x)), (new_obj, *new_others)))
    else:
        bc_arrays = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_same_meta(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        columns = new_obj.vbt.columns
        if as_columns is not None:
            new_columns = index_fns.combine_indexes(as_columns, columns)
        else:
            new_columns = index_fns.tile_index(columns, len(others))
        return new_obj.vbt.wrap_array(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_same_dtype(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
        return new_obj.vbt.wrap_array(result)
def combine_with_multiple(self, others, *args, combine_func=None, concat=False,
                          broadcast_kwargs={}, as_columns=None, **kwargs):
    """Broadcast with other objects to the same shape and combine them all pairwise.

    If `concat` is False, the returned shape is the broadcast shape.
    If `concat` is True, the returned shape is the concatenation of the broadcast shapes."""
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writable arrays
        broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
        # Plus all of our arrays must be in the same order
        broadcast_kwargs['copy_kwargs'] = {**dict(order='C'), **broadcast_kwargs.get('copy_kwargs', {})}
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
    broadcasted = tuple(map(np.asarray, (new_obj, *new_others)))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(broadcasted)):
                # NOTE: all inputs must have the same dtype
                checks.assert_same_meta(broadcasted[i - 1], broadcasted[i])
            result = combine_fns.combine_and_concat_nb(broadcasted[0], broadcasted[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(broadcasted[0], broadcasted[1:], combine_func, *args, **kwargs)
        if as_columns is not None:
            new_columns = index_fns.combine(as_columns, reshape_fns.to_2d(new_obj).columns)
        else:
            new_columns = index_fns.tile(reshape_fns.to_2d(new_obj).columns, len(others))
        return new_obj.vbt.wrap_array(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(broadcasted)):
                # NOTE: all inputs must have the same dtype
                checks.assert_same_dtype(broadcasted[i - 1], broadcasted[i])
            result = combine_fns.combine_multiple_nb(broadcasted, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(broadcasted, combine_func, *args, **kwargs)
        return new_obj.vbt.wrap_array(result)
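# A quick usage sketch for this early variant (mirrors the examples the later,
# documented revisions carry): pairwise combine by default, or concatenate the
# pairwise results along columns with concat=True.
import pandas as pd

sr = pd.Series([1, 2], index=['x', 'y'])
df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

sr.vbt.combine_with_multiple([df, df * 2], combine_func=lambda x, y: x + y)
#     a   b
# x  10  13
# y  17  20

sr.vbt.combine_with_multiple([df, df * 2], combine_func=lambda x, y: x + y,
                             concat=True, as_columns=['c', 'd'])
#       c       d
#    a  b   a   b
# x  4  5   7   9
# y  7  8  12  14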
def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                          concat=False, broadcast_kwargs={}, keys=None, **kwargs):
    """Combine with `others` using `combine_func`.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
    with `broadcast_kwargs`.

    If `concat` is True, concatenate the results along columns,
    see `vectorbt.base.combine_fns.combine_and_concat`.
    Otherwise, pairwise combine into a Series/DataFrame of the same shape,
    see `vectorbt.base.combine_fns.combine_multiple`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.
    Use `keys` as the outermost level.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to an expensive computation overhead if passed objects are large
        and have different shape/memory order. You also must ensure that all objects have the
        same data type.

        Also remember to bring each in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with_multiple([df, df*2],
    ...     combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine_with_multiple([df, df*2],
    ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```
    """
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writeable arrays
        # Plus all of our arrays must be in the same order
        broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
    # Optionally cast to 2d array
    if to_2d:
        bc_arrays = tuple(map(lambda x: reshape_fns.to_2d(x, raw=True), (new_obj, *new_others)))
    else:
        bc_arrays = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_meta_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        columns = new_obj.vbt.wrapper.columns
        if keys is not None:
            new_columns = index_fns.combine_indexes(keys, columns)
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes(top_columns, columns)
        return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_dtype_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
        return new_obj.vbt.wrapper.wrap(result)
def combine_with(self, other, *args, combine_func=None, keep_pd=False, to_2d=False,
                 broadcast=True, broadcast_kwargs=None, wrap_kwargs=None, **kwargs):
    """Combine both using `combine_func` into a Series/DataFrame of the same shape.

    Args:
        other (array_like): Object to be combined with this array.
        *args: Variable arguments passed to `combine_func`.
        combine_func (callable): Function to combine two arrays.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        broadcast (bool): Whether to broadcast all inputs.
        broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `combine_func`.

    !!! note
        The resulting array must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8
    ```
    """
    if isinstance(other, BaseAccessor):
        other = other._obj
    checks.assert_not_none(combine_func)
    if broadcast:
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writable arrays
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements='W')), broadcast_kwargs)
        new_obj, new_other = reshape_fns.broadcast(self._obj, other, **broadcast_kwargs)
    else:
        new_obj, new_other = self._obj, other
    # Optionally cast to 2d array
    if to_2d:
        inputs = tuple(map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd), (new_obj, new_other)))
    else:
        if not keep_pd:
            inputs = tuple(map(lambda x: np.asarray(x), (new_obj, new_other)))
        else:
            inputs = new_obj, new_other
    result = combine_func(inputs[0], inputs[1], *args, **kwargs)
    return new_obj.vbt.wrapper.wrap(result, **merge_dicts({}, wrap_kwargs))