예제 #1
0
    def test_is_numba_func(self):
        """Verify `checks.is_numba_func` rejects a plain function and accepts an `njit`-decorated one."""
        def plain_identity(value):
            return value

        @njit
        def compiled_identity(value):
            return value

        # A regular Python function must not be reported as Numba-compiled...
        assert not checks.is_numba_func(plain_identity)
        # ...whereas the njit-decorated twin must be.
        assert checks.is_numba_func(compiled_identity)
예제 #2
0
 def apply_and_concat(self,
                      ntimes,
                      *args,
                      apply_func=None,
                      as_columns=None,
                      **kwargs):
     """Run `apply_func` `ntimes` times and wrap the concatenated output.

     `*args` and `**kwargs` are forwarded to the looping function in
     `combine_fns`. If `as_columns` is given it is combined with the columns
     of the object, otherwise those columns are tiled `ntimes` times.

     Note carried over from the original implementation: when `apply_func` is
     Numba-compiled, its arguments must be Numba-compatible, and outputs of
     `apply_func` must always be 2-dimensional.
     """
     checks.assert_not_none(apply_func)
     # Choose the looping implementation that matches the kind of function.
     if checks.is_numba_func(apply_func):
         concat_impl = combine_fns.apply_and_concat_nb
     else:
         concat_impl = combine_fns.apply_and_concat
     stacked = concat_impl(np.asarray(self._obj), ntimes, apply_func, *args,
                           **kwargs)
     # Both column layouts start from the columns of the 2-dim view.
     base_columns = reshape_fns.to_2d(self._obj).columns
     if as_columns is None:
         new_columns = index_fns.tile(base_columns, ntimes)
     else:
         new_columns = index_fns.combine(as_columns, base_columns)
     return self.wrap_array(stacked, columns=new_columns)
예제 #3
0
    def apply_and_concat(self,
                         ntimes,
                         *args,
                         apply_func=None,
                         to_2d=False,
                         keys=None,
                         wrap_kwargs=None,
                         **kwargs):
        """Call `apply_func` `ntimes` times and stack the outputs as columns.

        The loop itself lives in `vectorbt.base.combine_fns.apply_and_concat_one`
        (or its `_nb` counterpart when `apply_func` is Numba-compiled).

        `*args` and `**kwargs` are handed to `apply_func` untouched.
        With `to_2d=True` the object is passed as a 2-dimensional NumPy array,
        otherwise as `np.asarray` of the object.
        `keys` becomes the outermost column level; when omitted, an integer
        index named `apply_idx` is used instead.

        !!! note
            The resulted arrays to be concatenated must have the same shape as broadcast input arrays.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> df.vbt.apply_and_concat(3, [1, 2, 3],
        ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
              c       d       e
           a  b   a   b   a   b
        x  3  4   6   8   9  12
        y  5  6  10  12  15  18
        ```
        """
        checks.assert_not_none(apply_func)
        # Array handed to `apply_func` on every iteration
        obj_arr = reshape_fns.to_2d(self._obj, raw=True) if to_2d else np.asarray(self._obj)
        # Numba-compiled functions get the Numba looper
        if checks.is_numba_func(apply_func):
            looper = combine_fns.apply_and_concat_one_nb
        else:
            looper = combine_fns.apply_and_concat_one
        result = looper(ntimes, apply_func, obj_arr, *args, **kwargs)
        # Outermost column level: user keys, or a plain counter
        outer_level = keys
        if outer_level is None:
            outer_level = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes(outer_level, self.wrapper.columns)
        final_wrap_kwargs = merge_dicts(dict(columns=new_columns), wrap_kwargs)
        return self.wrapper.wrap(result, group_by=False, **final_wrap_kwargs)
예제 #4
0
    def combine_with(self,
                     other,
                     *args,
                     combine_func=None,
                     to_2d=False,
                     broadcast_kwargs=None,
                     wrap_kwargs=None,
                     **kwargs):
        """Broadcast this object against `other` and merge the two with `combine_func`.

        Broadcasting is done by `vectorbt.base.reshape_fns.broadcast`, which
        receives `broadcast_kwargs`. If `other` is a `BaseAccessor`, its
        underlying object is used.

        `*args` and `**kwargs` are handed to `combine_func` untouched.
        With `to_2d=True` both operands are passed as 2-dimensional NumPy arrays,
        otherwise as `np.asarray` of the broadcast objects.
        The result is wrapped with the wrapper of the broadcast object and `wrap_kwargs`.

        !!! note
            The resulted array must have the same shape as broadcast input arrays.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> sr.vbt.combine_with(df, combine_func=lambda x, y: x + y)
           a  b
        x  4  5
        y  7  8
        ```
        """
        # Unwrap accessors so that only raw objects are broadcast
        other_obj = other._obj if isinstance(other, BaseAccessor) else other
        checks.assert_not_none(combine_func)
        bc_kwargs = {} if broadcast_kwargs is None else broadcast_kwargs
        if checks.is_numba_func(combine_func):
            # Numba requires writable arrays; explicit user settings are merged on top
            writable_defaults = dict(require_kwargs=dict(requirements='W'))
            bc_kwargs = merge_dicts(writable_defaults, bc_kwargs)
        new_obj, new_other = reshape_fns.broadcast(self._obj, other_obj, **bc_kwargs)
        # Convert both operands the same way before combining
        if to_2d:
            left_arr = reshape_fns.to_2d(new_obj, raw=True)
            right_arr = reshape_fns.to_2d(new_other, raw=True)
        else:
            left_arr, right_arr = np.asarray(new_obj), np.asarray(new_other)
        combined = combine_func(left_arr, right_arr, *args, **kwargs)
        return new_obj.vbt.wrapper.wrap(combined, **merge_dicts({}, wrap_kwargs))
예제 #5
0
    def apply_and_concat(self,
                         ntimes,
                         *args,
                         apply_func=None,
                         pass_2d=False,
                         as_columns=None,
                         **kwargs):
        """Call `apply_func` `ntimes` times and stack the outputs side by side as columns.

        The loop is delegated to `vectorbt.utils.combine_fns.apply_and_concat`,
        or to `apply_and_concat_nb` when `apply_func` is Numba-compiled.

        `*args` and `**kwargs` are handed over untouched.
        With `pass_2d=True` the object is passed as a 2-dimensional NumPy array,
        otherwise as `np.asarray` of the object.
        `as_columns` is combined with the existing columns as a top-level column
        level; when omitted, the existing columns are tiled `ntimes` times.

        Example:
            ```python-repl
            >>> import pandas as pd
            >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

            >>> print(df.vbt.apply_and_concat(3, [1, 2, 3],
            ...     apply_func=lambda i, a, b: a * b[i], as_columns=['c', 'd', 'e']))
                  c       d       e
               a  b   a   b   a   b
            x  3  4   6   8   9  12
            y  5  6  10  12  15  18
            ```"""
        checks.assert_not_none(apply_func)
        # Start from the raw array and only reshape on request
        obj_arr = np.asarray(self._obj)
        if pass_2d:
            obj_arr = reshape_fns.to_2d(obj_arr)
        # Numba-compiled functions get the Numba looper
        if checks.is_numba_func(apply_func):
            concat_impl = combine_fns.apply_and_concat_nb
        else:
            concat_impl = combine_fns.apply_and_concat
        stacked = concat_impl(obj_arr, ntimes, apply_func, *args, **kwargs)
        # Build column hierarchy
        if as_columns is None:
            new_columns = index_fns.tile_index(self.columns, ntimes)
        else:
            new_columns = index_fns.combine_indexes(as_columns, self.columns)
        return self.wrap_array(stacked, columns=new_columns)
예제 #6
0
    def combine_with(self,
                     other,
                     *args,
                     combine_func=None,
                     pass_2d=False,
                     broadcast_kwargs=None,
                     **kwargs):
        """Combine both using `combine_func` into a Series/DataFrame of the same shape.

        All arguments will be broadcast using `vectorbt.utils.reshape_fns.broadcast`
        with `broadcast_kwargs`. If `other` is a `Base_Accessor`, its underlying
        object is used.

        Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
        If `pass_2d` is `True`, 2-dimensional NumPy arrays will be passed, otherwise as is.

        `broadcast_kwargs` may be omitted or `None`, in which case no extra
        keyword arguments are passed to the broadcasting function (apart from
        `writeable=True`, which is added for Numba-compiled `combine_func`
        unless overridden in `broadcast_kwargs`).

        Example:
            ```python-repl
            >>> import pandas as pd
            >>> sr = pd.Series([1, 2], index=['x', 'y'])
            >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

            >>> print(sr.vbt.combine_with(df, combine_func=lambda x, y: x + y))
               a  b
            x  4  5
            y  7  8
            ```"""
        if isinstance(other, Base_Accessor):
            other = other._obj
        checks.assert_not_none(combine_func)
        # A `None` sentinel replaces the former mutable `{}` default
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writable arrays; user-supplied keys still win
            broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
        new_obj, new_other = reshape_fns.broadcast(self._obj, other,
                                                   **broadcast_kwargs)
        # Optionally cast to 2d array
        if pass_2d:
            new_obj_arr = reshape_fns.to_2d(np.asarray(new_obj))
            new_other_arr = reshape_fns.to_2d(np.asarray(new_other))
        else:
            new_obj_arr = np.asarray(new_obj)
            new_other_arr = np.asarray(new_other)
        result = combine_func(new_obj_arr, new_other_arr, *args, **kwargs)
        return new_obj.vbt.wrap_array(result)
예제 #7
0
    def combine_with(self,
                     other,
                     *args,
                     combine_func=None,
                     broadcast_kwargs=None,
                     **kwargs):
        """Broadcast with other and combine.

        `other` is unwrapped first if it is a `Base_Accessor`. Both objects are
        broadcast with `reshape_fns.broadcast` using `broadcast_kwargs`, then
        `combine_func` is called with their NumPy views followed by `*args`
        and `**kwargs`.

        `broadcast_kwargs` may be omitted or `None`. For a Numba-compiled
        `combine_func`, `writeable=True` is added unless `broadcast_kwargs`
        overrides it.

        The returned shape is the same as broadcasted shape."""
        if isinstance(other, Base_Accessor):
            other = other._obj
        checks.assert_not_none(combine_func)
        # A `None` sentinel replaces the former mutable `{}` default
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writable arrays; user-supplied keys still win
            broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
        new_obj, new_other = reshape_fns.broadcast(self._obj, other,
                                                   **broadcast_kwargs)
        result = combine_func(np.asarray(new_obj), np.asarray(new_other),
                              *args, **kwargs)
        return new_obj.vbt.wrap_array(result)
예제 #8
0
파일: factory.py 프로젝트: wcy/vectorbt
        def custom_func(input_list, in_output_list, param_list, *args, input_shape=None, flex_2d=None,
                        entry_args=None, exit_args=None, cache_args=None, entry_kwargs=None,
                        exit_kwargs=None, cache_kwargs=None, return_cache=False, use_cache=None, **_kwargs):
            """Split the arguments between entry, exit and cache functions and run `apply_func_nb`.

            The names `exit_only`, `iteratively`, `apply_func_nb`, `cache_func`,
            `entry_settings`/`exit_settings`/`cache_settings`, `input_names`,
            `in_output_names`, `param_names` and the `entry_*`/`exit_*`/`cache_*`
            name lists are taken from the enclosing scope, which is not part of
            this block.

            Args:
                input_list: Input arrays, positioned as in `input_names`.
                in_output_list: In-place output sequences, positioned as in `in_output_names`.
                param_list: Parameter value sequences, positioned as in `param_names`.
                    The length of the first one is used as the number of iterations.
                *args: Positional arguments of the exit function; only allowed when `exit_only` is set.
                input_shape: Shape to use when `input_list` is empty; otherwise replaced
                    by the shape of the first input.
                flex_2d: Stored under the `flex_2d` key of the default keyword arguments.
                entry_args, exit_args, cache_args: Positional arguments per function (default `()`).
                entry_kwargs, exit_kwargs, cache_kwargs: Keyword arguments per function (default `{}`).
                return_cache: Whether to return the cache right after it is determined.
                use_cache: Pre-computed cache; when given, `cache_func` is not called.
                **_kwargs: Keyword arguments of the exit function; only allowed when `exit_only` is set.

            Returns:
                The cache if `return_cache` is set, otherwise the result of
                `combine_fns.apply_and_concat_one_nb` (when `exit_only and not iteratively`)
                or of `combine_fns.apply_and_concat_multiple_nb`.

            Raises:
                ValueError: If no inputs and no `input_shape` are passed, or if
                    `*args`/`**kwargs` and `exit_args`/`exit_kwargs` are used in a
                    combination that does not match `exit_only`.
            """
            # Get arguments
            if len(input_list) == 0:
                if input_shape is None:
                    raise ValueError("Pass input_shape if no input time series passed")
            else:
                # The first input always dictates the shape, even if input_shape was passed
                input_shape = input_list[0].shape

            if entry_args is None:
                entry_args = ()
            if exit_args is None:
                exit_args = ()
            if cache_args is None:
                cache_args = ()
            if exit_only:
                # With a single (exit) function, *args are its positional arguments
                if len(exit_args) > 0:
                    raise ValueError("Use *args instead of exit_args when exit_only=True")
                exit_args = args
            else:
                if len(args) > 0:
                    raise ValueError("*args can be only used when exit_only=True")

            if entry_kwargs is None:
                entry_kwargs = {}
            if exit_kwargs is None:
                exit_kwargs = {}
            if cache_kwargs is None:
                cache_kwargs = {}
            if exit_only:
                # Same convention as for *args: **kwargs belong to the exit function
                if len(exit_kwargs) > 0:
                    raise ValueError("Use **kwargs instead of exit_kwargs when exit_only=True")
                exit_kwargs = _kwargs
            else:
                if len(_kwargs) > 0:
                    raise ValueError("**kwargs can be only used when exit_only=True")

            # Defaults shared by all three keyword dicts
            # NOTE(review): presumably merge_dicts lets the later dict override these - confirm
            kwargs_defaults = dict(
                input_shape=input_shape,
                wait=1,
                first=True,
                flex_2d=flex_2d,
            )
            entry_kwargs = merge_dicts(kwargs_defaults, entry_kwargs)
            exit_kwargs = merge_dicts(kwargs_defaults, exit_kwargs)
            cache_kwargs = merge_dicts(kwargs_defaults, cache_kwargs)
            # `wait` is pulled out because it is passed positionally to the loopers below
            entry_wait = entry_kwargs['wait']
            exit_wait = exit_kwargs['wait']

            # Distribute arguments across functions
            # Each function receives only the inputs listed in its own name list,
            # looked up by position in the global `input_names`
            entry_input_tuple = ()
            exit_input_tuple = ()
            cache_input_tuple = ()
            for input_name in entry_input_names:
                entry_input_tuple += (input_list[input_names.index(input_name)],)
            for input_name in exit_input_names:
                exit_input_tuple += (input_list[input_names.index(input_name)],)
            for input_name in cache_input_names:
                cache_input_tuple += (input_list[input_names.index(input_name)],)

            # Same selection for in-place outputs
            entry_in_output_list = []
            exit_in_output_list = []
            cache_in_output_list = []
            for in_output_name in entry_in_output_names:
                entry_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
            for in_output_name in exit_in_output_names:
                exit_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])
            for in_output_name in cache_in_output_names:
                cache_in_output_list.append(in_output_list[in_output_names.index(in_output_name)])

            # Same selection for parameters
            entry_param_list = []
            exit_param_list = []
            cache_param_list = []
            for param_name in entry_param_names:
                entry_param_list.append(param_list[param_names.index(param_name)])
            for param_name in exit_param_names:
                exit_param_list.append(param_list[param_names.index(param_name)])
            for param_name in cache_param_names:
                cache_param_list.append(param_list[param_names.index(param_name)])

            # Number of iterations: length of the first parameter sequence, or 1 without parameters
            n_params = len(param_list[0]) if len(param_list) > 0 else 1
            # Transpose from "one sequence per name" to "one tuple per position"
            entry_in_output_tuples = list(zip(*entry_in_output_list))
            exit_in_output_tuples = list(zip(*exit_in_output_list))
            entry_param_tuples = list(zip(*entry_param_list))
            exit_param_tuples = list(zip(*exit_param_list))

            def _build_more_args(func_settings, func_kwargs):
                """Turn the keyword arguments named in `pass_kwargs` into a positional tuple.

                Each entry of `func_settings['pass_kwargs']` is either a key or a
                `(key, default)` tuple. The value is taken from `func_kwargs`,
                falling back to the default. Plain keys starting with
                `temp_idx_arr` default to an uninitialized integer array with one
                element per row of `input_shape`; other plain keys default to None.
                """
                pass_kwargs = func_settings.get('pass_kwargs', [])
                more_args = ()
                for key in pass_kwargs:
                    value = None
                    if isinstance(key, tuple):
                        key, value = key
                    else:
                        if key.startswith('temp_idx_arr'):
                            value = np.empty((input_shape[0],), dtype=np.int_)
                    value = func_kwargs.get(key, value)
                    more_args += (value,)
                return more_args

            entry_more_args = _build_more_args(entry_settings, entry_kwargs)
            exit_more_args = _build_more_args(exit_settings, exit_kwargs)
            cache_more_args = _build_more_args(cache_settings, cache_kwargs)

            # Caching
            # A cache passed by the caller takes priority over calling cache_func
            cache = use_cache
            if cache is None and cache_func is not None:
                _cache_in_output_list = cache_in_output_list
                _cache_param_list = cache_param_list
                if checks.is_numba_func(cache_func):
                    # Convert nested sequences with to_typed_list before calling a Numba function
                    # NOTE(review): presumably produces Numba typed lists - confirm
                    if len(_cache_in_output_list) > 0:
                        _cache_in_output_list = [to_typed_list(in_outputs) for in_outputs in _cache_in_output_list]
                    if len(_cache_param_list) > 0:
                        _cache_param_list = [to_typed_list(params) for params in _cache_param_list]

                cache = cache_func(
                    *cache_input_tuple,
                    *_cache_in_output_list,
                    *_cache_param_list,
                    *cache_args,
                    *cache_more_args
                )
            if return_cache:
                return cache
            # Normalize the cache to a sequence so it can be appended to the argument tuples
            if cache is None:
                cache = ()
            if not isinstance(cache, (tuple, list, List)):
                cache = (cache,)

            # The cache is forwarded only to functions whose settings request it
            entry_cache = ()
            exit_cache = ()
            if entry_settings.get('pass_cache', False):
                entry_cache = cache
            if exit_settings.get('pass_cache', False):
                exit_cache = cache

            # Apply and concatenate
            if exit_only and not iteratively:
                # Single-function path: only exit arguments are involved.
                # Empty groups are left out entirely rather than passed as empty lists.
                if len(exit_in_output_names) > 0:
                    _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
                else:
                    _exit_in_output_tuples = ()
                if len(exit_param_names) > 0:
                    _exit_param_tuples = (to_typed_list(exit_param_tuples),)
                else:
                    _exit_param_tuples = ()

                return combine_fns.apply_and_concat_one_nb(
                    n_params,
                    apply_func_nb,
                    input_list[0],
                    exit_wait,
                    exit_input_tuple,
                    *_exit_in_output_tuples,
                    *_exit_param_tuples,
                    exit_args + exit_more_args + exit_cache
                )

            else:
                # Two-function path: entry and exit argument groups are passed side by side
                if len(entry_in_output_names) > 0:
                    _entry_in_output_tuples = (to_typed_list(entry_in_output_tuples),)
                else:
                    _entry_in_output_tuples = ()
                if len(entry_param_names) > 0:
                    _entry_param_tuples = (to_typed_list(entry_param_tuples),)
                else:
                    _entry_param_tuples = ()
                if len(exit_in_output_names) > 0:
                    _exit_in_output_tuples = (to_typed_list(exit_in_output_tuples),)
                else:
                    _exit_in_output_tuples = ()
                if len(exit_param_names) > 0:
                    _exit_param_tuples = (to_typed_list(exit_param_tuples),)
                else:
                    _exit_param_tuples = ()

                return combine_fns.apply_and_concat_multiple_nb(
                    n_params,
                    apply_func_nb,
                    input_shape,
                    entry_wait,
                    exit_wait,
                    entry_input_tuple,
                    exit_input_tuple,
                    *_entry_in_output_tuples,
                    *_exit_in_output_tuples,
                    *_entry_param_tuples,
                    *_exit_param_tuples,
                    entry_args + entry_more_args + entry_cache,
                    exit_args + exit_more_args + exit_cache
                )
예제 #9
0
    def combine(self,
                other: tp.MaybeTupleList[tp.Union[tp.ArrayLike,
                                                  "BaseAccessor"]],
                *args,
                allow_multiple: bool = True,
                combine_func: tp.Optional[tp.Callable] = None,
                keep_pd: bool = False,
                to_2d: bool = False,
                concat: bool = False,
                numba_loop: bool = False,
                use_ray: bool = False,
                broadcast: bool = True,
                broadcast_kwargs: tp.KwargsLike = None,
                keys: tp.Optional[tp.IndexLike] = None,
                wrap_kwargs: tp.KwargsLike = None,
                **kwargs) -> tp.SeriesFrame:
        """Combine with `other` using `combine_func`.

        Args:
            other (array_like): Object to combine this array with.
            *args: Variable arguments passed to `combine_func`.
            allow_multiple (bool): Whether a tuple/list will be considered as multiple objects in `other`.
            combine_func (callable): Function to combine two arrays.

                Can be Numba-compiled.
            keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
            to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
            concat (bool): Whether to concatenate the results along the column axis.
                Otherwise, pairwise combine into a Series/DataFrame of the same shape.

                If True, see `vectorbt.base.combine_fns.combine_and_concat`.
                If False, see `vectorbt.base.combine_fns.combine_multiple`.
            numba_loop (bool): Whether to loop using Numba.

                Set to True when iterating large number of times over small input,
                but note that Numba doesn't support variable keyword arguments.
            use_ray (bool): Whether to use Ray to execute `combine_func` in parallel.

                Only works with `numba_loop` set to False and `concat` is set to True.
                See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
            broadcast (bool): Whether to broadcast all inputs.
            broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
            keys (index_like): Outermost column level.
            wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
            **kwargs: Keyword arguments passed to `combine_func`.

        Returns:
            The combined result wrapped using an `ArrayWrapper` built from the
            (broadcast) object.

        Raises:
            ValueError: If `use_ray` is True and the loop would run within Numba,
                or if `use_ray` is True with `concat=False` (both only checked when
                more than one object is passed in `other`).

        !!! note
            If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
            flags, which can lead to an expensive computation overhead if passed objects are large and
            have different shape/memory order. You also must ensure that all objects have the same data type.

            Also remember to bring each in `*args` to a Numba-compatible format.

        !!! note
            When exactly one object is passed in `other`, `combine_func` is called once and its
            result is wrapped directly; `concat`, `keys`, `numba_loop` and `use_ray` are not
            consulted in that case.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
           a  b
        x  4  5
        y  7  8

        >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y)
            a   b
        x  10  13
        y  17  20

        >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
              c       d
           a  b   a   b
        x  4  5   7   9
        y  7  8  12  14
        ```

        Use Ray for small inputs and large processing times:

        ```python-repl
        >>> import time

        >>> def combine_func(a, b):
        ...     time.sleep(1)
        ...     return a + b

        >>> sr = pd.Series([1, 2, 3])

        >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
        3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

        >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
        1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
        ```
        """
        # Normalize `other` to a tuple of objects; a tuple/list counts as a single
        # object when `allow_multiple` is disabled
        if not allow_multiple or not isinstance(other, (tuple, list)):
            others = (other, )
        else:
            others = other
        # Unwrap accessors to their underlying objects
        others = tuple(
            map(lambda x: x.obj if isinstance(x, BaseAccessor) else x, others))
        checks.assert_not_none(combine_func)
        # Broadcast arguments
        if broadcast:
            if broadcast_kwargs is None:
                broadcast_kwargs = {}
            if checks.is_numba_func(combine_func):
                # Numba requires writeable arrays
                # Plus all of our arrays must be in the same order
                broadcast_kwargs = merge_dicts(
                    dict(require_kwargs=dict(requirements=['W', 'C'])),
                    broadcast_kwargs)
            new_obj, *new_others = reshape_fns.broadcast(
                self.obj, *others, **broadcast_kwargs)
        else:
            new_obj, new_others = self.obj, others
        # Make sure there is a pandas object to derive the wrapper from
        if not checks.is_pandas(new_obj):
            new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)
        # Optionally cast to 2d array
        if to_2d:
            inputs = tuple(
                map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd),
                    (new_obj, *new_others)))
        else:
            if not keep_pd:
                inputs = tuple(
                    map(lambda x: np.asarray(x), (new_obj, *new_others)))
            else:
                inputs = new_obj, *new_others
        # Shortcut for a single `other`: one direct call, no looping or concatenation
        if len(inputs) == 2:
            result = combine_func(inputs[0], inputs[1], *args, **kwargs)
            return ArrayWrapper.from_obj(new_obj).wrap(
                result, **merge_dicts({}, wrap_kwargs))
        if concat:
            # Concat the results horizontally
            if checks.is_numba_func(combine_func) and numba_loop:
                if use_ray:
                    raise ValueError("Ray cannot be used within Numba")
                # Neighbouring inputs are checked against each other before entering the Numba loop
                for i in range(1, len(inputs)):
                    checks.assert_meta_equal(inputs[i - 1], inputs[i])
                result = combine_fns.combine_and_concat_nb(
                    inputs[0], inputs[1:], combine_func, *args, **kwargs)
            else:
                if use_ray:
                    result = combine_fns.combine_and_concat_ray(
                        inputs[0], inputs[1:], combine_func, *args, **kwargs)
                else:
                    result = combine_fns.combine_and_concat(
                        inputs[0], inputs[1:], combine_func, *args, **kwargs)
            # Build column hierarchy: `keys` (or a counter named `combine_idx`) on top
            columns = ArrayWrapper.from_obj(new_obj).columns
            if keys is not None:
                new_columns = index_fns.combine_indexes([keys, columns])
            else:
                top_columns = pd.Index(np.arange(len(new_others)),
                                       name='combine_idx')
                new_columns = index_fns.combine_indexes([top_columns, columns])
            return ArrayWrapper.from_obj(new_obj).wrap(
                result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
        else:
            # Combine arguments pairwise into one object
            if use_ray:
                raise ValueError("Ray cannot be used with concat=False")
            if checks.is_numba_func(combine_func) and numba_loop:
                # Neighbouring inputs must share the data type before entering the Numba loop
                for i in range(1, len(inputs)):
                    checks.assert_dtype_equal(inputs[i - 1], inputs[i])
                result = combine_fns.combine_multiple_nb(
                    inputs, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(inputs, combine_func,
                                                      *args, **kwargs)
            return ArrayWrapper.from_obj(new_obj).wrap(
                result, **merge_dicts({}, wrap_kwargs))
예제 #10
0
    def apply_and_concat(self,
                         ntimes: int,
                         *args,
                         apply_func: tp.Optional[tp.Callable] = None,
                         keep_pd: bool = False,
                         to_2d: bool = False,
                         numba_loop: bool = False,
                         use_ray: bool = False,
                         keys: tp.Optional[tp.IndexLike] = None,
                         wrap_kwargs: tp.KwargsLike = None,
                         **kwargs) -> tp.Frame:
        """Call `apply_func` `ntimes` times and stack the outputs as columns.

        The loop itself lives in `vectorbt.base.combine_fns.apply_and_concat_one`
        and its `_nb`/`_ray` counterparts.

        Args:
            ntimes (int): Number of times to call `apply_func`.
            *args: Variable arguments passed to `apply_func`.
            apply_func (callable): Apply function.

                Can be Numba-compiled.
            keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
            to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
            numba_loop (bool): Whether to loop using Numba.

                Only takes effect when `apply_func` is Numba-compiled.
                Set to True when iterating large number of times over small input,
                but note that Numba doesn't support variable keyword arguments.
            use_ray (bool): Whether to use Ray to execute `apply_func` in parallel.

                Cannot be combined with looping in Numba.
                See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
            keys (index_like): Outermost column level.

                Defaults to an integer index named `apply_idx`.
            wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
            **kwargs: Keyword arguments passed to `apply_func`.

        Raises:
            ValueError: If `use_ray` is True while the loop would run within Numba.

        !!! note
            The resulted arrays to be concatenated must have the same shape as broadcast input arrays.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> df.vbt.apply_and_concat(3, [1, 2, 3],
        ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
              c       d       e
           a  b   a   b   a   b
        x  3  4   6   8   9  12
        y  5  6  10  12  15  18
        ```

        Use Ray for small inputs and large processing times:

        ```python-repl
        >>> import time

        >>> def apply_func(i, a):
        ...     time.sleep(1)
        ...     return a

        >>> sr = pd.Series([1, 2, 3])

        >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func)
        3.01 s ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

        >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func, use_ray=True)
        1.01 s ± 2.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
        ```
        """
        checks.assert_not_none(apply_func)
        # Decide in which form the object is handed to `apply_func`
        if to_2d:
            obj = reshape_fns.to_2d(self.obj, raw=not keep_pd)
        elif keep_pd:
            obj = self.obj
        else:
            obj = np.asarray(self.obj)

        # The Numba looper is used only for Numba-compiled functions and on request
        loop_in_numba = checks.is_numba_func(apply_func) and numba_loop
        if loop_in_numba and use_ray:
            raise ValueError("Ray cannot be used within Numba")
        if loop_in_numba:
            looper = combine_fns.apply_and_concat_one_nb
        elif use_ray:
            looper = combine_fns.apply_and_concat_one_ray
        else:
            looper = combine_fns.apply_and_concat_one
        result = looper(ntimes, apply_func, obj, *args, **kwargs)

        # Outermost column level: user keys, or a plain counter
        if keys is None:
            outer_level = pd.Index(np.arange(ntimes), name='apply_idx')
        else:
            outer_level = keys
        new_columns = index_fns.combine_indexes(
            [outer_level, self.wrapper.columns])
        final_wrap_kwargs = merge_dicts(dict(columns=new_columns), wrap_kwargs)
        return self.wrapper.wrap(result, group_by=False, **final_wrap_kwargs)
예제 #11
0
        def custom_func(
            input_list: tp.List[tp.AnyArray],
            in_output_list: tp.List[tp.List[tp.AnyArray]],
            param_list: tp.List[tp.List[tp.Param]],
            *args,
            input_shape: tp.Optional[tp.Shape] = None,
            flex_2d: tp.Optional[bool] = None,
            entry_args: tp.Optional[tp.Args] = None,
            exit_args: tp.Optional[tp.Args] = None,
            cache_args: tp.Optional[tp.Args] = None,
            entry_kwargs: tp.KwargsLike = None,
            exit_kwargs: tp.KwargsLike = None,
            cache_kwargs: tp.KwargsLike = None,
            return_cache: bool = False,
            use_cache: tp.Optional[CacheOutputT] = None,
            **_kwargs
        ) -> tp.Union[CacheOutputT, tp.Array2d, tp.List[tp.Array2d]]:
            """Split arguments between the entry, exit and cache functions and run them.

            The names `mode`, `apply_func`, `apply_and_concat_func`, `numba_loop`,
            `cache_func`, `entry_choice_func`, the `*_settings` dicts and the
            `*_names` lists are read from the enclosing scope.

            Args:
                input_list: Input arrays, ordered like `input_names`.
                in_output_list: One list of in-place output arrays per name
                    in `in_output_names`.
                param_list: One list of parameter values per name in `param_names`.
                *args: Positional arguments. Used as `entry_args` with
                    `FactoryMode.Entries`, and as `exit_args` with
                    `FactoryMode.Exits` or with `FactoryMode.Chain` when
                    `entry_choice_func` is `first_choice_nb`. Rejected otherwise.
                input_shape: Shape to use when `input_list` is empty. Otherwise
                    replaced by the shape of the first input.
                flex_2d: Default value of the `flex_2d` keyword argument.
                entry_args: Positional arguments for the entry function.
                exit_args: Positional arguments for the exit function.
                cache_args: Positional arguments for `cache_func`.
                entry_kwargs: Keyword arguments for the entry function.
                exit_kwargs: Keyword arguments for the exit function.
                cache_kwargs: Keyword arguments for `cache_func`.
                return_cache: If True, return the cache instead of running
                    `apply_and_concat_func`.
                use_cache: Precomputed cache; if given, `cache_func` is not called.
                **_kwargs: Keyword arguments, routed the same way as `*args`.

            Returns:
                The cache if `return_cache` is True, otherwise whatever
                `apply_and_concat_func` returns.

            Raises:
                ValueError: If neither inputs nor `input_shape` were passed, or
                    if `*args`/`**kwargs` conflict with the explicit
                    entry/exit arguments for the current `mode`.
            """
            # Resolve the shape: the first input wins over `input_shape`
            if len(input_list) == 0:
                if input_shape is None:
                    raise ValueError(
                        "Pass input_shape if no input time series were passed")
            else:
                input_shape = input_list[0].shape

            # True if *args and **kwargs are meant for the exit function
            star_to_exit = mode == FactoryMode.Exits or (
                mode == FactoryMode.Chain
                and entry_choice_func == first_choice_nb)

            # Route positional arguments
            if entry_args is None:
                entry_args = ()
            if exit_args is None:
                exit_args = ()
            if cache_args is None:
                cache_args = ()
            if mode == FactoryMode.Entries:
                if len(entry_args) > 0:
                    raise ValueError(
                        "Use *args instead of entry_args with FactoryMode.Entries"
                    )
                entry_args = args
            elif star_to_exit:
                if len(exit_args) > 0:
                    raise ValueError(
                        "Use *args instead of exit_args "
                        "with FactoryMode.Exits or FactoryMode.Chain")
                exit_args = args
            else:
                if len(args) > 0:
                    raise ValueError(
                        "*args cannot be used with FactoryMode.Both")

            # Route keyword arguments
            if entry_kwargs is None:
                entry_kwargs = {}
            if exit_kwargs is None:
                exit_kwargs = {}
            if cache_kwargs is None:
                cache_kwargs = {}
            if mode == FactoryMode.Entries:
                if len(entry_kwargs) > 0:
                    raise ValueError(
                        "Use **kwargs instead of entry_kwargs with FactoryMode.Entries"
                    )
                entry_kwargs = _kwargs
            elif star_to_exit:
                if len(exit_kwargs) > 0:
                    raise ValueError(
                        "Use **kwargs instead of exit_kwargs "
                        "with FactoryMode.Exits or FactoryMode.Chain")
                exit_kwargs = _kwargs
            else:
                if len(_kwargs) > 0:
                    # This branch rejects keyword arguments, so say so
                    raise ValueError(
                        "**kwargs cannot be used with FactoryMode.Both")

            # Fill in defaults; explicitly passed keys take part in the merge
            kwargs_defaults = dict(
                input_shape=input_shape,
                wait=1,
                until_next=True,
                skip_until_exit=False,
                pick_first=True,
                flex_2d=flex_2d,
            )
            if mode == FactoryMode.Entries:
                kwargs_defaults['pick_first'] = False
            entry_kwargs = merge_dicts(kwargs_defaults, entry_kwargs)
            exit_kwargs = merge_dicts(kwargs_defaults, exit_kwargs)
            cache_kwargs = merge_dicts(kwargs_defaults, cache_kwargs)
            entry_wait = entry_kwargs['wait']
            exit_wait = exit_kwargs['wait']
            entry_pick_first = entry_kwargs['pick_first']
            exit_pick_first = exit_kwargs['pick_first']
            until_next = exit_kwargs['until_next']
            skip_until_exit = exit_kwargs['skip_until_exit']

            def _pick(values, all_names, wanted_names):
                """Select the items of `values` whose names are in `wanted_names`."""
                return [
                    values[all_names.index(name)] for name in wanted_names
                ]

            # Distribute arguments across functions
            entry_input_tuple = tuple(
                _pick(input_list, input_names, entry_input_names))
            exit_input_tuple = tuple(
                _pick(input_list, input_names, exit_input_names))
            cache_input_tuple = tuple(
                _pick(input_list, input_names, cache_input_names))

            entry_in_output_list = _pick(in_output_list, in_output_names,
                                         entry_in_output_names)
            exit_in_output_list = _pick(in_output_list, in_output_names,
                                        exit_in_output_names)
            cache_in_output_list = _pick(in_output_list, in_output_names,
                                         cache_in_output_names)

            entry_param_list = _pick(param_list, param_names,
                                     entry_param_names)
            exit_param_list = _pick(param_list, param_names, exit_param_names)
            cache_param_list = _pick(param_list, param_names,
                                     cache_param_names)

            # Transpose "one list per name" into "one tuple per parameter index"
            n_params = len(param_list[0]) if len(param_list) > 0 else 1
            entry_in_output_tuples = list(zip(*entry_in_output_list))
            exit_in_output_tuples = list(zip(*exit_in_output_list))
            entry_param_tuples = list(zip(*entry_param_list))
            exit_param_tuples = list(zip(*exit_param_list))

            def _build_more_args(func_settings: tp.Kwargs,
                                 func_kwargs: tp.Kwargs) -> tp.Args:
                """Turn the `pass_kwargs` setting into extra positional arguments."""
                pass_kwargs = func_settings.get('pass_kwargs', [])
                if isinstance(pass_kwargs, dict):
                    pass_kwargs = list(pass_kwargs.items())
                more_args = ()
                for key in pass_kwargs:
                    value = None
                    if isinstance(key, tuple):
                        # (name, default) pair
                        key, value = key
                    else:
                        if key.startswith('temp_idx_arr'):
                            # Default to an uninitialized integer buffer with
                            # one element per row of the input shape
                            value = np.empty((input_shape[0], ), dtype=np.int_)
                    value = func_kwargs.get(key, value)
                    more_args += (value, )
                return more_args

            entry_more_args = _build_more_args(entry_settings, entry_kwargs)
            exit_more_args = _build_more_args(exit_settings, exit_kwargs)
            cache_more_args = _build_more_args(cache_settings, cache_kwargs)

            # Caching
            cache = use_cache
            if cache is None and cache_func is not None:
                _cache_in_output_list = cache_in_output_list
                _cache_param_list = cache_param_list
                if checks.is_numba_func(cache_func):
                    if len(_cache_in_output_list) > 0:
                        _cache_in_output_list = [
                            to_typed_list(in_outputs)
                            for in_outputs in _cache_in_output_list
                        ]
                    if len(_cache_param_list) > 0:
                        _cache_param_list = [
                            to_typed_list(params)
                            for params in _cache_param_list
                        ]

                cache = cache_func(*cache_input_tuple, *_cache_in_output_list,
                                   *_cache_param_list, *cache_args,
                                   *cache_more_args)
            if return_cache:
                return cache
            if cache is None:
                cache = ()
            if not isinstance(cache, tuple):
                cache = (cache, )

            # Only functions that asked for it receive the cache
            entry_cache = ()
            exit_cache = ()
            if entry_settings.get('pass_cache', False):
                entry_cache = cache
            if exit_settings.get('pass_cache', False):
                exit_cache = cache

            def _pack(names, tuples):
                """Wrap `tuples` for star-unpacking; empty if there are no `names`."""
                if len(names) == 0:
                    return ()
                if numba_loop:
                    return (to_typed_list(tuples), )
                return (tuples, )

            # Apply and concatenate
            if mode == FactoryMode.Entries:
                _entry_in_output_tuples = _pack(entry_in_output_names,
                                                entry_in_output_tuples)
                _entry_param_tuples = _pack(entry_param_names,
                                            entry_param_tuples)

                return apply_and_concat_func(
                    n_params, apply_func, input_shape, entry_pick_first,
                    entry_input_tuple, *_entry_in_output_tuples,
                    *_entry_param_tuples,
                    entry_args + entry_more_args + entry_cache)

            elif mode == FactoryMode.Exits:
                _exit_in_output_tuples = _pack(exit_in_output_names,
                                               exit_in_output_tuples)
                _exit_param_tuples = _pack(exit_param_names,
                                           exit_param_tuples)

                return apply_and_concat_func(
                    n_params, apply_func, input_list[0], exit_wait, until_next,
                    skip_until_exit, exit_pick_first, exit_input_tuple,
                    *_exit_in_output_tuples, *_exit_param_tuples,
                    exit_args + exit_more_args + exit_cache)

            else:
                _entry_in_output_tuples = _pack(entry_in_output_names,
                                                entry_in_output_tuples)
                _entry_param_tuples = _pack(entry_param_names,
                                            entry_param_tuples)
                _exit_in_output_tuples = _pack(exit_in_output_names,
                                               exit_in_output_tuples)
                _exit_param_tuples = _pack(exit_param_names,
                                           exit_param_tuples)

                return apply_and_concat_func(
                    n_params, apply_func, input_shape, entry_wait, exit_wait,
                    entry_pick_first, exit_pick_first, entry_input_tuple,
                    exit_input_tuple, *_entry_in_output_tuples,
                    *_exit_in_output_tuples, *_entry_param_tuples,
                    *_exit_param_tuples,
                    entry_args + entry_more_args + entry_cache,
                    exit_args + exit_more_args + exit_cache)
예제 #12
0
    def from_apply_func(self, apply_func, caching_func=None):
        """Build an indicator class around a custom apply function.

        Unlike `IndicatorFactory.from_custom_func`, this method takes care of caching,
        parameter selection and concatenation. You only write `apply_func`, which receives
        one parameter selection at a time (single values, not the full lists that
        `IndicatorFactory.from_custom_func` works with) and does the calculation.
        The results are then concatenated into a single array per output.

        This approach is much simpler but also less flexible: `apply_func` only sees
        one parameter selection at a time and cannot view all parameters.

        !!! note
            If `apply_func` is a Numba-compiled function:

            * All inputs are automatically converted to NumPy arrays
            * Each argument in `*args` must be of a Numba-compatible type
            * You cannot pass keyword arguments
            * Your outputs must be arrays of the same shape, data type and data order

        Args:
            apply_func (function): A function (can be Numba-compiled) that takes broadcasted
                time series arrays corresponding to `ts_names`, a single parameter selection
                corresponding to `param_names`, and other arguments and keyword arguments,
                and returns outputs corresponding to `output_names`.
            caching_func (function): A caching function to preprocess data beforehand.
                All returned objects are passed as additional arguments to `apply_func`.
        Returns:
            CustomIndicator
        Examples:
            ```python-repl
            >>> @njit
            ... def apply_func_nb(ts1, ts2, p1, p2, arg1):
            ...     return ts1 * p1 + arg1, ts2 * p2 + arg1

            >>> MyInd = vbt.IndicatorFactory(
            ...     ts_names=['ts1', 'ts2'],
            ...     param_names=['p1', 'p2'],
            ...     output_names=['o1', 'o2']
            ... ).from_apply_func(apply_func_nb)

            >>> myInd = MyInd.from_params(price_sm, price_sm * 2, [1, 2], [3, 4], 100)
            >>> print(myInd.o1)
            custom_p1       1      1      2      2
            custom_p2       3      3      4      4
                            a      b      a      b
            2018-01-01  101.0  105.0  102.0  110.0
            2018-01-02  102.0  104.0  104.0  108.0
            2018-01-03  103.0  103.0  106.0  106.0
            2018-01-04  104.0  102.0  108.0  104.0
            2018-01-05  105.0  101.0  110.0  102.0
            >>> print(myInd.o2)
            custom_p1       1      1      2      2
            custom_p2       3      3      4      4
                            a      b      a      b
            2018-01-01  106.0  130.0  108.0  140.0
            2018-01-02  112.0  124.0  116.0  132.0
            2018-01-03  118.0  118.0  124.0  124.0
            2018-01-04  124.0  112.0  132.0  116.0
            2018-01-05  130.0  106.0  140.0  108.0
            ```
        """
        has_many_outputs = len(self.output_names) > 1

        def _cache_as_args(cached):
            """Return `cached` in a form that can be star-unpacked into a call."""
            if cached is None:
                return ()
            if isinstance(cached, (tuple, list, List)):
                return cached
            return (cached, )

        if checks.is_numba_func(apply_func):
            apply_and_concat_func = (
                combine_fns.apply_and_concat_multiple_nb
                if has_many_outputs else combine_fns.apply_and_concat_one_nb)

            @njit
            def select_params_func_nb(i, apply_func, ts_list, param_tuples,
                                      *args):
                # Select the next tuple of parameters
                return apply_func(*ts_list, *param_tuples[i], *args)

            def custom_func(ts_list,
                            param_list,
                            *args,
                            return_cache=False,
                            cache=None):
                # Hand Numba a tuple of arrays and a typed list of parameter
                # tuples (the original notes this avoids deprecation warnings)
                arrays_2d = tuple(ts.vbt.to_2d_array() for ts in ts_list)
                param_combos = List()
                for combo in zip(*param_list):
                    param_combos.append(combo)

                # Build the cache only if the caller did not supply one
                if cache is None and caching_func is not None:
                    cache = caching_func(*arrays_2d, *param_list, *args)
                if return_cache:
                    return cache
                cache = _cache_as_args(cache)

                n_combos = param_list[0].shape[0]
                return apply_and_concat_func(n_combos, select_params_func_nb,
                                             apply_func, arrays_2d,
                                             param_combos, *args, *cache)
        else:
            apply_and_concat_func = (combine_fns.apply_and_concat_multiple
                                     if has_many_outputs else
                                     combine_fns.apply_and_concat_one)

            def select_params_func(i, apply_func, ts_list, param_list, *args,
                                   **kwargs):
                # Take the i-th value of every parameter
                selected = [values[i] for values in param_list]
                return apply_func(*ts_list, *selected, *args, **kwargs)

            def custom_func(ts_list,
                            param_list,
                            *args,
                            return_cache=False,
                            cache=None,
                            **kwargs):
                # Build the cache only if the caller did not supply one
                if cache is None and caching_func is not None:
                    cache = caching_func(*ts_list, *param_list, *args,
                                         **kwargs)
                if return_cache:
                    return cache
                cache = _cache_as_args(cache)

                n_combos = param_list[0].shape[0]
                return apply_and_concat_func(n_combos, select_params_func,
                                             apply_func, ts_list, param_list,
                                             *args, *cache, **kwargs)

        return self.from_custom_func(custom_func, pass_lists=True)
예제 #13
0
    def combine_with_multiple(self,
                              others,
                              *args,
                              combine_func=None,
                              pass_2d=False,
                              concat=False,
                              broadcast_kwargs=None,
                              as_columns=None,
                              **kwargs):
        """Combine with `others` using `combine_func`.

        All arguments will be broadcasted using `vectorbt.utils.reshape_fns.broadcast`
        with `broadcast_kwargs`.

        If `concat` is `True`, concatenate the results along columns,
        see `vectorbt.utils.combine_fns.combine_and_concat`.
        Otherwise, pairwise combine into a Series/DataFrame of the same shape,
        see `vectorbt.utils.combine_fns.combine_multiple`.

        Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
        If `pass_2d` is `True`, 2-dimensional NumPy arrays will be passed, otherwise as is.
        Use `as_columns` as a top-level column level.

        Args:
            others (iterable): Objects to combine with; accessors are unwrapped to
                their underlying object.
            *args: Positional arguments passed to `combine_func`.
            combine_func (callable): Combining function. Required; can be Numba-compiled.
            pass_2d (bool): Whether to reshape the broadcasted arrays to 2 dimensions.
            concat (bool): Whether to concatenate the results along columns.
            broadcast_kwargs (dict): Keyword arguments passed to the broadcasting function.
                Defaults to an empty dict. The passed dict is not modified.
            as_columns: Top column level to use when `concat` is `True`.
            **kwargs: Keyword arguments passed to `combine_func`.

        !!! note
            If `combine_func` is Numba-compiled, will broadcast using `writeable=True` and
            copy using `order='C'` flags, which can lead to an expensive computation overhead if
            passed objects are large and have different shape/memory order. You also must ensure
            that all objects have the same data type.

            Also remember to bring each in `*args` to a Numba-compatible format.

        Example:
            ```python-repl
            >>> import pandas as pd
            >>> sr = pd.Series([1, 2], index=['x', 'y'])
            >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

            >>> print(sr.vbt.combine_with_multiple([df, df*2],
            ...     combine_func=lambda x, y: x + y))
                a   b
            x  10  13
            y  17  20

            >>> print(sr.vbt.combine_with_multiple([df, df*2],
            ...     combine_func=lambda x, y: x + y, concat=True, as_columns=['c', 'd']))
                  c       d
               a  b   a   b
            x  4  5   7   9
            y  7  8  12  14
            ```"""
        # A fresh dict per call instead of a shared mutable default
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        others = tuple(
            map(lambda x: x._obj
                if isinstance(x, Base_Accessor) else x, others))
        checks.assert_not_none(combine_func)
        checks.assert_type(others, Iterable)
        is_nb = checks.is_numba_func(combine_func)
        # Broadcast arguments
        if is_nb:
            # Numba requires writable arrays
            # (built as a new dict, so the caller's dict stays untouched)
            broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
            # Plus all of our arrays must be in the same order
            broadcast_kwargs['copy_kwargs'] = {
                **dict(order='C'),
                **broadcast_kwargs.get('copy_kwargs', {})
            }
        new_obj, *new_others = reshape_fns.broadcast(self._obj, *others,
                                                     **broadcast_kwargs)
        # Optionally cast to 2d array
        if pass_2d:
            bc_arrays = tuple(
                reshape_fns.to_2d(np.asarray(x))
                for x in (new_obj, *new_others))
        else:
            bc_arrays = tuple(np.asarray(x) for x in (new_obj, *new_others))
        if concat:
            # Concat the results horizontally
            if is_nb:
                for i in range(1, len(bc_arrays)):
                    checks.assert_same_meta(bc_arrays[i - 1], bc_arrays[i])
                result = combine_fns.combine_and_concat_nb(
                    bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(
                    bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            columns = new_obj.vbt.columns
            if as_columns is not None:
                new_columns = index_fns.combine_indexes(as_columns, columns)
            else:
                new_columns = index_fns.tile_index(columns, len(others))
            return new_obj.vbt.wrap_array(result, columns=new_columns)
        else:
            # Combine arguments pairwise into one object
            if is_nb:
                for i in range(1, len(bc_arrays)):
                    checks.assert_same_dtype(bc_arrays[i - 1], bc_arrays[i])
                result = combine_fns.combine_multiple_nb(
                    bc_arrays, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(bc_arrays, combine_func,
                                                      *args, **kwargs)
            return new_obj.vbt.wrap_array(result)
예제 #14
0
    def combine_with_multiple(self,
                              others,
                              *args,
                              combine_func=None,
                              concat=False,
                              broadcast_kwargs=None,
                              as_columns=None,
                              **kwargs):
        """Broadcast with other objects to the same shape and combine them all pairwise.

        The returned shape is the same as broadcasted shape if concat is False.
        The returned shape is concatenation of broadcasted shapes if concat is True.

        Args:
            others (iterable): Objects to combine with; accessors are unwrapped to
                their underlying object.
            *args: Positional arguments passed to `combine_func`.
            combine_func (callable): Combining function. Required; can be Numba-compiled.
            concat (bool): Whether to concatenate the results along columns.
            broadcast_kwargs (dict): Keyword arguments passed to the broadcasting function.
                Defaults to an empty dict. The passed dict is not modified.
            as_columns: Top column level to use when `concat` is True.
            **kwargs: Keyword arguments passed to `combine_func`.
        """
        # A fresh dict per call instead of a shared mutable default
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        others = tuple(
            map(lambda x: x._obj
                if isinstance(x, Base_Accessor) else x, others))
        checks.assert_not_none(combine_func)
        checks.assert_type(others, Iterable)
        is_nb = checks.is_numba_func(combine_func)
        # Broadcast arguments
        if is_nb:
            # Numba requires writable arrays
            # (built as a new dict, so the caller's dict stays untouched)
            broadcast_kwargs = {**dict(writeable=True), **broadcast_kwargs}
            # Plus all of our arrays must be in the same order
            broadcast_kwargs['copy_kwargs'] = {
                **dict(order='C'),
                **broadcast_kwargs.get('copy_kwargs', {})
            }
        new_obj, *new_others = reshape_fns.broadcast(self._obj, *others,
                                                     **broadcast_kwargs)
        broadcasted = tuple(map(np.asarray, (new_obj, *new_others)))
        if concat:
            # Concat the results horizontally
            if is_nb:
                for i in range(1, len(broadcasted)):
                    # NOTE: all inputs must have the same dtype
                    checks.assert_same_meta(broadcasted[i - 1], broadcasted[i])
                result = combine_fns.combine_and_concat_nb(
                    broadcasted[0], broadcasted[1:], combine_func, *args,
                    **kwargs)
            else:
                result = combine_fns.combine_and_concat(
                    broadcasted[0], broadcasted[1:], combine_func, *args,
                    **kwargs)
            # Build column hierarchy
            columns = reshape_fns.to_2d(new_obj).columns
            if as_columns is not None:
                new_columns = index_fns.combine(as_columns, columns)
            else:
                new_columns = index_fns.tile(columns, len(others))
            return new_obj.vbt.wrap_array(result, columns=new_columns)
        else:
            # Combine arguments pairwise into one object
            if is_nb:
                for i in range(1, len(broadcasted)):
                    # NOTE: all inputs must have the same dtype
                    checks.assert_same_dtype(broadcasted[i - 1],
                                             broadcasted[i])
                result = combine_fns.combine_multiple_nb(
                    broadcasted, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(broadcasted,
                                                      combine_func, *args,
                                                      **kwargs)
            return new_obj.vbt.wrap_array(result)
예제 #15
0
    def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                              concat=False, broadcast_kwargs=None, keys=None, **kwargs):
        """Combine with `others` using `combine_func`.

        All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
        with `broadcast_kwargs`.

        If `concat` is True, concatenate the results along columns,
        see `vectorbt.base.combine_fns.combine_and_concat`.
        Otherwise, pairwise combine into a Series/DataFrame of the same shape,
        see `vectorbt.base.combine_fns.combine_multiple`.

        Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
        If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.
        Use `keys` as the outermost level. Without `keys`, the outermost level is
        a range index named `combine_idx`.

        `broadcast_kwargs` defaults to an empty dict.

        !!! note
            If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
            flags, which can lead to an expensive computation overhead if passed objects are large and
            have different shape/memory order. You also must ensure that all objects have the same data type.

            Also remember to bring each in `*args` to a Numba-compatible format.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y)
            a   b
        x  10  13
        y  17  20

        >>> sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
              c       d
           a  b   a   b
        x  4  5   7   9
        y  7  8  12  14
        ```
        """
        # A fresh dict per call instead of a shared mutable default
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
        checks.assert_not_none(combine_func)
        checks.assert_type(others, Iterable)
        is_nb = checks.is_numba_func(combine_func)
        # Broadcast arguments
        if is_nb:
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
        new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
        # Optionally cast to 2d array
        if to_2d:
            bc_arrays = tuple(reshape_fns.to_2d(x, raw=True) for x in (new_obj, *new_others))
        else:
            bc_arrays = tuple(np.asarray(x) for x in (new_obj, *new_others))
        if concat:
            # Concat the results horizontally
            if is_nb:
                for i in range(1, len(bc_arrays)):
                    checks.assert_meta_equal(bc_arrays[i - 1], bc_arrays[i])
                result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            # Build column hierarchy
            columns = new_obj.vbt.wrapper.columns
            if keys is not None:
                new_columns = index_fns.combine_indexes(keys, columns)
            else:
                top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
                new_columns = index_fns.combine_indexes(top_columns, columns)
            return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
        else:
            # Combine arguments pairwise into one object
            if is_nb:
                for i in range(1, len(bc_arrays)):
                    checks.assert_dtype_equal(bc_arrays[i - 1], bc_arrays[i])
                result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
            return new_obj.vbt.wrapper.wrap(result)
예제 #16
0
    def combine_with(self,
                     other,
                     *args,
                     combine_func=None,
                     keep_pd=False,
                     to_2d=False,
                     broadcast=True,
                     broadcast_kwargs=None,
                     wrap_kwargs=None,
                     **kwargs):
        """Merge this object with `other` by calling `combine_func` on the pair.

        The wrapped object and `other` are optionally broadcast against each other,
        optionally reshaped/converted, handed to `combine_func`, and the output
        is wrapped back using the wrapper of the (broadcast) wrapped object.

        Args:
            other (array_like): Second operand. If an accessor is passed, its underlying object is used.
            *args: Positional arguments forwarded to `combine_func` after the two operands.
            combine_func (callable): Two-operand combining function. Must not be None.

                Can be Numba-compiled.
            keep_pd (bool): If True, operands stay pandas objects; if False, they become NumPy arrays.
            to_2d (bool): If True, operands are reshaped to two dimensions first.
            broadcast (bool): If True, operands are broadcast before combining.
            broadcast_kwargs (dict): Extra keyword arguments for `vectorbt.base.reshape_fns.broadcast`.
            wrap_kwargs (dict): Extra keyword arguments for `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
            **kwargs: Keyword arguments forwarded to `combine_func`.

        !!! note
            The output of `combine_func` must have the same shape as the broadcast operands.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
        >>> sr.vbt.combine_with(df, combine_func=lambda x, y: x + y)
           a  b
        x  4  5
        y  7  8
        ```
        """
        # Unwrap accessors so that only raw pandas/NumPy objects are processed below
        if isinstance(other, BaseAccessor):
            other = other._obj
        checks.assert_not_none(combine_func)

        if not broadcast:
            new_obj, new_other = self._obj, other
        else:
            bc_kwargs = {} if broadcast_kwargs is None else broadcast_kwargs
            if checks.is_numba_func(combine_func):
                # Numba needs writable arrays; user-supplied kwargs still take precedence
                writable_defaults = dict(require_kwargs=dict(requirements='W'))
                bc_kwargs = merge_dicts(writable_defaults, bc_kwargs)
            new_obj, new_other = reshape_fns.broadcast(self._obj, other, **bc_kwargs)

        # Prepare the two operands in the form requested by the caller
        if to_2d:
            left, right = (
                reshape_fns.to_2d(operand, raw=not keep_pd)
                for operand in (new_obj, new_other)
            )
        elif keep_pd:
            left, right = new_obj, new_other
        else:
            left, right = np.asarray(new_obj), np.asarray(new_other)

        combined = combine_func(left, right, *args, **kwargs)
        final_wrap_kwargs = merge_dicts({}, wrap_kwargs)
        return new_obj.vbt.wrapper.wrap(combined, **final_wrap_kwargs)