Пример #1
0
    def combine(self,
                other: tp.MaybeTupleList[tp.Union[tp.ArrayLike,
                                                  "BaseAccessor"]],
                *args,
                allow_multiple: bool = True,
                combine_func: tp.Optional[tp.Callable] = None,
                keep_pd: bool = False,
                to_2d: bool = False,
                concat: bool = False,
                numba_loop: bool = False,
                use_ray: bool = False,
                broadcast: bool = True,
                broadcast_kwargs: tp.KwargsLike = None,
                keys: tp.Optional[tp.IndexLike] = None,
                wrap_kwargs: tp.KwargsLike = None,
                **kwargs) -> tp.SeriesFrame:
        """Combine this object with `other` by applying `combine_func`.

        When exactly one other object is involved, `combine_func` is called once on the pair
        and the result is wrapped directly; `concat`, `numba_loop`, `use_ray` and `keys`
        only take effect when there are two or more other objects.

        Args:
            other (array_like): Object (or objects) to combine this array with.
            *args: Positional arguments forwarded to `combine_func`.
            allow_multiple (bool): Whether a tuple/list passed as `other` is treated
                as several objects rather than a single one.
            combine_func (callable): Function that combines two arrays. Must not be None.

                Can be Numba-compiled.
            keep_pd (bool): Whether inputs stay pandas objects; if False, they are
                converted to NumPy arrays.
            to_2d (bool): Whether inputs are reshaped to 2-dim arrays; if False, kept as-is.
            concat (bool): Whether results are concatenated along the column axis.
                If False, objects are combined pairwise into a Series/DataFrame of the same shape.

                If True, see `vectorbt.base.combine_fns.combine_and_concat`.
                If False, see `vectorbt.base.combine_fns.combine_multiple`.
            numba_loop (bool): Whether to run the loop in Numba
                (only has an effect if `combine_func` is Numba-compiled).

                Useful when iterating many times over small input, but note that
                Numba doesn't support variable keyword arguments.
            use_ray (bool): Whether to run `combine_func` in parallel using Ray.

                Requires `concat=True` and cannot be used together with a Numba loop.
                See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
            broadcast (bool): Whether to broadcast all inputs against each other.
            broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
            keys (index_like): Outermost column level of the concatenated result.
                If None, a range index named `combine_idx` is used.
            wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
            **kwargs: Keyword arguments forwarded to `combine_func`.

        Raises:
            ValueError: If `use_ray` is combined with a Numba loop or with `concat=False`
                (when there are two or more other objects).

        !!! note
            If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
            flags, which can lead to an expensive computation overhead if passed objects are large and
            have different shape/memory order. You also must ensure that all objects have the same data type.

            Also remember to bring each in `*args` to a Numba-compatible format.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
           a  b
        x  4  5
        y  7  8

        >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y)
            a   b
        x  10  13
        y  17  20

        >>> sr.vbt.combine([df, df*2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
              c       d
           a  b   a   b
        x  4  5   7   9
        y  7  8  12  14
        ```

        Use Ray for small inputs and large processing times:

        ```python-repl
        >>> import time

        >>> def combine_func(a, b):
        ...     time.sleep(1)
        ...     return a + b

        >>> sr = pd.Series([1, 2, 3])

        >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
        3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

        >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
        1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
        ```
        """
        # Normalize `other` into a sequence and unwrap any accessors to their underlying objects
        if allow_multiple and isinstance(other, (tuple, list)):
            others = other
        else:
            others = (other, )
        others = tuple(
            item.obj if isinstance(item, BaseAccessor) else item
            for item in others)
        checks.assert_not_none(combine_func)

        # Bring all objects to a common shape
        if not broadcast:
            new_obj, new_others = self.obj, others
        else:
            if broadcast_kwargs is None:
                broadcast_kwargs = {}
            if checks.is_numba_func(combine_func):
                # Numba needs writeable arrays that all share the same memory order;
                # user-supplied broadcast_kwargs are merged on top of these requirements
                nb_requirements = dict(
                    require_kwargs=dict(requirements=['W', 'C']))
                broadcast_kwargs = merge_dicts(nb_requirements,
                                               broadcast_kwargs)
            broadcasted = reshape_fns.broadcast(self.obj, *others,
                                                **broadcast_kwargs)
            new_obj, *new_others = broadcasted
        if not checks.is_pandas(new_obj):
            # The wrapper below is built from a pandas object
            new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)

        # Prepare the arrays that are actually handed to `combine_func`
        operands = (new_obj, *new_others)
        if to_2d:
            inputs = tuple(
                reshape_fns.to_2d(operand, raw=not keep_pd)
                for operand in operands)
        elif keep_pd:
            inputs = operands
        else:
            inputs = tuple(np.asarray(operand) for operand in operands)

        # Single pair: one call, no looping/concatenation machinery involved
        if len(inputs) == 2:
            left, right = inputs
            result = combine_func(left, right, *args, **kwargs)
            return ArrayWrapper.from_obj(new_obj).wrap(
                result, **merge_dicts({}, wrap_kwargs))

        if not concat:
            # Fold all inputs pairwise into a single object of the same shape
            if use_ray:
                raise ValueError("Ray cannot be used with concat=False")
            if checks.is_numba_func(combine_func) and numba_loop:
                for previous, current in zip(inputs[:-1], inputs[1:]):
                    checks.assert_dtype_equal(previous, current)
                result = combine_fns.combine_multiple_nb(
                    inputs, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(
                    inputs, combine_func, *args, **kwargs)
            return ArrayWrapper.from_obj(new_obj).wrap(
                result, **merge_dicts({}, wrap_kwargs))

        # Combine the first input with each of the others and stack the results horizontally
        if checks.is_numba_func(combine_func) and numba_loop:
            if use_ray:
                raise ValueError("Ray cannot be used within Numba")
            for previous, current in zip(inputs[:-1], inputs[1:]):
                checks.assert_meta_equal(previous, current)
            concat_backend = combine_fns.combine_and_concat_nb
        elif use_ray:
            concat_backend = combine_fns.combine_and_concat_ray
        else:
            concat_backend = combine_fns.combine_and_concat
        result = concat_backend(inputs[0], inputs[1:], combine_func, *args,
                                **kwargs)

        # Stack an outer column level (one entry per other object) on top of the existing columns
        wrapper = ArrayWrapper.from_obj(new_obj)
        columns = wrapper.columns
        if keys is None:
            outer_level = pd.Index(np.arange(len(new_others)),
                                   name='combine_idx')
        else:
            outer_level = keys
        new_columns = index_fns.combine_indexes([outer_level, columns])
        return wrapper.wrap(
            result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
Пример #2
0
    def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                              concat=False, broadcast_kwargs=None, keys=None, **kwargs):
        """Combine with `others` using `combine_func`.

        All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
        with `broadcast_kwargs`.

        If `concat` is True, concatenate the results along columns,
        see `vectorbt.base.combine_fns.combine_and_concat`.
        Otherwise, pairwise combine into a Series/DataFrame of the same shape,
        see `vectorbt.base.combine_fns.combine_multiple`.

        Args:
            others (iterable): Objects to combine this object with. Accessors are
                replaced by their underlying objects.
            *args: Positional arguments passed directly to `combine_func`.
            combine_func (callable): Function to combine two arrays. Must not be None.

                Can be Numba-compiled, in which case the Numba variants of the
                combining functions are used.
            to_2d (bool): Whether to pass 2-dimensional NumPy arrays to `combine_func`,
                otherwise arrays are passed as-is (converted with `np.asarray`).
            concat (bool): Whether to concatenate the results along columns.
            broadcast_kwargs (dict): Keyword arguments passed to
                `vectorbt.base.reshape_fns.broadcast`. Defaults to no extra arguments.
            keys (index_like): Outermost column level of the concatenated result.
                If None, a range index named `combine_idx` is used.
            **kwargs: Keyword arguments passed directly to `combine_func`.

        !!! note
            If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
            flags, which can lead to an expensive computation overhead if passed objects are large and
            have different shape/memory order. You also must ensure that all objects have the same data type.

            Also remember to bring each in `*args` to a Numba-compatible format.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y)
            a   b
        x  10  13
        y  17  20

        >>> sr.vbt.combine_with_multiple([df, df*2],
        ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
              c       d
           a  b   a   b
        x  4  5   7   9
        y  7  8  12  14
        ```
        """
        # A fresh dict per call instead of a shared mutable default argument
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        # Unwrap accessors to their underlying objects
        others = tuple(x._obj if isinstance(x, Base_Accessor) else x for x in others)
        checks.assert_not_none(combine_func)
        # NOTE(review): `others` has already been converted to a tuple above, so this check
        # presumably can never fail here; kept in place to preserve the existing behavior.
        checks.assert_type(others, Iterable)
        # Broadcast arguments
        if checks.is_numba_func(combine_func):
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
        new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
        # Optionally cast to 2d array
        if to_2d:
            bc_arrays = tuple(reshape_fns.to_2d(x, raw=True) for x in (new_obj, *new_others))
        else:
            bc_arrays = tuple(map(np.asarray, (new_obj, *new_others)))
        if concat:
            # Concat the results horizontally
            if checks.is_numba_func(combine_func):
                # Each array is checked against its predecessor before entering the Numba loop
                for previous, current in zip(bc_arrays[:-1], bc_arrays[1:]):
                    checks.assert_meta_equal(previous, current)
                result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
            columns = new_obj.vbt.wrapper.columns
            if keys is not None:
                new_columns = index_fns.combine_indexes(keys, columns)
            else:
                # No keys given: label each combined object by its position
                top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
                new_columns = index_fns.combine_indexes(top_columns, columns)
            return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
        else:
            # Combine arguments pairwise into one object
            if checks.is_numba_func(combine_func):
                for previous, current in zip(bc_arrays[:-1], bc_arrays[1:]):
                    checks.assert_dtype_equal(previous, current)
                result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
            return new_obj.vbt.wrapper.wrap(result)