def concat(self_or_cls, *others, keys=None, broadcast_kwargs={}):
    """Concatenate with `others` along columns.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.

    Use `keys` as the outermost level.

    Example:
        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> sr = pd.Series([1, 2], index=['x', 'y'])
        >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

        >>> sr.vbt.concat(df, keys=['c', 'd'])
              c     d
           a  b  a  b
        x  1  1  3  4
        y  2  2  5  6
        ```"""
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    if isinstance(self_or_cls, type):
        objs = others
    else:
        objs = (self_or_cls._obj,) + others
    broadcasted = reshape_fns.broadcast(*objs, **broadcast_kwargs)
    broadcasted = tuple(map(reshape_fns.to_2d, broadcasted))
    concatenated = pd.concat(broadcasted, axis=1)
    if keys is not None:
        concatenated.columns = index_fns.combine_indexes(keys, broadcasted[0].columns)
    return concatenated
def generate_stop_loss_exits(self, ts, stops, trailing=False, first=True, keys=None, broadcast_kwargs={}):
    """See `vectorbt.signals.nb.generate_stop_loss_exits_nb`.

    Arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.
    Argument `stops` can be either a single number, an array of numbers, or a 3D array,
    where each matrix corresponds to a single configuration.

    Use `keys` as the outermost level.

    Example:
        For each entry in `sig`, set a stop loss at 10% and 50% below the entry price:

        ```python-repl
        >>> ts = pd.Series([1, 2, 3, 2, 1])

        >>> print(sig.vbt.signals.generate_stop_loss_exits(ts, [0.1, 0.5]))
        stop_loss                  0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02  False  False  False  False  False  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04  False   True   True  False  False  False
        2020-01-05  False  False  False  False  False   True

        >>> print(sig.vbt.signals.generate_stop_loss_exits(ts, [0.1, 0.5], trailing=True))
        trail_stop                 0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02  False  False  False  False  False  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04   True   True   True  False  False  False
        2020-01-05  False  False  False   True  False   True
        ```"""
    entries = self._obj
    checks.assert_type(ts, (pd.Series, pd.DataFrame))

    entries, ts = reshape_fns.broadcast(entries, ts, **broadcast_kwargs, writeable=True)
    stops = reshape_fns.broadcast_to_array_of(stops, entries.vbt.to_2d_array())
    exits = nb.generate_stop_loss_exits_nb(
        entries.vbt.to_2d_array(),
        ts.vbt.to_2d_array(),
        stops,
        trailing=trailing,
        first=first)

    # Build column hierarchy
    if keys is not None:
        param_columns = keys
    else:
        name = 'trail_stop' if trailing else 'stop_loss'
        param_columns = index_fns.index_from_values(stops, name=name)
    columns = index_fns.combine_indexes(param_columns, entries.vbt.columns)
    return entries.vbt.wrap(exits, columns=columns)
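Both the stop-loss example above and the take-profit example further below rely on an entry-signal frame `sig` defined elsewhere in the documentation. A minimal reconstruction that is consistent with the outputs shown (hypothetical; the exact original definition is not part of this section):

```python
import pandas as pd

# Hypothetical entry signals consistent with the example outputs:
# 'a' enters on the first bar, 'b' on the first, third and fifth bars,
# 'c' on the third bar.
sig = pd.DataFrame({
    'a': [True, False, False, False, False],
    'b': [True, False, True, False, True],
    'c': [False, False, True, False, False]
}, index=pd.date_range('2020-01-01', periods=5))
```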
def repeat(self, n, keys=None, axis=1):
    """See `vectorbt.base.reshape_fns.repeat`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self._obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes(self.wrapper.columns, keys)
            return repeated.vbt.wrapper.wrap(repeated.values, columns=new_columns)
        else:
            new_index = index_fns.combine_indexes(self.wrapper.index, keys)
            return repeated.vbt.wrapper.wrap(repeated.values, index=new_index)
    return repeated
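Since `reshape_fns.repeat` lays columns out as `a, a, b, b`, the original columns stay on top of the hierarchy and `keys` labels the repetitions beneath them. A minimal sketch (assuming `vectorbt` is installed and registers the `.vbt` accessor):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])

# Each column is repeated twice: a, a, b, b.
# With keys, the columns become a two-level index:
# (a, p1), (a, p2), (b, p1), (b, p2)
print(df.vbt.repeat(2, keys=['p1', 'p2']))
```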
def repeat(self, n: int, keys: tp.Optional[tp.IndexLike] = None, axis: int = 1,
           wrap_kwargs: tp.KwargsLike = None) -> tp.SeriesFrame:
    """See `vectorbt.base.reshape_fns.repeat`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self.obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes([self.wrapper.columns, keys])
            return ArrayWrapper.from_obj(repeated).wrap(
                repeated.values, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
        else:
            new_index = index_fns.combine_indexes([self.wrapper.index, keys])
            return ArrayWrapper.from_obj(repeated).wrap(
                repeated.values, **merge_dicts(dict(index=new_index), wrap_kwargs))
    return repeated
def repeat(self, n, keys=None):
    """See `vectorbt.base.reshape_fns.repeat`.

    Use `keys` as the innermost level."""
    repeated = reshape_fns.repeat(self._obj, n, axis=1)
    if keys is not None:
        new_columns = index_fns.combine_indexes(self.columns, keys)
        return self.wrap(repeated.values, columns=new_columns)
    return repeated
def tile(self, n, keys=None):
    """See `vectorbt.base.reshape_fns.tile`.

    Use `keys` as the outermost level."""
    tiled = reshape_fns.tile(self._obj, n, axis=1)
    if keys is not None:
        new_columns = index_fns.combine_indexes(keys, self.columns)
        return self.wrap(tiled.values, columns=new_columns)
    return tiled
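In contrast to `repeat`, `tile` lays columns out as `a, b, a, b`, so `keys` becomes the outermost level. A minimal sketch (same assumptions as above):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])

# The whole column set is tiled twice: a, b, a, b.
# With keys, the columns become a two-level index:
# (p1, a), (p1, b), (p2, a), (p2, b)
print(df.vbt.tile(2, keys=['p1', 'p2']))
```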
def apply_and_concat(self, ntimes, *args, apply_func=None, to_2d=False, keys=None, **kwargs):
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Arguments `*args` and `**kwargs` will be directly passed to `apply_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Use `keys` as the outermost level.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj_arr = reshape_fns.to_2d(self._obj, raw=True)
    else:
        obj_arr = np.asarray(self._obj)
    if checks.is_numba_func(apply_func):
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj_arr, *args, **kwargs)
    else:
        result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj_arr, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes(keys, self.wrapper.columns)
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes(top_columns, self.wrapper.columns)
    return self.wrapper.wrap(result, columns=new_columns, group_by=False)
def tile(self, n, keys=None, axis=1, wrap_kwargs=None):
    """See `vectorbt.base.reshape_fns.tile`.

    Set `axis` to 1 for columns and 0 for index.

    Use `keys` as the outermost level."""
    tiled = reshape_fns.tile(self._obj, n, axis=axis)
    if keys is not None:
        if axis == 1:
            new_columns = index_fns.combine_indexes(keys, self.wrapper.columns)
            return tiled.vbt.wrapper.wrap(
                tiled.values, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
        else:
            new_index = index_fns.combine_indexes(keys, self.wrapper.index)
            return tiled.vbt.wrapper.wrap(
                tiled.values, **merge_dicts(dict(index=new_index), wrap_kwargs))
    return tiled
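This variant can also tile along the index. A minimal sketch, assuming the variant above (with an `axis` parameter) is the one registered on the accessor:

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame([[1, 2], [3, 4]], index=['x', 'y'], columns=['a', 'b'])

# Rows are tiled twice: x, y, x, y.
# With keys, the index becomes a two-level index:
# (p1, x), (p1, y), (p2, x), (p2, y)
print(df.vbt.tile(2, keys=['p1', 'p2'], axis=0))
```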
def generate_take_profit_exits(self, ts, stops, first=True, iteratively=False, keys=None, broadcast_kwargs={}):
    """Generate take profit exits.

    See `vectorbt.signals.nb.generate_tp_ex_iter_nb` if `iteratively` is `True`,
    otherwise see `vectorbt.signals.nb.generate_tp_ex_nb`.

    Arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast` with `broadcast_kwargs`.
    Argument `stops` can be either a single number, an array of numbers, or a 3D array,
    where each matrix corresponds to a single configuration.

    Use `keys` as the outermost level.

    Example:
        For each entry in `sig`, set a take profit at 10% and 50% above the entry price:

        ```python-repl
        >>> ts = pd.Series([1, 2, 3, 4, 5])

        >>> sig.vbt.signals.generate_take_profit_exits(ts, [0.1, 0.5])
        take_profit                0.1                  0.5
                        a      b      c      a      b      c
        2020-01-01  False  False  False  False  False  False
        2020-01-02   True   True  False   True   True  False
        2020-01-03  False  False  False  False  False  False
        2020-01-04  False   True   True  False  False  False
        2020-01-05  False  False  False  False  False   True
        ```"""
    entries = self._obj
    checks.assert_type(ts, (pd.Series, pd.DataFrame))

    broadcast_kwargs = merge_kwargs(dict(require_kwargs=dict(requirements='W')), broadcast_kwargs)
    entries, ts = reshape_fns.broadcast(entries, ts, **broadcast_kwargs)
    stops = reshape_fns.broadcast_to_array_of(stops, entries.vbt.to_2d_array())

    # Build column hierarchy
    if keys is not None:
        param_columns = keys
    else:
        param_columns = index_fns.index_from_values(stops, name='take_profit')
    columns = index_fns.combine_indexes(param_columns, entries.vbt.columns)

    # Perform generation
    if iteratively:
        new_entries, exits = nb.generate_tp_ex_iter_nb(
            entries.vbt.to_2d_array(),
            ts.vbt.to_2d_array(),
            stops)
        return entries.vbt.wrap(new_entries, columns=columns), entries.vbt.wrap(exits, columns=columns)
    else:
        exits = nb.generate_tp_ex_nb(
            entries.vbt.to_2d_array(),
            ts.vbt.to_2d_array(),
            stops,
            first=first)
        return entries.vbt.wrap(exits, columns=columns)
def split_into_ranges(self, n=None, range_len=None):
    """Split into `n` ranges, each `range_len` long.

    At least one of `range_len` and `n` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.

    !!! note
        The datetime-like format of the index will be lost as a result of this operation.
        Make sure to store index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0
        ```"""
    if range_len is None and n is None:
        raise ValueError("At least range_len or n must be set")
    if range_len is None:
        range_len = len(self.index) // n
    cube = nb.rolling_window_nb(self.to_2d_array(), range_len)
    if n is not None:
        if n > cube.shape[2]:
            raise ValueError(f"n cannot be bigger than the maximum number of ranges {cube.shape[2]}")
        idxs = np.round(np.linspace(0, cube.shape[2] - 1, n)).astype(int)
        cube = cube[:, :, idxs]
    else:
        idxs = np.arange(cube.shape[2])
    matrix = np.hstack(cube)
    range_starts = pd.Index(self.index[idxs], name='range_start')
    range_ends = pd.Index(self.index[idxs + range_len - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
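The note above is easy to trip over in practice: the result is a plain integer-indexed frame, so keep any index metadata you still need around before splitting. A minimal sketch (assuming the accessor is registered as `.vbt`):

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor

df = pd.DataFrame({'a': range(5)}, index=pd.date_range('2020-01-01', periods=5))

# Store index metadata (e.g. frequency) before it is lost
freq = df.index.freq

ranges = df.vbt.split_into_ranges(n=2)  # columns keep the range start/end labels
```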
def build_column_hierarchy(param_list, level_names, ts_columns):
    """For each parameter in `param_list`, create a new column level with parameter values.
    Combine this level with the columns `ts_columns` using a Cartesian product.

    Excludes level names that are `None`."""
    checks.assert_same_shape(param_list, level_names, axis=0)

    param_indexes = []
    for i in range(len(param_list)):
        if level_names[i] is not None:
            param_index = index_fns.index_from_values(param_list[i], name=level_names[i])
            param_indexes.append(param_index)
    if len(param_indexes) > 1:
        param_columns = index_fns.stack_indexes(*param_indexes)
    elif len(param_indexes) == 1:
        param_columns = param_indexes[0]
    else:
        param_columns = None
    if param_columns is not None:
        return index_fns.combine_indexes(param_columns, ts_columns)
    return ts_columns
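To make the construction concrete, a plain-pandas sketch of the intended result (hypothetical parameter values; `index_fns.stack_indexes` pairs same-length parameter lists into levels, and `index_fns.combine_indexes` takes the Cartesian product with `ts_columns`):

```python
import pandas as pd

# Two parameter lists of equal length are stacked into two levels...
windows = [2, 3]
alphas = [0.1, 0.5]
param_columns = pd.MultiIndex.from_arrays([windows, alphas], names=['window', 'alpha'])

# ...and then combined with the time-series columns via a Cartesian product
ts_columns = pd.Index(['a', 'b'], name='symbol')
new_columns = pd.MultiIndex.from_tuples(
    [(w, al, c) for (w, al) in param_columns for c in ts_columns],
    names=['window', 'alpha', 'symbol'])
print(new_columns)
```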
def combine(self, other: tp.MaybeTupleList[tp.Union[tp.ArrayLike, "BaseAccessor"]], *args,
            allow_multiple: bool = True, combine_func: tp.Optional[tp.Callable] = None,
            keep_pd: bool = False, to_2d: bool = False, concat: bool = False,
            numba_loop: bool = False, use_ray: bool = False, broadcast: bool = True,
            broadcast_kwargs: tp.KwargsLike = None, keys: tp.Optional[tp.IndexLike] = None,
            wrap_kwargs: tp.KwargsLike = None, **kwargs) -> tp.SeriesFrame:
    """Combine with `other` using `combine_func`.

    Args:
        other (array_like): Object to combine this array with.
        *args: Variable arguments passed to `combine_func`.
        allow_multiple (bool): Whether a tuple/list will be considered as multiple objects in `other`.
        combine_func (callable): Function to combine two arrays.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        concat (bool): Whether to concatenate the results along the column axis.
            Otherwise, pairwise combine into a Series/DataFrame of the same shape.

            If True, see `vectorbt.base.combine_fns.combine_and_concat`.
            If False, see `vectorbt.base.combine_fns.combine_multiple`.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `combine_func` in parallel.

            Only works with `numba_loop` set to False and `concat` set to True.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        broadcast (bool): Whether to broadcast all inputs.
        broadcast_kwargs (dict): Keyword arguments passed to `vectorbt.base.reshape_fns.broadcast`.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `combine_func`.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to expensive computation overhead if the passed objects are large
        and have different shape/memory order. You must also ensure that all objects have the same data type.

        Also remember to bring each object in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine(df, combine_func=lambda x, y: x + y)
       a  b
    x  4  5
    y  7  8

    >>> sr.vbt.combine([df, df * 2], combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine([df, df * 2], combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def combine_func(a, b):
    ...     time.sleep(1)
    ...     return a + b

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func)
    3.01 s ± 2.98 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.combine([1, 1, 1], combine_func=combine_func, concat=True, use_ray=True)
    1.02 s ± 2.32 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    if not allow_multiple or not isinstance(other, (tuple, list)):
        others = (other,)
    else:
        others = other
    others = tuple(map(lambda x: x.obj if isinstance(x, BaseAccessor) else x, others))
    checks.assert_not_none(combine_func)
    # Broadcast arguments
    if broadcast:
        if broadcast_kwargs is None:
            broadcast_kwargs = {}
        if checks.is_numba_func(combine_func):
            # Numba requires writeable arrays
            # Plus all of our arrays must be in the same order
            broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
        new_obj, *new_others = reshape_fns.broadcast(self.obj, *others, **broadcast_kwargs)
    else:
        new_obj, new_others = self.obj, others
    if not checks.is_pandas(new_obj):
        new_obj = ArrayWrapper.from_shape(new_obj.shape).wrap(new_obj)
    # Optionally cast to 2d array
    if to_2d:
        inputs = tuple(map(lambda x: reshape_fns.to_2d(x, raw=not keep_pd), (new_obj, *new_others)))
    else:
        if not keep_pd:
            inputs = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
        else:
            inputs = new_obj, *new_others
    if len(inputs) == 2:
        result = combine_func(inputs[0], inputs[1], *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func) and numba_loop:
            if use_ray:
                raise ValueError("Ray cannot be used within Numba")
            for i in range(1, len(inputs)):
                checks.assert_meta_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_and_concat_nb(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        else:
            if use_ray:
                result = combine_fns.combine_and_concat_ray(inputs[0], inputs[1:], combine_func, *args, **kwargs)
            else:
                result = combine_fns.combine_and_concat(inputs[0], inputs[1:], combine_func, *args, **kwargs)
        columns = ArrayWrapper.from_obj(new_obj).columns
        if keys is not None:
            new_columns = index_fns.combine_indexes([keys, columns])
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes([top_columns, columns])
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
    else:
        # Combine arguments pairwise into one object
        if use_ray:
            raise ValueError("Ray cannot be used with concat=False")
        if checks.is_numba_func(combine_func) and numba_loop:
            for i in range(1, len(inputs)):
                checks.assert_dtype_equal(inputs[i - 1], inputs[i])
            result = combine_fns.combine_multiple_nb(inputs, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(inputs, combine_func, *args, **kwargs)
        return ArrayWrapper.from_obj(new_obj).wrap(result, **merge_dicts({}, wrap_kwargs))
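The note about Numba above matters in practice. A minimal sketch with a Numba-compiled `combine_func` (assuming `numba` is installed; `add_nb` is a hypothetical example function). Both inputs share the same `int64` dtype, as the note requires:

```python
import pandas as pd
import vectorbt as vbt  # noqa: F401 - registers the .vbt accessor
from numba import njit

@njit
def add_nb(a, b):  # hypothetical Numba-compiled combine function
    return a + b

sr = pd.Series([1, 2], index=['x', 'y'])
df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

# Inputs are broadcast with WRITEABLE and C_CONTIGUOUS requirements
# before being handed to the Numba function
print(sr.vbt.combine(df, combine_func=add_nb))
```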
def apply_and_concat(self, ntimes: int, *args, apply_func: tp.Optional[tp.Callable] = None,
                     keep_pd: bool = False, to_2d: bool = False, numba_loop: bool = False,
                     use_ray: bool = False, keys: tp.Optional[tp.IndexLike] = None,
                     wrap_kwargs: tp.KwargsLike = None, **kwargs) -> tp.Frame:
    """Apply `apply_func` `ntimes` times and concatenate the results along columns.
    See `vectorbt.base.combine_fns.apply_and_concat_one`.

    Args:
        ntimes (int): Number of times to call `apply_func`.
        *args: Variable arguments passed to `apply_func`.
        apply_func (callable): Apply function.

            Can be Numba-compiled.
        keep_pd (bool): Whether to keep inputs as pandas objects, otherwise convert to NumPy arrays.
        to_2d (bool): Whether to reshape inputs to 2-dim arrays, otherwise keep as-is.
        numba_loop (bool): Whether to loop using Numba.

            Set to True when iterating a large number of times over small input,
            but note that Numba doesn't support variable keyword arguments.
        use_ray (bool): Whether to use Ray to execute `apply_func` in parallel.

            Only works with `numba_loop` set to False.
            See `vectorbt.base.combine_fns.ray_apply` for related keyword arguments.
        keys (index_like): Outermost column level.
        wrap_kwargs (dict): Keyword arguments passed to `vectorbt.base.array_wrapper.ArrayWrapper.wrap`.
        **kwargs: Keyword arguments passed to `apply_func`.

    !!! note
        The resulting arrays to be concatenated must have the same shape as the broadcast input arrays.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> df.vbt.apply_and_concat(3, [1, 2, 3],
    ...     apply_func=lambda i, a, b: a * b[i], keys=['c', 'd', 'e'])
          c       d       e
       a  b   a   b   a   b
    x  3  4   6   8   9  12
    y  5  6  10  12  15  18
    ```

    Use Ray for small inputs and large processing times:

    ```python-repl
    >>> import time

    >>> def apply_func(i, a):
    ...     time.sleep(1)
    ...     return a

    >>> sr = pd.Series([1, 2, 3])

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func)
    3.01 s ± 2.15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

    >>> %timeit sr.vbt.apply_and_concat(3, apply_func=apply_func, use_ray=True)
    1.01 s ± 2.31 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
    ```
    """
    checks.assert_not_none(apply_func)
    # Optionally cast to 2d array
    if to_2d:
        obj = reshape_fns.to_2d(self.obj, raw=not keep_pd)
    else:
        if not keep_pd:
            obj = np.asarray(self.obj)
        else:
            obj = self.obj
    if checks.is_numba_func(apply_func) and numba_loop:
        if use_ray:
            raise ValueError("Ray cannot be used within Numba")
        result = combine_fns.apply_and_concat_one_nb(ntimes, apply_func, obj, *args, **kwargs)
    else:
        if use_ray:
            result = combine_fns.apply_and_concat_one_ray(ntimes, apply_func, obj, *args, **kwargs)
        else:
            result = combine_fns.apply_and_concat_one(ntimes, apply_func, obj, *args, **kwargs)
    # Build column hierarchy
    if keys is not None:
        new_columns = index_fns.combine_indexes([keys, self.wrapper.columns])
    else:
        top_columns = pd.Index(np.arange(ntimes), name='apply_idx')
        new_columns = index_fns.combine_indexes([top_columns, self.wrapper.columns])
    return self.wrapper.wrap(result, group_by=False, **merge_dicts(dict(columns=new_columns), wrap_kwargs))
def split_into_ranges(self, n=None, range_len=None, start_idxs=None, end_idxs=None):
    """Either split into `n` ranges, each `range_len` long, or split into ranges between
    `start_idxs` and `end_idxs`.

    At least one of `range_len`, `n`, or `start_idxs` and `end_idxs` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.
    If `start_idxs` and `end_idxs` are set, will split into ranges between both arrays.
    Both index arrays must be either NumPy arrays with positions (last exclusive)
    or pandas indexes with labels (last inclusive).

    The created levels `range_start` and `range_end` will contain labels (last inclusive).

    !!! note
        All ranges must have the same length.

        The datetime-like format of the index will be lost as a result of this operation.
        Make sure to store index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(start_idxs=[0, 1], end_idxs=[4, 5]))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                    1          2          5          4          1          2
        1                    2          3          4          3          2          3
        2                    3          4          3          2          3          2
        3                    4          5          2          1          2          1

        >>> print(df.vbt.split_into_ranges(
        ...     start_idxs=pd.Index(['2020-01-01', '2020-01-03']),
        ...     end_idxs=pd.Index(['2020-01-02', '2020-01-04'])
        ... ))
                                  a                     b                     c
        range_start 2020-01-01 2020-01-03 2020-01-01 2020-01-03 2020-01-01 2020-01-03
        range_end   2020-01-02 2020-01-04 2020-01-02 2020-01-04 2020-01-02 2020-01-04
        0                    1          3          5          3          1          3
        1                    2          4          4          2          2          2
        ```"""
    if start_idxs is None and end_idxs is None:
        if range_len is None and n is None:
            raise ValueError("At least range_len, n, or start_idxs and end_idxs must be set")
        if range_len is None:
            range_len = len(self.index) // n
        start_idxs = np.arange(len(self.index) - range_len + 1)
        end_idxs = np.arange(range_len, len(self.index) + 1)
    elif start_idxs is None or end_idxs is None:
        raise ValueError("Both start_idxs and end_idxs must be set")
    else:
        if isinstance(start_idxs, pd.Index):
            start_idxs = np.where(self.index.isin(start_idxs))[0]
        else:
            start_idxs = np.asarray(start_idxs)
        if isinstance(end_idxs, pd.Index):
            end_idxs = np.where(self.index.isin(end_idxs))[0] + 1
        else:
            end_idxs = np.asarray(end_idxs)

    if np.any((end_idxs - start_idxs) != (end_idxs - start_idxs).item(0)):
        raise ValueError("Ranges must have the same length")

    if n is not None:
        if n > len(start_idxs):
            raise ValueError(f"n cannot be bigger than the maximum number of ranges {len(start_idxs)}")
        idxs = np.round(np.linspace(0, len(start_idxs) - 1, n)).astype(int)
        start_idxs = start_idxs[idxs]
        end_idxs = end_idxs[idxs]
    matrix = nb.concat_ranges_nb(self.to_2d_array(), start_idxs, end_idxs)
    range_starts = pd.Index(self.index[start_idxs], name='range_start')
    range_ends = pd.Index(self.index[end_idxs - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
def combine_with_multiple(self, others, *args, combine_func=None, to_2d=False,
                          concat=False, broadcast_kwargs={}, keys=None, **kwargs):
    """Combine with `others` using `combine_func`.

    All arguments will be broadcast using `vectorbt.base.reshape_fns.broadcast`
    with `broadcast_kwargs`.

    If `concat` is True, concatenate the results along columns,
    see `vectorbt.base.combine_fns.combine_and_concat`.
    Otherwise, pairwise combine into a Series/DataFrame of the same shape,
    see `vectorbt.base.combine_fns.combine_multiple`.

    Arguments `*args` and `**kwargs` will be directly passed to `combine_func`.
    If `to_2d` is True, 2-dimensional NumPy arrays will be passed, otherwise as is.

    Use `keys` as the outermost level.

    !!! note
        If `combine_func` is Numba-compiled, will broadcast using `WRITEABLE` and `C_CONTIGUOUS`
        flags, which can lead to expensive computation overhead if the passed objects are large
        and have different shape/memory order. You must also ensure that all objects have the same data type.

        Also remember to bring each object in `*args` to a Numba-compatible format.

    ## Example

    ```python-repl
    >>> import vectorbt as vbt
    >>> import pandas as pd

    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])

    >>> sr.vbt.combine_with_multiple([df, df * 2],
    ...     combine_func=lambda x, y: x + y)
        a   b
    x  10  13
    y  17  20

    >>> sr.vbt.combine_with_multiple([df, df * 2],
    ...     combine_func=lambda x, y: x + y, concat=True, keys=['c', 'd'])
          c       d
       a  b   a   b
    x  4  5   7   9
    y  7  8  12  14
    ```
    """
    others = tuple(map(lambda x: x._obj if isinstance(x, Base_Accessor) else x, others))
    checks.assert_not_none(combine_func)
    checks.assert_type(others, Iterable)
    # Broadcast arguments
    if checks.is_numba_func(combine_func):
        # Numba requires writeable arrays
        # Plus all of our arrays must be in the same order
        broadcast_kwargs = merge_dicts(dict(require_kwargs=dict(requirements=['W', 'C'])), broadcast_kwargs)
    new_obj, *new_others = reshape_fns.broadcast(self._obj, *others, **broadcast_kwargs)
    # Optionally cast to 2d array
    if to_2d:
        bc_arrays = tuple(map(lambda x: reshape_fns.to_2d(x, raw=True), (new_obj, *new_others)))
    else:
        bc_arrays = tuple(map(lambda x: np.asarray(x), (new_obj, *new_others)))
    if concat:
        # Concat the results horizontally
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_meta_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_and_concat_nb(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_and_concat(bc_arrays[0], bc_arrays[1:], combine_func, *args, **kwargs)
        columns = new_obj.vbt.wrapper.columns
        if keys is not None:
            new_columns = index_fns.combine_indexes(keys, columns)
        else:
            top_columns = pd.Index(np.arange(len(new_others)), name='combine_idx')
            new_columns = index_fns.combine_indexes(top_columns, columns)
        return new_obj.vbt.wrapper.wrap(result, columns=new_columns)
    else:
        # Combine arguments pairwise into one object
        if checks.is_numba_func(combine_func):
            for i in range(1, len(bc_arrays)):
                checks.assert_dtype_equal(bc_arrays[i - 1], bc_arrays[i])
            result = combine_fns.combine_multiple_nb(bc_arrays, combine_func, *args, **kwargs)
        else:
            result = combine_fns.combine_multiple(bc_arrays, combine_func, *args, **kwargs)
        return new_obj.vbt.wrapper.wrap(result)