Exemplo n.º 1
0
def update_dict(x: InConfigLikeT,
                y: InConfigLikeT,
                nested: bool = True,
                force: bool = False,
                same_keys: bool = False) -> None:
    """Update dict `x` in place with keys and values from dict `y`.

    Does nothing if either `x` or `y` is None.

    Args:
        x: Dict to be updated (mutated in place).
        y: Dict providing the new keys and values.
        nested: Set to True to update all child dicts in recursive manner.
        force: Passed to `set_dict_item`; see `set_dict_item`.
        same_keys: If True, keys of `y` that are missing in `x` are skipped.
            This rule applies at every nesting level.

    If you want to treat any dict as a single value, wrap it with `atomic_dict`.

    !!! note
        If the child dict is not atomic, it will copy only its values, not its meta."""
    if x is None or y is None:
        return
    checks.assert_instance_of(x, dict)
    checks.assert_instance_of(y, dict)

    for k, v in y.items():
        if nested \
                and k in x \
                and isinstance(x[k], dict) \
                and isinstance(v, dict) \
                and not isinstance(v, atomic_dict):
            # Forward every option so that child dicts follow the same rules as the parent;
            # previously `same_keys` was silently dropped below the top level
            update_dict(x[k], v, nested=nested, force=force, same_keys=same_keys)
        else:
            if same_keys and k not in x:
                continue
            set_dict_item(x, k, v, force=force)
Exemplo n.º 2
0
    def __init__(self, wrapper: ArrayWrapper, **kwargs) -> None:
        """Validate and store `wrapper`, then initialize the base classes explicitly.

        Args:
            wrapper: Must be an instance of `ArrayWrapper`; stored in `_wrapper`.
            **kwargs: Forwarded to `Configured.__init__` together with `wrapper`.
        """
        checks.assert_instance_of(wrapper, ArrayWrapper)
        self._wrapper = wrapper

        # Each base is initialized by an explicit call rather than through super()
        Configured.__init__(self, wrapper=wrapper, **kwargs)
        PandasIndexer.__init__(self)
        AttrResolver.__init__(self)
Exemplo n.º 3
0
def copy_dict(dct: InConfigLikeT,
              copy_mode: str = 'shallow',
              nested: bool = True) -> OutConfigLikeT:
    """Create a copy of `dct` according to `copy_mode`.

    Supported modes (matched case-insensitively):

    * 'shallow': Copies keys only.
    * 'hybrid': Copies keys and values using `copy.copy`.
    * 'deep': Copies the whole thing using `copy.deepcopy`.

    With `nested` enabled, child dicts are copied recursively using the same mode.
    None is treated as an empty dict. For non-deep modes, a `Config` instance is
    copied through its own `copy` method.

    Raises:
        ValueError: If `copy_mode` is not one of the supported modes."""
    source = {} if dct is None else dct
    checks.assert_instance_of(copy_mode, str)
    copy_mode = copy_mode.lower()
    if copy_mode not in ('shallow', 'hybrid', 'deep'):
        raise ValueError(f"Copy mode '{copy_mode}' not supported")

    if copy_mode == 'deep':
        return deepcopy(source)
    if isinstance(source, Config):
        return source.copy(copy_mode=copy_mode, nested=nested)

    copy_values = copy_mode == 'hybrid'
    result = copy(source)  # the container itself is always shallow-copied
    for key, value in result.items():
        if nested and isinstance(value, dict):
            new_value = copy_dict(value, copy_mode=copy_mode, nested=nested)
        elif copy_values:
            new_value = copy(value)  # shallow copy of the value
        else:
            new_value = value
        set_dict_item(result, key, new_value, force=True)
    return result
Exemplo n.º 4
0
def create_param_combs(op_tree: tp.Tuple, depth: int = 0) -> tp.List[tp.List]:
    """Generate parameter combinations by evaluating the operation tree `op_tree`.

    `op_tree` is a tuple whose first element is a callable and whose remaining elements
    are the arguments passed to it. Any argument that is itself a tuple starting with a
    callable is treated as a nested operation tree and evaluated first, in the same way.

    The result of the outermost call (`depth` of 0) is passed through `flatten_param_tuples`;
    nested calls return the raw list produced by their callable.

    Usage:
        ```pycon
        >>> import numpy as np
        >>> from itertools import combinations, product

        >>> create_param_combs((product, (combinations, [0, 1, 2, 3], 2), [4, 5]))
        [[0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2],
         [1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 3, 3],
         [4, 5, 4, 5, 4, 5, 4, 5, 4, 5, 4, 5]]
        ```
    """
    checks.assert_instance_of(op_tree, tuple)
    operation = op_tree[0]
    checks.assert_instance_of(operation, Callable)

    # Evaluate nested operation trees first; everything else is passed through unchanged
    resolved_args = [
        create_param_combs(elem, depth=depth + 1)
        if isinstance(elem, tuple) and isinstance(elem[0], Callable)
        else elem
        for elem in op_tree[1:]
    ]
    out = list(operation(*resolved_args))
    if depth != 0:
        return out
    # Only the outermost result is flattened
    return flatten_param_tuples(out)
Exemplo n.º 5
0
    def __init__(self,
                 wrapper: ArrayWrapper,
                 data: tp.Data,
                 tz_localize: tp.Optional[tp.TimezoneLike],
                 tz_convert: tp.Optional[tp.TimezoneLike],
                 missing_index: str,
                 missing_columns: str,
                 download_kwargs: dict,
                 **kwargs) -> None:
        """Initialize the base classes, validate `data`, and store all arguments.

        Args:
            wrapper: Passed positionally to `Wrapping.__init__`.
            data: Dict of objects; every value must pass `checks.assert_meta_equal`
                against the first value.
            tz_localize: Stored in `_tz_localize`.
            tz_convert: Stored in `_tz_convert`.
            missing_index: Stored in `_missing_index`.
            missing_columns: Stored in `_missing_columns`.
            download_kwargs: Stored in `_download_kwargs`.
            **kwargs: Forwarded to `Wrapping.__init__`.
        """
        Wrapping.__init__(
            self,
            wrapper,
            data=data,
            tz_localize=tz_localize,
            tz_convert=tz_convert,
            missing_index=missing_index,
            missing_columns=missing_columns,
            download_kwargs=download_kwargs,
            **kwargs
        )
        StatsBuilderMixin.__init__(self)
        PlotsBuilderMixin.__init__(self)

        checks.assert_instance_of(data, dict)
        if data:
            # Look up the reference object once instead of rebuilding the key list per item
            reference = next(iter(data.values()))
            for v in data.values():
                checks.assert_meta_equal(v, reference)
        self._data = data
        self._tz_localize = tz_localize
        self._tz_convert = tz_convert
        self._missing_index = missing_index
        self._missing_columns = missing_columns
        self._download_kwargs = download_kwargs
Exemplo n.º 6
0
    def __init__(self, scheduler: tp.Optional[AsyncScheduler] = None) -> None:
        """Store the scheduler, creating a fresh `AsyncScheduler` when none is given.

        Args:
            scheduler: Instance of `AsyncScheduler`, or None to create a new one.
        """
        resolved_scheduler = AsyncScheduler() if scheduler is None else scheduler
        checks.assert_instance_of(resolved_scheduler, AsyncScheduler)

        self._scheduler = resolved_scheduler
        # No task exists at construction time
        self._async_task = None
Exemplo n.º 7
0
def select_levels(index: tp.Index,
                  level_names: tp.MaybeLevelSequence) -> tp.Index:
    """Build a new index from one or multiple levels of the multi-index `index`.

    A single level (int or str) returns the values of that level as obtained from
    `get_level_values`. A sequence of levels returns a new `pd.MultiIndex` built
    from the values of each selected level."""
    checks.assert_instance_of(index, pd.MultiIndex)

    if not isinstance(level_names, (int, str)):
        selected = [index.get_level_values(name) for name in level_names]
        return pd.MultiIndex.from_arrays(selected)
    return index.get_level_values(level_names)
Exemplo n.º 8
0
    def __init__(self, mapper: tp.Series, indexing_func: tp.Callable, level_name: tp.Level = None, **kwargs) -> None:
        """Store the mapper and level name, then initialize `LocBase`.

        Args:
            mapper: Series; cast to string if its dtype is object.
            indexing_func: Passed positionally to `LocBase.__init__`.
            level_name: Stored in `_level_name`.
            **kwargs: Forwarded to `LocBase.__init__`.
        """
        checks.assert_instance_of(mapper, pd.Series)

        # Object-typed params must be cast to string first; `astype` returns a new Series,
        # so the original mapper isn't touched
        self._mapper = mapper.astype(str) if mapper.dtype == 'O' else mapper
        self._level_name = level_name

        LocBase.__init__(self, indexing_func, **kwargs)
Exemplo n.º 9
0
    def __init__(self,
                 returns_accessor: ReturnsAccessor,
                 defaults: tp.KwargsLike = None,
                 **kwargs) -> None:
        """Validate `returns_accessor`, initialize `Configured`, and store the arguments.

        Args:
            returns_accessor: Must be an instance of `ReturnsAccessor`.
            defaults: Stored as-is in `_defaults`; may be None.
            **kwargs: Forwarded to `Configured.__init__`.
        """
        checks.assert_instance_of(returns_accessor, ReturnsAccessor)

        # Every constructor argument is forwarded to Configured as a keyword argument
        Configured.__init__(self,
                            returns_accessor=returns_accessor,
                            defaults=defaults,
                            **kwargs)

        self._returns_accessor = returns_accessor
        self._defaults = defaults
Exemplo n.º 10
0
def get_index(arg: tp.SeriesFrame, axis: int) -> tp.Index:
    """Return the index of `arg` along `axis` (0 for rows, 1 for columns).

    For a Series and `axis` of 1, the result is a single-element index holding
    the name of the Series, or 0 if the Series has no name."""
    checks.assert_instance_of(arg, (pd.Series, pd.DataFrame))
    checks.assert_in(axis, (0, 1))

    if axis == 0:
        return arg.index
    if not checks.is_series(arg):
        return arg.columns
    if arg.name is None:
        return pd.Index([0])  # same as how pandas does it
    return pd.Index([arg.name])
Exemplo n.º 11
0
def indexing_on_mapper(mapper: tp.Series, ref_obj: tp.SeriesFrame,
                       pd_indexing_func: tp.Callable) -> tp.Optional[tp.Series]:
    """Broadcast `mapper` Series to `ref_obj` and perform pandas indexing using `pd_indexing_func`.

    The integer positions of `mapper` are broadcast to `ref_obj` and indexed with
    `pd_indexing_func`; the positions found in the first row/element of the result
    are then used to select from `mapper`.

    Returns a new Series named after `mapper`, or None if the indexing result is
    neither a DataFrame nor a Series."""
    checks.assert_instance_of(mapper, pd.Series)
    checks.assert_instance_of(ref_obj, (pd.Series, pd.DataFrame))

    positions = np.arange(len(mapper.index))
    broadcasted = reshape_fns.broadcast_to(positions, ref_obj)
    selected = pd_indexing_func(broadcasted)
    picked = mapper.iloc[selected.values[0]]
    if checks.is_frame(selected):
        return pd.Series(picked.values, index=selected.columns, name=mapper.name)
    if checks.is_series(selected):
        return pd.Series([picked], index=[selected.name], name=mapper.name)
    return None
Exemplo n.º 12
0
def get_multiindex_series(arg: tp.SeriesFrame) -> tp.Series:
    """Return `arg` as a Series whose index is a multi-index.

    A DataFrame is accepted only if it has exactly one row or one column;
    that row (checked first) or column is returned as the Series.

    Raises:
        ValueError: If a DataFrame with more than one row and more than one column is passed."""
    checks.assert_instance_of(arg, (pd.Series, pd.DataFrame))
    series = arg
    if checks.is_frame(arg):
        n_rows, n_cols = arg.shape[0], arg.shape[1]
        if n_rows == 1:
            series = arg.iloc[0, :]
        elif n_cols == 1:
            series = arg.iloc[:, 0]
        else:
            raise ValueError("Supported are either Series or DataFrame with one column/row")
    checks.assert_instance_of(series.index, pd.MultiIndex)
    return series
Exemplo n.º 13
0
    def __init__(self, obj: tp.SeriesFrame, wrapper: tp.Optional[ArrayWrapper] = None, **kwargs) -> None:
        """Store `obj`, resolve the wrapper, and initialize `Wrapping`.

        Keyword arguments whose names match an argument of `ArrayWrapper.__init__` or
        `ColumnGrouper.__init__` are removed from `kwargs` and used for the wrapper:
        to build one from `obj` if `wrapper` is None, otherwise to call `wrapper.replace`.
        The remaining keyword arguments are forwarded to `Wrapping.__init__`.
        """
        checks.assert_instance_of(obj, (pd.Series, pd.DataFrame))

        self._obj = obj

        wrapper_arg_names = get_func_arg_names(ArrayWrapper.__init__)
        grouper_arg_names = get_func_arg_names(ColumnGrouper.__init__)
        # Decide first which keys to move, so that kwargs is not mutated while being iterated
        wrapping_keys = [
            name for name in kwargs
            if name in wrapper_arg_names or name in grouper_arg_names
        ]
        wrapping_kwargs = {}
        for name in wrapping_keys:
            wrapping_kwargs[name] = kwargs.pop(name)
        if wrapper is not None:
            wrapper = wrapper.replace(**wrapping_kwargs)
        else:
            wrapper = ArrayWrapper.from_obj(obj, **wrapping_kwargs)
        Wrapping.__init__(self, wrapper, obj=obj, **kwargs)
Exemplo n.º 14
0
    def align_to(self,
                 other: tp.SeriesFrame,
                 wrap_kwargs: tp.KwargsLike = None) -> tp.SeriesFrame:
        """Align this object to `other` along both the index and the columns.

        Both objects are converted to two dimensions. The result is wrapped with
        the index and columns of `other`; `wrap_kwargs` are merged on top of those.

        ## Example

        ```python-repl
        >>> import vectorbt as vbt
        >>> import pandas as pd

        >>> df1 = pd.DataFrame([[1, 2], [3, 4]], index=['x', 'y'], columns=['a', 'b'])
        >>> df1
           a  b
        x  1  2
        y  3  4

        >>> df2 = pd.DataFrame([[5, 6, 7, 8], [9, 10, 11, 12]], index=['x', 'y'],
        ...     columns=pd.MultiIndex.from_arrays([[1, 1, 2, 2], ['a', 'b', 'a', 'b']]))
        >>> df2
               1       2
           a   b   a   b
        x  5   6   7   8
        y  9  10  11  12

        >>> df1.vbt.align_to(df2)
              1     2
           a  b  a  b
        x  1  2  1  2
        y  3  4  3  4
        ```
        """
        checks.assert_instance_of(other, (pd.Series, pd.DataFrame))
        self_2d = reshape_fns.to_2d(self.obj)
        other_2d = reshape_fns.to_2d(other)

        # Results of align_index_to are used as positional indexers into this object
        row_positions = index_fns.align_index_to(self_2d.index, other_2d.index)
        col_positions = index_fns.align_index_to(self_2d.columns, other_2d.columns)
        aligned = self_2d.iloc[row_positions, col_positions]

        target_axes = dict(index=other_2d.index, columns=other_2d.columns)
        return self.wrapper.wrap(
            aligned.values,
            group_by=False,
            **merge_dicts(target_axes, wrap_kwargs)
        )
Exemplo n.º 15
0
def pick_levels(
    index: tp.Index,
    required_levels: OptionalLevelSequence = None,
    optional_levels: OptionalLevelSequence = None
) -> tp.Tuple[tp.List[int], tp.List[int]]:
    """Resolve required and optional levels of `index` to their positions.

    Each level may be given as an integer position (negative values count from the end),
    a level name, or None. Optional levels left as None stay None. Required levels left
    as None are filled, in order, with the positions not claimed by any other level.

    Returns a tuple of (required positions, optional positions).

    Raises an exception if index has less or more levels than expected."""
    required_levels = [] if required_levels is None else required_levels
    optional_levels = [] if optional_levels is None else optional_levels
    checks.assert_instance_of(index, pd.MultiIndex)

    n_opt_set = sum(1 for level in optional_levels if level is not None)
    n_req_set = sum(1 for level in required_levels if level is not None)
    n_required = len(required_levels)
    # The level count only matters if some required level has to be inferred
    if n_req_set < n_required and index.nlevels - n_opt_set != n_required:
        n_expected = n_required + n_opt_set
        raise ValueError(f"Expected {n_expected} levels, found {index.nlevels}")

    levels_left = list(range(index.nlevels))

    def _claim_level(level: tp.Any) -> tp.Optional[int]:
        # Translate a level into its non-negative position and mark it as taken
        if level is None:
            return None
        checks.assert_instance_of(level, (int, str))
        position = index.names.index(level) if isinstance(level, str) else level
        if position < 0:
            position = index.nlevels + position
        levels_left.remove(position)
        return position

    # Optional levels are claimed before required ones
    _optional_levels = [_claim_level(level) for level in optional_levels]
    _required_levels = [_claim_level(level) for level in required_levels]

    # Hand out the remaining positions to required levels that were not specified
    for i, position in enumerate(_required_levels):
        if position is None:
            _required_levels[i] = levels_left.pop(0)

    return _required_levels, _optional_levels
Exemplo n.º 16
0
    def __init__(self,
                 columns: tp.Index,
                 group_by: tp.GroupByLike = None,
                 allow_enable: bool = True,
                 allow_disable: bool = True,
                 allow_modify: bool = True) -> None:
        """Initialize `Configured`, validate `columns`, and store the grouping settings.

        Args:
            columns: Must be a `pd.Index`.
            group_by: None or False stores no grouping; anything else is converted
                with `group_by_to_index`.
            allow_enable: Stored in `_allow_enable`.
            allow_disable: Stored in `_allow_disable`.
            allow_modify: Stored in `_allow_modify`.
        """
        Configured.__init__(
            self,
            columns=columns,
            group_by=group_by,
            allow_enable=allow_enable,
            allow_disable=allow_disable,
            allow_modify=allow_modify
        )

        checks.assert_instance_of(columns, pd.Index)
        self._columns = columns
        grouping_disabled = group_by is None or group_by is False
        self._group_by = None if grouping_disabled else group_by_to_index(columns, group_by)

        # Everything is allowed by default
        self._allow_enable = allow_enable
        self._allow_disable = allow_disable
        self._allow_modify = allow_modify
Exemplo n.º 17
0
    def _get_condition_rank(cond: CacheCondition) -> int:
        """Return the rank of caching condition `cond` for the current target.

        Reads `instance`, `func`, `func_name`, `flags`, and `start_rank` from the
        enclosing scope (not visible in this block).

        Returns `start_rank` as soon as any criterion set on `cond` does not match the
        target, and also when none of the ranked criteria is set. Otherwise returns
        `cond.rank` if it is set, or a position between 0 and 11 that depends on which
        combination of criteria is set.

        Raises:
            TypeError: If `func`, `cls`, `base_cls`, `flags`, or `rank` of `cond`
                has an unsupported type.
        """
        # Perform initial checks
        checks.assert_instance_of(cond, CacheCondition)

        # Matching phase: every criterion that is set must match, otherwise bail out
        if cond.instance is not None:
            # Instances are compared by identity
            if instance is not cond.instance:
                return start_rank
        if cond.func is not None:
            if isinstance(cond.func, cached_property):  # cached_property
                if func != cond.func.func:
                    return start_rank
            elif callable(cond.func) and hasattr(func, 'func') and hasattr(
                    cond.func, 'func'):  # cached_method
                if func.func != cond.func.func:
                    return start_rank
            elif isinstance(cond.func, str):
                # A string is compared against the function name
                if func_name != cond.func:
                    return start_rank
            else:
                raise TypeError(
                    f"Caching condition {cond}: func must be either a callable or a string"
                )
        if cond.cls is not None:
            if inspect.isclass(cond.cls):
                # Exact type comparison: subclasses do not match here
                if type(instance) != cond.cls:
                    return start_rank
            elif isinstance(cond.cls, str):
                if type(instance).__name__ != cond.cls:
                    return start_rank
            else:
                raise TypeError(
                    f"Caching condition {cond}: cls must be either a class or a string"
                )
        if cond.base_cls is not None:
            if inspect.isclass(cond.base_cls) or isinstance(
                    cond.base_cls, str):
                if not checks.is_instance_of(instance, cond.base_cls):
                    return start_rank
            else:
                raise TypeError(
                    f"Caching condition {cond}: base_cls must be either a class or a string"
                )
        if cond.flags is not None:
            if not isinstance(cond.flags, dict):
                raise TypeError(
                    f"Caching condition {cond}: flags must be a dict")
            # Every flag of the condition must be present with an equal value
            for k, v in cond.flags.items():
                if k not in flags or flags[k] != v:
                    return start_rank
        if cond.rank is not None:
            if not isinstance(cond.rank, int):
                raise TypeError(
                    f"Caching condition {cond}: rank must be an integer")
            # An explicit rank overrides all twelve positional ranks
            ranks = [cond.rank for _ in range(12)]
        else:
            ranks = list(range(12))

        # Ranking phase: the first combination that applies determines the rank
        # Rank instance conditions
        if cond.instance is not None and cond.func is not None:
            return ranks[0]
        if cond.instance is not None and cond.flags is not None:
            return ranks[1]
        if cond.instance is not None:
            return ranks[2]

        # Rank class conditions
        if cond.cls is not None and cond.func is not None:
            return ranks[3]
        if cond.cls is not None and cond.flags is not None:
            return ranks[4]
        if cond.cls is not None:
            return ranks[5]

        # Rank base class conditions
        if cond.base_cls is not None and cond.func is not None:
            return ranks[6]
        if cond.base_cls is not None and cond.flags is not None:
            return ranks[7]
        if cond.base_cls is not None:
            return ranks[8]

        # Rank function conditions
        if cond.func is not None and cond.flags is not None:
            return ranks[9]
        if cond.func is not None:
            return ranks[10]
        if cond.flags is not None:
            return ranks[11]

        # No ranked criterion is set on the condition
        return start_rank
Exemplo n.º 18
0
    def __init__(self, obj: tp.Frame, **kwargs) -> None:
        """Require `obj` to be a `pd.DataFrame` and initialize `BaseAccessor`.

        Args:
            obj: DataFrame to be accessed.
            **kwargs: Forwarded to `BaseAccessor.__init__`.
        """
        checks.assert_instance_of(obj, pd.DataFrame)

        BaseAccessor.__init__(self, obj, **kwargs)
Exemplo n.º 19
0
    def __init__(self, obj: tp.Series, **kwargs) -> None:
        """Require `obj` to be a `pd.Series` and initialize `BaseAccessor`.

        Args:
            obj: Series to be accessed.
            **kwargs: Forwarded to `BaseAccessor.__init__`.
        """
        checks.assert_instance_of(obj, pd.Series)

        BaseAccessor.__init__(self, obj, **kwargs)
Exemplo n.º 20
0
    def __init__(self) -> None:
        """Check that the instance is a `Wrapping` and copy the class-level `_subplots` onto it."""
        # This initializer may only run on instances that are also `Wrapping`
        checks.assert_instance_of(self, Wrapping)

        # Copy writeable attrs
        # NOTE(review): how deep `.copy()` goes depends on the type of `_subplots` - confirm
        self._subplots = self.__class__._subplots.copy()
Exemplo n.º 21
0
def deep_getattr(obj: tp.Any,
                 attr_chain: tp.Union[str, tuple, Iterable],
                 getattr_func: tp.Callable = default_getattr_func,
                 call_last_attr: bool = True) -> tp.Any:
    """Retrieve attribute consecutively.

    The attribute chain `attr_chain` can be:

    * string -> get variable/property or method without arguments
    * tuple of string -> call method without arguments
    * tuple of string and tuple -> call method and pass positional arguments (unpacked)
    * tuple of string, tuple, and dict -> call method and pass positional and keyword arguments (unpacked)
    * iterable of any of the above

    Any iterable is accepted for the last form, including generators and other
    iterables without a length. An empty iterable returns `obj` itself.

    Use `getattr_func` to overwrite the default behavior of accessing an attribute (see `default_getattr_func`).

    `call_last_attr` is passed as `call_attr` to `getattr_func` for a plain string, and
    applies only to the last element of an iterable chain; all preceding elements are
    resolved with `call_last_attr=True`.

    !!! hint
        If your chain includes only attributes and functions without arguments,
        you can represent this chain as a single (but probably long) string.
    """
    checks.assert_instance_of(attr_chain, (str, tuple, Iterable))

    if isinstance(attr_chain, str):
        if '.' in attr_chain:
            # A dotted string is shorthand for a chain of plain attribute names
            return deep_getattr(
                obj,
                attr_chain.split('.'),
                getattr_func=getattr_func,
                call_last_attr=call_last_attr
            )
        return getattr_func(obj, attr_chain, call_attr=call_last_attr)
    if isinstance(attr_chain, tuple):
        # Tuples matching one of the "method call" shapes are resolved directly
        if len(attr_chain) == 1 \
                and isinstance(attr_chain[0], str):
            return getattr_func(obj, attr_chain[0])
        if len(attr_chain) == 2 \
                and isinstance(attr_chain[0], str) \
                and isinstance(attr_chain[1], tuple):
            return getattr_func(obj, attr_chain[0], args=attr_chain[1])
        if len(attr_chain) == 3 \
                and isinstance(attr_chain[0], str) \
                and isinstance(attr_chain[1], tuple) \
                and isinstance(attr_chain[2], dict):
            return getattr_func(obj, attr_chain[0], args=attr_chain[1], kwargs=attr_chain[2])

    # Materialize the chain first: the type check above admits any iterable,
    # but generators and similar iterables do not support len()
    chain = list(attr_chain)
    last_pos = len(chain) - 1
    result = obj
    for i, attr in enumerate(chain):
        result = deep_getattr(
            result,
            attr,
            getattr_func=getattr_func,
            call_last_attr=call_last_attr if i == last_pos else True
        )
    return result
Exemplo n.º 22
0
    def __init__(self) -> None:
        """Check that the instance is a `Wrapping` and copy the class-level `_metrics` onto it."""
        # This initializer may only run on instances that are also `Wrapping`
        checks.assert_instance_of(self, Wrapping)

        # Copy writeable attrs
        # NOTE(review): how deep `.copy()` goes depends on the type of `_metrics` - confirm
        self._metrics = self.__class__._metrics.copy()
Exemplo n.º 23
0
    def __init__(self,
                 dct: tp.DictLike = None,
                 copy_kwargs: tp.KwargsLike = None,
                 reset_dct: tp.DictLike = None,
                 reset_dct_copy_kwargs: tp.KwargsLike = None,
                 frozen_keys: tp.Optional[bool] = None,
                 readonly: tp.Optional[bool] = None,
                 nested: tp.Optional[bool] = None,
                 convert_dicts: tp.Optional[tp.Union[
                     bool, tp.Type["Config"]]] = None,
                 as_attrs: tp.Optional[bool] = None) -> None:
        # Initialize the config from `dct` after resolving every parameter left as None.
        #
        # Raises ValueError if `convert_dicts` is enabled while `nested` is not, or if
        # `as_attrs` is enabled and a key clashes with an existing attribute name.
        # Raises TypeError if `convert_dicts` is neither a boolean nor a subclass of Config.

        # Global defaults are imported lazily; fall back to no defaults if the import fails
        try:
            from vectorbt._settings import settings
            configured_cfg = settings['config']
        except ImportError:
            configured_cfg = {}

        if dct is None:
            dct = dict()

        # Resolve params
        def _resolve_param(pname: str,
                           p: tp.Any,
                           default: tp.Any,
                           merge: bool = False) -> tp.Any:
            # Candidate values, checked in this order: the explicit argument `p`,
            # the `<pname>_` attribute of `dct` (only if `dct` is a Config),
            # the global settings, and finally the hard-coded `default`.
            cfg_default = configured_cfg.get(pname, None)
            dct_p = getattr(dct, pname +
                            '_') if isinstance(dct, Config) else None

            if merge and isinstance(default, dict):
                # Dict-like params are merged across all sources instead of picking one
                # NOTE(review): presumably later arguments of merge_dicts take precedence - confirm
                return merge_dicts(default, cfg_default, dct_p, p)
            if p is not None:
                return p
            if dct_p is not None:
                return dct_p
            if cfg_default is not None:
                return cfg_default
            return default

        reset_dct = _resolve_param('reset_dct', reset_dct, None)
        frozen_keys = _resolve_param('frozen_keys', frozen_keys, False)
        readonly = _resolve_param('readonly', readonly, False)
        nested = _resolve_param('nested', nested, False)
        convert_dicts = _resolve_param('convert_dicts', convert_dicts, False)
        # `as_attrs` defaults to True whenever the resolved `frozen_keys` or `readonly` is truthy
        as_attrs = _resolve_param('as_attrs', as_attrs, frozen_keys
                                  or readonly)
        # Uses `copy_kwargs` as passed by the caller, since it is resolved only on the next statement
        reset_dct_copy_kwargs = merge_dicts(copy_kwargs, reset_dct_copy_kwargs)
        copy_kwargs = _resolve_param(
            'copy_kwargs',
            copy_kwargs,
            dict(copy_mode='shallow' if readonly else 'hybrid', nested=nested),
            merge=True)
        reset_dct_copy_kwargs = _resolve_param(
            'reset_dct_copy_kwargs',
            reset_dct_copy_kwargs,
            dict(copy_mode='shallow' if readonly else 'hybrid', nested=nested),
            merge=True)

        # Copy dict
        # `dict(dct)` converts the input to a plain dict before copying
        dct = copy_dict(dict(dct), **copy_kwargs)

        # Convert child dicts
        if convert_dicts:
            if not nested:
                raise ValueError("convert_dicts requires nested to be True")
            for k, v in dct.items():
                # Only top-level values are visited here; each child config receives the same parameters
                if isinstance(v, dict) and not isinstance(v, Config):
                    if isinstance(convert_dicts, bool):
                        # True means: use the class of this config
                        config_cls = self.__class__
                    elif issubclass(convert_dicts, Config):
                        config_cls = convert_dicts
                    else:
                        raise TypeError(
                            "convert_dicts must be either boolean or a subclass of Config"
                        )
                    dct[k] = config_cls(
                        v,
                        copy_kwargs=copy_kwargs,
                        reset_dct_copy_kwargs=reset_dct_copy_kwargs,
                        frozen_keys=frozen_keys,
                        readonly=readonly,
                        nested=nested,
                        convert_dicts=convert_dicts,
                        as_attrs=as_attrs)

        # Copy initial config
        # Without an explicit `reset_dct`, the (already copied and converted) `dct` is used
        if reset_dct is None:
            reset_dct = dct
        reset_dct = copy_dict(dict(reset_dct), **reset_dct_copy_kwargs)

        dict.__init__(self, dct)

        # Store params in an instance variable
        # Types are validated only here, after the params have already been used above
        checks.assert_instance_of(copy_kwargs, dict)
        checks.assert_instance_of(reset_dct, dict)
        checks.assert_instance_of(reset_dct_copy_kwargs, dict)
        checks.assert_instance_of(frozen_keys, bool)
        checks.assert_instance_of(readonly, bool)
        checks.assert_instance_of(nested, bool)
        checks.assert_instance_of(convert_dicts, (bool, type))
        checks.assert_instance_of(as_attrs, bool)

        # NOTE(review): written to `__dict__` directly, presumably to bypass a custom
        # `__setattr__` defined on the class - confirm
        self.__dict__['_copy_kwargs_'] = copy_kwargs
        self.__dict__['_reset_dct_'] = reset_dct
        self.__dict__['_reset_dct_copy_kwargs_'] = reset_dct_copy_kwargs
        self.__dict__['_frozen_keys_'] = frozen_keys
        self.__dict__['_readonly_'] = readonly
        self.__dict__['_nested_'] = nested
        self.__dict__['_convert_dicts_'] = convert_dicts
        self.__dict__['_as_attrs_'] = as_attrs

        # Set keys as attributes for autocomplete
        if as_attrs:
            for k, v in self.items():
                # A key must not clash with any name already reported by `__dir__`
                if k in self.__dir__():
                    raise ValueError(
                        f"Cannot set key '{k}' as attribute of the config. Disable as_attrs."
                    )
                self.__dict__[k] = v
Exemplo n.º 24
0
def make_symmetric(arg: tp.SeriesFrame, sort: bool = True) -> tp.Frame:
    """Make `arg` symmetric.

    The resulting DataFrame uses one and the same index for both its rows and its
    columns, built from the labels of the index and the columns of `arg`.

    If either the index or the columns is a multi-index, both must be, and they must
    have the same number of levels.

    With `sort=True`, the combined labels are made unique using `np.unique`.
    Pass `sort=False` if index and columns should not be sorted, but concatenated
    and get duplicates removed.

    Usage:
        ```pycon
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import make_symmetric

        >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['c', 'd'])

        >>> make_symmetric(df)
             a    b    c    d
        a  NaN  NaN  1.0  2.0
        b  NaN  NaN  3.0  4.0
        c  1.0  3.0  NaN  NaN
        d  2.0  4.0  NaN  NaN
        ```
    """
    checks.assert_instance_of(arg, (pd.Series, pd.DataFrame))
    df: tp.Frame = to_2d(arg)
    index_is_multi = isinstance(df.index, pd.MultiIndex)
    if index_is_multi or isinstance(df.columns, pd.MultiIndex):
        checks.assert_instance_of(df.index, pd.MultiIndex)
        checks.assert_instance_of(df.columns, pd.MultiIndex)
        checks.assert_array_equal(df.index.nlevels, df.columns.nlevels)
        index_names, column_names = tuple(df.index.names), tuple(df.columns.names)
    else:
        index_names, column_names = df.index.name, df.columns.name

    # Name(s) of the shared index: kept as-is if both axes agree, otherwise paired up
    if index_names == column_names:
        new_name = index_names
    elif index_is_multi:
        new_name = tuple(zip(index_names, column_names))
    else:
        new_name = (index_names, column_names)

    if sort:
        idx_vals = np.unique(np.concatenate((df.index, df.columns))).tolist()
    else:
        # dict.fromkeys removes duplicates while keeping the order of first appearance
        idx_vals = list(dict.fromkeys(np.concatenate((df.index, df.columns))))

    # Rename copies of both axes so that the original object is left untouched
    renamed_index = df.index.copy()
    renamed_columns = df.columns.copy()
    if index_is_multi:
        unique_index = pd.MultiIndex.from_tuples(idx_vals, names=new_name)
        renamed_index.names = new_name
        renamed_columns.names = new_name
    else:
        unique_index = pd.Index(idx_vals, name=new_name)
        renamed_index.name = new_name
        renamed_columns.name = new_name
    relabeled = df.copy(deep=False)
    relabeled.index = renamed_index
    relabeled.columns = renamed_columns

    # The output dtype must be able to hold NaN
    out_dtype = np.promote_types(relabeled.values.dtype, np.min_scalar_type(np.nan))
    df_out = pd.DataFrame(index=unique_index, columns=unique_index, dtype=out_dtype)
    df_out.loc[:, :] = relabeled
    # Fill the cells that are still empty from the transposed frame
    df_out[df_out.isnull()] = relabeled.transpose()
    return df_out
Exemplo n.º 25
0
def broadcast(*args: tp.ArrayLike,
              to_shape: tp.Optional[tp.RelaxedShape] = None,
              to_pd: tp.Optional[tp.MaybeSequence[bool]] = None,
              to_frame: tp.Optional[bool] = None,
              align_index: tp.Optional[bool] = None,
              align_columns: tp.Optional[bool] = None,
              index_from: tp.Optional[IndexFromLike] = None,
              columns_from: tp.Optional[IndexFromLike] = None,
              require_kwargs: tp.KwargsLikeSequence = None,
              keep_raw: tp.Optional[tp.MaybeSequence[bool]] = False,
              return_meta: bool = False,
              **kwargs) -> BCRT:
    """Broadcast every array-like object in `args` to one common shape using NumPy broadcasting.

    See [Broadcasting](https://numpy.org/doc/stable/user/basics.broadcasting.html).

    Pandas objects are supported as well: their index/columns are broadcast with `broadcast_index`.

    Args:
        *args (array_like): Array-like objects.
        to_shape (tuple of int): Target shape. If set, every element in `args` is broadcast to `to_shape`.

            An integer is treated as a one-element tuple.
        to_pd (bool or list of bool): Whether to convert all output arrays to pandas; otherwise
            raw NumPy arrays are returned. If None, converts only if at least one of the arguments
            is a pandas object.

            If sequence, applies to each argument.
        to_frame (bool): Whether to convert all Series to DataFrames.
        align_index (bool): Whether to align index of pandas objects using multi-index.

            Pass None to use the default.
        align_columns (bool): Whether to align columns of pandas objects using multi-index.

            Pass None to use the default.
        index_from (any): Broadcasting rule for index.

            Pass None to use the default.
        columns_from (any): Broadcasting rule for columns.

            Pass None to use the default.
        require_kwargs (dict or list of dict): Keyword arguments passed to `np.require`.

            If sequence, applies to each argument.
        keep_raw (bool or list of bool): Whether to keep the unbroadcasted version of the array.

            The array is still checked for being broadcastable to the target shape,
            but it is neither expanded nor wrapped.

            If sequence, applies to each argument.
        return_meta (bool): Whether to also return new shape, index and columns.
        **kwargs: Keyword arguments passed to `broadcast_index`.

    Returns:
        The broadcast object if a single argument was passed, otherwise a tuple of broadcast objects.
        With `return_meta`, a tuple of that result, the target shape, the new index, and the new columns
        (index and columns are None when the output is not pandas).

    For defaults, see `broadcasting` in `vectorbt._settings.settings`.

    Usage:
        * Without broadcasting index and columns:

        ```pycon
        >>> import numpy as np
        >>> import pandas as pd
        >>> from vectorbt.base.reshape_fns import broadcast

        >>> v = 0
        >>> a = np.array([1, 2, 3])
        >>> sr = pd.Series([1, 2, 3], index=pd.Index(['x', 'y', 'z']), name='a')
        >>> df = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]],
        ...     index=pd.Index(['x2', 'y2', 'z2']),
        ...     columns=pd.Index(['a2', 'b2', 'c2']))

        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='keep',
        ...     columns_from='keep',
        ... ): print(i)
           0  1  2
        0  0  0  0
        1  0  0  0
        2  0  0  0
           0  1  2
        0  1  2  3
        1  1  2  3
        2  1  2  3
           a  a  a
        x  1  1  1
        y  2  2  2
        z  3  3  3
            a2  b2  c2
        x2   1   2   3
        y2   4   5   6
        z2   7   8   9
        ```

        * Taking new index and columns from position:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=2,
        ...     columns_from=3
        ... ): print(i)
           a2  b2  c2
        x   0   0   0
        y   0   0   0
        z   0   0   0
           a2  b2  c2
        x   1   2   3
        y   1   2   3
        z   1   2   3
           a2  b2  c2
        x   1   1   1
        y   2   2   2
        z   3   3   3
           a2  b2  c2
        x   1   2   3
        y   4   5   6
        z   7   8   9
        ```

        * Broadcasting index and columns through stacking:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from='stack',
        ...     columns_from='stack'
        ... ): print(i)
              a2  b2  c2
        x x2   0   0   0
        y y2   0   0   0
        z z2   0   0   0
              a2  b2  c2
        x x2   1   2   3
        y y2   1   2   3
        z z2   1   2   3
              a2  b2  c2
        x x2   1   1   1
        y y2   2   2   2
        z z2   3   3   3
              a2  b2  c2
        x x2   1   2   3
        y y2   4   5   6
        z z2   7   8   9
        ```

        * Setting index and columns manually:

        ```pycon
        >>> for i in broadcast(
        ...     v, a, sr, df,
        ...     index_from=['a', 'b', 'c'],
        ...     columns_from=['d', 'e', 'f']
        ... ): print(i)
           d  e  f
        a  0  0  0
        b  0  0  0
        c  0  0  0
           d  e  f
        a  1  2  3
        b  1  2  3
        c  1  2  3
           d  e  f
        a  1  1  1
        b  2  2  2
        c  3  3  3
           d  e  f
        a  1  2  3
        b  4  5  6
        c  7  8  9
        ```
    """
    from vectorbt._settings import settings
    bc_defaults = settings['broadcasting']

    # Anything left as None falls back to the global broadcasting settings
    require_kwargs = {} if require_kwargs is None else require_kwargs
    align_index = bc_defaults['align_index'] if align_index is None else align_index
    align_columns = bc_defaults['align_columns'] if align_columns is None else align_columns
    index_from = bc_defaults['index_from'] if index_from is None else index_from
    columns_from = bc_defaults['columns_from'] if columns_from is None else columns_from

    # Per-argument flags are given as sequences, global flags as scalars
    to_pd_per_arg = isinstance(to_pd, Sequence)
    keep_raw_per_arg = isinstance(keep_raw, Sequence)

    def _keeps_raw(pos):
        # Resolved lazily so that a too-short sequence fails at the same point as before
        return keep_raw[pos] if keep_raw_per_arg else keep_raw

    # Everything that is neither NumPy nor pandas becomes a NumPy array
    arr_args = [to_any_array(raw_arg) for raw_arg in args]

    # Infer from the inputs whether we deal with 2-dim data and with pandas
    is_2d = any(arr.ndim > 1 for arr in arr_args)
    is_pd = any(checks.is_pandas(arr) for arr in arr_args)

    # A target shape with more than one dimension also implies 2-dim data
    if to_shape is not None:
        if isinstance(to_shape, int):
            to_shape = (to_shape,)
        checks.assert_instance_of(to_shape, tuple)
        is_2d = is_2d or len(to_shape) > 1

    # Explicit user choices override whatever was inferred above
    if to_frame is not None:
        is_2d = to_frame
    if to_pd is not None:
        is_pd = any(to_pd) if to_pd_per_arg else to_pd

    # Align rows of pandas objects whose indexes differ in length
    if align_index:
        index_to_align = [
            i for i, arr in enumerate(arr_args)
            if checks.is_pandas(arr) and len(arr.index) > 1
        ]
        if len(index_to_align) > 1:
            indexes = [arr_args[i].index for i in index_to_align]
            if len({len(index) for index in indexes}) > 1:
                index_indices = index_fns.align_indexes(indexes)
                for pos, i in enumerate(index_to_align):
                    arr_args[i] = arr_args[i].iloc[index_indices[pos]]

    # Align columns of DataFrames whose columns differ in length
    if align_columns:
        cols_to_align = [
            i for i, arr in enumerate(arr_args)
            if checks.is_frame(arr) and len(arr.columns) > 1
        ]
        if len(cols_to_align) > 1:
            indexes = [arr_args[i].columns for i in cols_to_align]
            if len({len(index) for index in indexes}) > 1:
                col_indices = index_fns.align_indexes(indexes)
                for pos, i in enumerate(cols_to_align):
                    arr_args[i] = arr_args[i].iloc[:, col_indices[pos]]

    # Series become single-column DataFrames when working on 2-dim data
    if is_2d:
        arr_args_2d = [
            arr.to_frame() if checks.is_series(arr) else arr
            for arr in arr_args
        ]
    else:
        arr_args_2d = list(arr_args)

    # Derive the final shape from the arguments unless it was given
    if to_shape is None:
        to_shape = _broadcast_shape(*(np.asarray(arr) for arr in arr_args_2d))

    # Perform broadcasting
    new_args = []
    for i, arr in enumerate(arr_args_2d):
        raw_requested = _keeps_raw(i)
        # Broadcast unconditionally: for raw arrays this acts purely as a compatibility check
        expanded = np.broadcast_to(arr, to_shape)
        new_args.append(arr if raw_requested else expanded)

    # Force to match requirements
    new_args = [
        np.require(new_arg, **resolve_dict(require_kwargs, i=i))
        for i, new_arg in enumerate(new_args)
    ]

    # Decide on index and columns
    # NOTE: Important to pass arr_args, not arr_args_2d, to preserve original shape info
    new_index = new_columns = None
    if is_pd:
        new_index = broadcast_index(arr_args,
                                    to_shape,
                                    index_from=index_from,
                                    axis=0,
                                    **kwargs)
        new_columns = broadcast_index(arr_args,
                                      to_shape,
                                      index_from=columns_from,
                                      axis=1,
                                      **kwargs)

    # Bring arrays back to their old types (e.g. array -> pandas); raw arrays stay as they are
    for i, old_arg in enumerate(arr_args):
        if _keeps_raw(i):
            continue
        wrap_as_pd = to_pd[i] if to_pd_per_arg else is_pd
        new_args[i] = wrap_broadcasted(old_arg,
                                       new_args[i],
                                       is_pd=wrap_as_pd,
                                       new_index=new_index,
                                       new_columns=new_columns)

    # A single argument is returned bare, multiple arguments as a tuple
    result = tuple(new_args) if len(new_args) > 1 else new_args[0]
    if return_meta:
        return result, to_shape, new_index, new_columns
    return result