Пример #1
0
def group_by_to_index(index: tp.Index, group_by: tp.GroupByLike) -> GroupByT:
    """Normalize the mapper `group_by` into a `pd.Index` aligned with `index`.

    `None` and `False` are handed back unchanged. `True` assigns every element
    of `index` to one group. An integer or string is passed to
    `index_fns.select_levels`. A sequence whose length differs from `index` and
    whose first element is an integer or string is first tried as a level
    selection on a multi-index; if that fails, or for any other input, the
    value is wrapped in `pd.Index` as raw group labels.

    Raises `ValueError` if the resulting index and `index` differ in length.

    !!! note
        Index and mapper must have the same length."""
    # Grouping disabled or unspecified: nothing to convert
    if group_by is None or group_by is False:
        return group_by

    if group_by is True:
        # A constant label places everything into one group
        group_by = pd.Index(np.full(len(index), 0))
    elif isinstance(group_by, (int, str)):
        group_by = index_fns.select_levels(index, group_by)
    elif checks.is_sequence(group_by):
        # The conditions are evaluated in this order on purpose: the first
        # element is only inspected when the lengths already disagree
        looks_like_levels = (
            len(group_by) != len(index)
            and isinstance(group_by[0], (int, str))
            and isinstance(index, pd.MultiIndex)
            and len(group_by) <= len(index.names)
        )
        if looks_like_levels:
            try:
                group_by = index_fns.select_levels(index, group_by)
            except (IndexError, KeyError):
                # Not valid levels after all; treat as raw labels below
                pass

    grouper = group_by if isinstance(group_by, pd.Index) else pd.Index(group_by)
    if len(grouper) != len(index):
        raise ValueError("group_by and index must have the same length")
    return grouper
Пример #2
0
def unstack_to_df(arg,
                  index_levels=None,
                  column_levels=None,
                  symmetric=False,
                  sort=True):
    """Reshape `arg` based on its multi-index into a DataFrame.

    Use `index_levels` to specify what index levels will form new index, and `column_levels`
    for new columns. Both must be given when the multi-index has more than two levels;
    otherwise they default to level 0 and level 1 respectively.
    Set `symmetric` to True to make DataFrame symmetric (`sort` is forwarded to
    `make_symmetric`).

    A DataFrame with a single row or a single column is reduced to that row/column first.
    Row and column labels of the result are sorted, matching the sorted layout
    produced by `unstack_to_array`.

    Raises `ValueError` if the multi-index has more than two levels and either
    `index_levels` or `column_levels` is missing.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_df

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']],
    ...     names=['x', 'y', 'z'])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_df(sr, index_levels=(0, 1), column_levels=2)
    z      a    b    c    d
    x y
    1 3  1.0  NaN  NaN  NaN
    1 4  NaN  2.0  NaN  NaN
    2 3  NaN  NaN  3.0  NaN
    2 4  NaN  NaN  NaN  4.0
    ```
    """
    # Perform checks
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    if checks.is_frame(arg):
        # Reduce a one-row or one-column frame to the Series carrying the multi-index
        if arg.shape[0] == 1:
            arg = arg.iloc[0, :]
        elif arg.shape[1] == 1:
            arg = arg.iloc[:, 0]
    checks.assert_type(arg.index, pd.MultiIndex)
    sr = to_1d(arg)

    if len(sr.index.levels) > 2:
        if index_levels is None:
            raise ValueError("index_levels must be specified")
        if column_levels is None:
            raise ValueError("column_levels must be specified")
    else:
        # Only fill in defaults; never discard levels the caller asked for
        if index_levels is None:
            index_levels = 0
        if column_levels is None:
            column_levels = 1

    # Build new index and column hierarchies.
    # `unstack_to_array` positions values by `np.unique`, i.e. in sorted order,
    # whereas `.unique()` keeps order of appearance, so the labels are sorted
    # here to stay aligned with the data.
    new_index = index_fns.select_levels(arg.index, index_levels).unique().sort_values()
    new_columns = index_fns.select_levels(arg.index, column_levels).unique().sort_values()

    # Unstack and post-process
    unstacked = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(unstacked, index=new_index, columns=new_columns)
    if symmetric:
        return make_symmetric(df, sort=sort)
    return df
Пример #3
0
def unstack_to_df(arg: tp.SeriesFrame,
                  index_levels: tp.Optional[tp.MaybeLevelSequence] = None,
                  column_levels: tp.Optional[tp.MaybeLevelSequence] = None,
                  symmetric: bool = False,
                  sort: bool = True) -> tp.Frame:
    """Reshape `arg` based on its multi-index into a DataFrame.

    Use `index_levels` to specify what index levels will form new index, and `column_levels`
    for new columns. Both must be given when the multi-index has more than two levels;
    otherwise they default to level 0 and level 1 respectively.
    Set `symmetric` to True to make DataFrame symmetric (`sort` is forwarded to
    `make_symmetric`).

    Row and column labels of the result are sorted, matching the sorted layout
    produced by `unstack_to_array`.

    Raises `ValueError` if the multi-index has more than two levels and either
    `index_levels` or `column_levels` is missing.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_df

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']],
    ...     names=['x', 'y', 'z'])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_df(sr, index_levels=(0, 1), column_levels=2)
    z      a    b    c    d
    x y
    1 3  1.0  NaN  NaN  NaN
    1 4  NaN  2.0  NaN  NaN
    2 3  NaN  NaN  3.0  NaN
    2 4  NaN  NaN  NaN  4.0
    ```
    """
    # Extract series
    sr: tp.Series = to_1d(get_multiindex_series(arg))

    if len(sr.index.levels) > 2:
        if index_levels is None:
            raise ValueError("index_levels must be specified")
        if column_levels is None:
            raise ValueError("column_levels must be specified")
    else:
        if index_levels is None:
            index_levels = 0
        if column_levels is None:
            column_levels = 1

    # Build new index and column hierarchies.
    # Labels are taken from the extracted series rather than from `arg`: `arg`
    # may be a DataFrame, in which case its own index is not necessarily the
    # multi-index the data was extracted along.
    # NOTE(review): assumes `get_multiindex_series` may return a row/column of
    # a frame - confirm against its definition.
    # `unstack_to_array` positions values by `np.unique`, i.e. in sorted order,
    # whereas `.unique()` keeps order of appearance, so the labels are sorted
    # here to stay aligned with the data.
    new_index = index_fns.select_levels(sr.index, index_levels).unique().sort_values()
    new_columns = index_fns.select_levels(sr.index, column_levels).unique().sort_values()

    # Unstack and post-process
    unstacked = unstack_to_array(sr, levels=(index_levels, column_levels))
    df = pd.DataFrame(unstacked, index=new_index, columns=new_columns)
    if symmetric:
        return make_symmetric(df, sort=sort)
    return df
Пример #4
0
def unstack_to_array(
        arg: tp.SeriesFrame,
        levels: tp.Optional[tp.MaybeLevelSequence] = None) -> tp.Array:
    """Reshape `arg` based on its multi-index into a multi-dimensional array.

    Use `levels` to specify what index levels to unstack and in which order.
    A single integer or string is treated as one level; `None` means all
    levels in their original order.

    The result has one axis per requested level, each axis ordered by the
    sorted unique values of that level. Positions without a value are NaN.

    Raises `ValueError` if the index contains duplicate entries.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_array

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_array(sr).shape
    (2, 2, 4)

    >>> unstack_to_array(sr)
    [[[ 1. nan nan nan]
      [nan  2. nan nan]]

     [[nan nan  3. nan]
      [nan nan nan  4.]]]

    >>> unstack_to_array(sr, levels=(2, 0))
    [[ 1. nan]
     [ 2. nan]
     [nan  3.]
     [nan  4.]]
    ```
    """
    # Extract series
    sr: tp.Series = to_1d(get_multiindex_series(arg))
    if sr.index.duplicated().any():
        raise ValueError("Index contains duplicate entries, cannot reshape")

    if levels is None:
        levels = range(sr.index.nlevels)
    elif isinstance(levels, (int, str)):
        levels = (levels, )

    axis_sizes = []
    axis_positions = []
    for level in levels:
        vals = index_fns.select_levels(sr.index, level).to_numpy()
        # `return_inverse` yields, for every element, its position among the
        # sorted unique values - the coordinate along this axis
        unique_vals, vals_idx = np.unique(vals, return_inverse=True)
        axis_sizes.append(len(unique_vals))
        axis_positions.append(vals_idx)

    a = np.full(axis_sizes, np.nan)
    # One coordinate array per axis; together they address one cell per value
    a[tuple(axis_positions)] = sr.values
    return a
Пример #5
0
def unstack_to_array(arg, levels=None):
    """Reshape `arg` based on its multi-index into a multi-dimensional array.

    Use `levels` to specify what index levels to unstack and in which order.
    A single integer or string is treated as one level; `None` means all
    levels in their original order.

    `arg` must be a Series with a multi-index. The result has one axis per
    requested level, each axis ordered by the sorted unique values of that
    level. Positions without a value are NaN.

    ## Example

    ```python-repl
    >>> import pandas as pd
    >>> from vectorbt.base.reshape_fns import unstack_to_array

    >>> index = pd.MultiIndex.from_arrays(
    ...     [[1, 1, 2, 2], [3, 4, 3, 4], ['a', 'b', 'c', 'd']])
    >>> sr = pd.Series([1, 2, 3, 4], index=index)

    >>> unstack_to_array(sr).shape
    (2, 2, 4)

    >>> unstack_to_array(sr)
    [[[ 1. nan nan nan]
      [nan  2. nan nan]]

     [[nan nan  3. nan]
      [nan nan nan  4.]]]

    >>> unstack_to_array(sr, levels=(2, 0))
    [[ 1. nan]
     [ 2. nan]
     [nan  3.]
     [nan  4.]]
    ```
    """
    checks.assert_type(arg, pd.Series)
    checks.assert_type(arg.index, pd.MultiIndex)

    if levels is None:
        levels = range(arg.index.nlevels)
    elif isinstance(levels, (int, str)):
        # A lone level would otherwise fail to iterate (int) or be split into
        # characters (str)
        levels = (levels, )

    axis_sizes = []
    axis_positions = []
    for level in levels:
        vals = index_fns.select_levels(arg.index, level).to_numpy()
        # `return_inverse` yields, for every element, its position among the
        # sorted unique values - the coordinate along this axis
        unique_vals, vals_idx = np.unique(vals, return_inverse=True)
        axis_sizes.append(len(unique_vals))
        axis_positions.append(vals_idx)

    a = np.full(axis_sizes, np.nan)
    # One coordinate array per axis; together they address one cell per value
    a[tuple(axis_positions)] = arg.values
    return a
Пример #6
0
def group_by_to_index(index, group_by):
    """Turn the mapper `group_by` into a `pd.Index` of the same length as `index`.

    An integer, string, tuple or list is passed to `select_levels` together
    with `index`. Anything that is still not a `pd.Index` afterwards (e.g. a
    series or NumPy array of raw groups) is wrapped in `pd.Index`.

    !!! note
        Index and mapper must have the same length."""
    level_spec_types = (int, str, tuple, list)
    if isinstance(group_by, level_spec_types):
        group_by = select_levels(index, group_by)

    grouper = group_by if isinstance(group_by, pd.Index) else pd.Index(group_by)
    # Length agreement is enforced by the shared checks helper
    checks.assert_same_len(index, grouper)
    return grouper
Пример #7
0
def group_by_to_index(index, group_by):
    """Turn the mapper `group_by` into a `pd.Index` of the same length as `index`.

    `None` and booleans are returned unchanged. An integer, string, tuple or
    list is passed to `index_fns.select_levels` together with `index`.
    Anything that is still not a `pd.Index` afterwards is wrapped in `pd.Index`.

    Raises `ValueError` if the resulting index and `index` differ in length.

    !!! note
        Index and mapper must have the same length."""
    passthrough = group_by is None or isinstance(group_by, bool)
    if passthrough:
        return group_by

    if isinstance(group_by, (int, str, tuple, list)):
        grouper = index_fns.select_levels(index, group_by)
    else:
        grouper = group_by
    if not isinstance(grouper, pd.Index):
        grouper = pd.Index(grouper)

    if len(grouper) != len(index):
        raise ValueError("group_by and index must have the same length")
    return grouper
Пример #8
0
 def apply_func(obj_index: tp.Index) -> tp.Index:
     """Pass `obj_index` and `level_names` (from the enclosing scope) to `index_fns.select_levels`."""
     selected = index_fns.select_levels(obj_index, level_names)
     return selected