Пример #1
0
def make_symmetric(arg):
    """Build a square DataFrame from `arg` whose index and columns are identical.

    The shared axis holds the unique labels found in either the index or the
    columns of `arg`. The values of `arg` are written first; cells that are
    still null afterwards are filled from the transpose of `arg`.

    The index and the columns must be of the same type and, for a
    `pd.MultiIndex`, must have the same number of levels.
    """
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    arg = to_2d(arg)
    checks.assert_same_type(arg.index, arg.columns)

    is_multi = isinstance(arg.index, pd.MultiIndex)
    if is_multi:
        checks.assert_same_len(arg.index.names, arg.columns.names)
        row_names = tuple(arg.index.names)
        col_names = tuple(arg.columns.names)
    else:
        row_names = arg.index.name
        col_names = arg.columns.name

    # Identical names are reused; differing names are paired up (per level for a MultiIndex)
    if row_names == col_names:
        merged_name = row_names
    elif is_multi:
        merged_name = tuple(zip(row_names, col_names))
    else:
        merged_name = (row_names, col_names)

    labels = np.unique(np.concatenate((arg.index, arg.columns)))

    # Rename the axes on a copy so the caller's object keeps its original names
    arg = arg.copy()
    if is_multi:
        square_index = pd.MultiIndex.from_tuples(labels, names=merged_name)
        arg.index.names = merged_name
        arg.columns.names = merged_name
    else:
        square_index = pd.Index(labels, name=merged_name)
        arg.index.name = merged_name
        arg.columns.name = merged_name

    result = pd.DataFrame(index=square_index, columns=square_index)
    result.loc[:, :] = arg
    # Whatever is still missing is taken from the mirrored (transposed) values
    result[result.isnull()] = arg.transpose()
    return result
Пример #2
0
            def __init__(self, param_mappers):
                """Attach one `_<param_name>_loc` indexer to this instance per parameter.

                Args:
                    param_mappers: One `pd.Series` mapper per entry of `param_names`
                        (taken from the enclosing scope), in the same order.
                """
                checks.assert_same_len(param_names, param_mappers)

                class ParamLoc:
                    """Select columns of `obj` by parameter value using `mapper`."""

                    def __init__(self, obj, mapper):
                        checks.assert_type(mapper, pd.Series)

                        self.obj = obj
                        # Object-typed params are compared as strings; `astype` returns
                        # a new Series, so the mapper passed in is left untouched
                        self.mapper = mapper.astype(str) if mapper.dtype == 'O' else mapper

                    def get_indices(self, key):
                        """Translate the parameter value(s) in `key` into integer positions."""
                        param_values = self.mapper
                        if param_values.dtype == 'O':
                            # The key has to be cast to string the same way as the mapper
                            if isinstance(key, slice):
                                lower = None if key.start is None else str(key.start)
                                upper = None if key.stop is None else str(key.stop)
                                key = slice(lower, upper, key.step)
                            elif isinstance(key, (list, np.ndarray)):
                                key = [str(item) for item in key]
                            else:
                                # Tuples, objects, etc.
                                key = str(key)
                        # Let pandas do the lookup: positions labelled by parameter values
                        positions = pd.Series(np.arange(len(param_values.index)), index=param_values.values)
                        selected = positions.loc[key]
                        return selected.values if isinstance(selected, pd.Series) else selected

                    def __getitem__(self, key):
                        positions = self.get_indices(key)
                        selects_many = isinstance(key, (slice, list, np.ndarray))
                        # The mapper's name is expected to hold the column level name of the param
                        level_name = self.mapper.name

                        def pd_indexing_func(obj):
                            subset = obj.iloc[:, positions]
                            # A single selected param no longer needs its own column level
                            drop_param_level = (
                                not selects_many
                                and level_name is not None
                                and checks.is_frame(subset)
                                and isinstance(subset.columns, pd.MultiIndex)
                            )
                            if drop_param_level:
                                subset.columns = index_fns.drop_levels(subset.columns, level_name)
                            return subset

                        return indexing_func(self.obj, pd_indexing_func)

                for position, name in enumerate(param_names):
                    loc_attr = f'_{name}_loc'
                    setattr(self, loc_attr, ParamLoc(self, param_mappers[position]))
Пример #3
0
def make_symmetric(arg):
    """Make `arg` symmetric.

    The result is a square DataFrame whose index and columns are identical and
    hold the unique labels found in either axis of `arg`. The values of `arg`
    are written first; cells that are still null are filled from its transpose.

    Requires the index and columns to be of the same type and, for a
    `pd.MultiIndex`, to have the same number of levels.

    Example:
        ```python-repl
        >>> import pandas as pd
        >>> from vectorbt.utils.reshape_fns import make_symmetric

        >>> df = pd.DataFrame([[1, 2], [3, 4]], index=['a', 'b'], columns=['c', 'd'])

        >>> print(make_symmetric(df))
             a    b    c    d
        a  NaN  NaN  1.0  2.0
        b  NaN  NaN  3.0  4.0
        c  1.0  3.0  NaN  NaN
        d  2.0  4.0  NaN  NaN
        ```"""
    checks.assert_type(arg, (pd.Series, pd.DataFrame))
    arg = to_2d(arg)
    checks.assert_same_type(arg.index, arg.columns)

    is_multi = isinstance(arg.index, pd.MultiIndex)
    if is_multi:
        checks.assert_same_len(arg.index.names, arg.columns.names)
        row_names = tuple(arg.index.names)
        col_names = tuple(arg.columns.names)
    else:
        row_names = arg.index.name
        col_names = arg.columns.name

    # Identical names are reused; differing names are paired up (per level for a MultiIndex)
    if row_names == col_names:
        merged_name = row_names
    elif is_multi:
        merged_name = tuple(zip(row_names, col_names))
    else:
        merged_name = (row_names, col_names)

    labels = np.unique(np.concatenate((arg.index, arg.columns)))

    # Rename the axes on a copy so the caller's object keeps its original names
    arg = arg.copy()
    if is_multi:
        square_index = pd.MultiIndex.from_tuples(labels, names=merged_name)
        arg.index.names = merged_name
        arg.columns.names = merged_name
    else:
        square_index = pd.Index(labels, name=merged_name)
        arg.index.name = merged_name
        arg.columns.name = merged_name

    result = pd.DataFrame(index=square_index, columns=square_index)
    result.loc[:, :] = arg
    # Whatever is still missing is taken from the mirrored (transposed) values
    result[result.isnull()] = arg.transpose()
    return result
Пример #4
0
def group_by_to_index(index, group_by):
    """Turn the `group_by` mapper into a `pd.Index` of the same length as `index`.

    `group_by` may be an integer (level by position), a string (level by name),
    a tuple or list (multiple levels) - these are resolved against `index` with
    `select_levels` - or an index, a series or a NumPy array holding the groups,
    which is wrapped into a `pd.Index` unless it already is one.

    !!! note
        Index and mapper must have the same length."""
    level_spec_types = (int, str, tuple, list)
    if isinstance(group_by, level_spec_types):
        group_by = select_levels(index, group_by)
    groups = group_by if isinstance(group_by, pd.Index) else pd.Index(group_by)
    checks.assert_same_len(index, groups)
    return groups
Пример #5
0
 def test_assert_same_len(self):
     """Check that `checks.assert_same_len` accepts equal lengths and rejects unequal ones."""
     # Both pairs have one top-level element each and are expected to pass
     checks.assert_same_len([[1]], [[2]])
     checks.assert_same_len([[1]], [[2, 3]])
     try:
         checks.assert_same_len([[1]], [[2], [3]])
     except Exception:
         # Expected: lengths 1 and 2 differ
         pass
     else:
         # Raised outside the `try` body so the handler above cannot swallow it
         raise AssertionError("assert_same_len did not raise for arguments of different length")
Пример #6
0
            def __init__(self, param_mappers, indexing_func):
                """Attach one `_<param_name>_loc` indexer to this instance per parameter.

                Args:
                    param_mappers: One mapper per entry of `param_names` (taken from
                        the enclosing scope), in the same order.
                    indexing_func: Passed through unchanged to every `_ParamLoc`.
                """
                checks.assert_same_len(param_names, param_mappers)

                for position, name in enumerate(param_names):
                    loc_attr = f'_{name}_loc'
                    param_loc = _ParamLoc(self, param_mappers[position], indexing_func)
                    setattr(self, loc_attr, param_loc)
Пример #7
0
def from_params_pipeline(ts_list,
                         param_list,
                         level_names,
                         num_outputs,
                         custom_func,
                         *args,
                         pass_lists=False,
                         param_product=False,
                         broadcast_kwargs=None,
                         return_raw=False,
                         **kwargs):
    """A pipeline for calculating an indicator, used by `IndicatorFactory`.

    Does the following:

    * Takes one or multiple time series objects in `ts_list` and broadcasts them. For example:

    ```python-repl
    >>> sr = pd.Series([1, 2], index=['x', 'y'])
    >>> df = pd.DataFrame([[3, 4], [5, 6]], index=['x', 'y'], columns=['a', 'b'])
    >>> ts_list = [sr, df]

    >>> ts_list = vbt.utils.reshape_fns.broadcast(*ts_list)
    >>> print(ts_list[0])
       a  b
    x  1  1
    y  2  2
    >>> print(ts_list[1])
       a  b
    x  3  4
    y  5  6
    ```

    * Takes one or multiple parameters in `param_list`, converts them to NumPy arrays and
        broadcasts them. For example:

    ```python-repl
    >>> p1, p2, p3 = 1, [2, 3, 4], [False]
    >>> param_list = [p1, p2, p3]

    >>> param_list = vbt.utils.reshape_fns.broadcast(*param_list)
    >>> print(param_list[0])
    array([1, 1, 1])
    >>> print(param_list[1])
    array([2, 3, 4])
    >>> print(param_list[2])
    array([False, False, False])
    ```

    * Performs calculation using `custom_func` to build output arrays (`output_list`) and
        other objects (`other_list`, optional). For example:

    ```python-repl
    >>> def custom_func(ts1, ts2, p1, p2, p3, *args, **kwargs):
    ...     return pd.DataFrame.vbt.concat(
    ...         (ts1.values + ts2.values) + p1[0] * p2[0],
    ...         (ts1.values + ts2.values) + p1[1] * p2[1],
    ...         (ts1.values + ts2.values) + p1[2] * p2[2]
    ...     )

    >>> output = custom_func(*ts_list, *param_list)
    >>> print(output)
    array([[ 6,  7,  7,  8,  8,  9],
           [ 9, 10, 10, 11, 11, 12]])
    ```

    * Creates new column hierarchy based on parameters and level names. For example:

    ```python-repl
    >>> p1_columns = pd.Index(param_list[0], name='p1')
    >>> p2_columns = pd.Index(param_list[1], name='p2')
    >>> p3_columns = pd.Index(param_list[2], name='p3')
    >>> p_columns = vbt.utils.index_fns.stack(p1_columns, p2_columns, p3_columns)
    >>> new_columns = vbt.utils.index_fns.combine(p_columns, ts_list[0].columns)

    >>> output_df = pd.DataFrame(output, columns=new_columns)
    >>> print(output_df)
    p1      1      1      1      1      1      1
    p2      2      2      3      3      4      4
    p3  False  False  False  False  False  False
            a      b      a      b      a      b
    0       6      7      7      8      8      9
    1       9     10     10     11     11     12
    ```

    * Broadcasts objects in `ts_list` to match the shape of objects in `output_list` through tiling.
        This is done to be able to compare them and generate signals, since you cannot compare NumPy
        arrays that have totally different shapes, such as (2, 2) and (2, 6). For example:

    ```python-repl
    >>> new_ts_list = [
    ...     ts_list[0].vbt.tile(len(param_list[0]), as_columns=p_columns),
    ...     ts_list[1].vbt.tile(len(param_list[0]), as_columns=p_columns)
    ... ]
    >>> print(new_ts_list[0])
    p1      1      1      1      1      1      1
    p2      2      2      3      3      4      4
    p3  False  False  False  False  False  False
            a      b      a      b      a      b
    0       1      1      1      1      1      1
    1       2      2      2      2      2      2
    ```

    * Builds parameter mappers that will link parameters from `param_list` to columns in
        `ts_list` and `output_list`. This is done to enable column indexing using parameter values.

    Args:
        ts_list (list of array_like): A list of time series objects. At least one must be a pandas object.
        param_list (list of array_like): A list of parameters. Each element is either an array-like object
            or a single value of any type.
        level_names (list of str): A list of column level names corresponding to each parameter.
        num_outputs (int): The number of output arrays.
        custom_func (function): A custom calculation function. See `IndicatorFactory.from_custom_func`.
        *args: Arguments passed to the `custom_func`.
        pass_lists (bool): If True, arguments are passed to the `custom_func` as lists. Defaults to False.
        param_product (bool): If True, builds a Cartesian product out of all parameters. Defaults to False.
        broadcast_kwargs (dict, optional): Keyword arguments passed to the `vectorbt.utils.reshape_fns.broadcast`
            on time series objects. Defaults to None, which passes no extra arguments.
        return_raw (bool): If True, returns the raw output without post-processing. Defaults to False.
        **kwargs: Keyword arguments passed to the `custom_func`.

            Some common arguments include `return_cache` to return cache and `cache` to pass cache.
            Those are only applicable to `custom_func` that supports it (`custom_func` created using
            `IndicatorFactory.from_apply_func` are supported by default).
    Returns:
        A list of transformed inputs (`pandas_like`), a list of generated outputs (`pandas_like`),
        a list of parameter arrays (`numpy.ndarray`), a list of parameter mappers (`pandas.Series`),
        a list of other generated outputs that are outside of `num_outputs`.

        If `return_raw` is True or a truthy `return_cache` is passed in `kwargs`, the value
        returned by `custom_func` is returned as-is instead.
    """
    # A fresh dict per call instead of a shared mutable default
    if broadcast_kwargs is None:
        broadcast_kwargs = {}
    # Check time series objects
    checks.assert_type(ts_list[0], (pd.Series, pd.DataFrame))
    for other_ts in ts_list[1:]:
        other_ts.vbt.timeseries.validate()
    if len(ts_list) > 1:
        # Broadcast time series
        ts_list = reshape_fns.broadcast(*ts_list,
                                        **broadcast_kwargs,
                                        writeable=True)
    # Check level names
    checks.assert_type(level_names, (list, tuple))
    checks.assert_same_len(param_list, level_names)
    for ts in ts_list:
        # Every time series object should be free of the specified level names in its columns
        for level_name in level_names:
            checks.assert_level_not_exists(ts, level_name)
    # Convert params to 1-dim arrays
    param_list = [reshape_fns.to_1d(param) for param in param_list]
    if len(param_list) > 1:
        if param_product:
            # Make Cartesian product out of all params (they are already 1-dim here)
            param_list = [
                np.asarray(values)
                for values in zip(*itertools.product(*param_list))
            ]
        else:
            # Broadcast such that each array has the same length
            param_list = reshape_fns.broadcast(*param_list, writeable=True)
    # Perform main calculation
    if pass_lists:
        output_list = custom_func(ts_list, param_list, *args, **kwargs)
    else:
        output_list = custom_func(*ts_list, *param_list, *args, **kwargs)
    if return_raw or kwargs.get('return_cache', False):
        return output_list  # return raw cache outputs
    if isinstance(output_list, (tuple, list, List)):
        output_list = list(output_list)
    else:
        output_list = [output_list]
    # Other outputs should be returned without post-processing (for example cache_dict);
    # the slice is empty when there are no more than num_outputs outputs
    other_list = output_list[num_outputs:]
    # Process only the num_outputs outputs
    output_list = output_list[:num_outputs]
    if len(param_list) > 0:
        # Build new column levels on top of time series levels
        new_columns = build_column_hierarchy(
            param_list, level_names,
            reshape_fns.to_2d(ts_list[0]).columns)
        # Wrap into new pandas objects both time series and output objects
        new_ts_list = [
            broadcast_ts(ts, param_list[0].shape[0], new_columns)
            for ts in ts_list
        ]
        # Build mappers to easily map between parameters and columns
        mapper_list = [
            build_mapper(param, ts_list[0], new_columns, level_names[i])
            for i, param in enumerate(param_list)
        ]
    else:
        # Some indicators don't have any params
        new_columns = reshape_fns.to_2d(ts_list[0]).columns
        new_ts_list = list(ts_list)
        mapper_list = []
    output_list = [
        wrap_output(output, ts_list[0], new_columns)
        for output in output_list
    ]
    if len(mapper_list) > 1:
        # Tuple object is a mapper that accepts tuples of parameters
        tuple_mapper = build_tuple_mapper(mapper_list, new_columns,
                                          tuple(level_names))
        mapper_list.append(tuple_mapper)
    return new_ts_list, output_list, param_list, mapper_list, other_list
Пример #8
0
 def test_assert_same_len(self):
     """Check that `checks.assert_same_len` accepts equal lengths and rejects unequal ones."""
     # Each of these has one top-level element, like the reference [[1]]
     for same_length in ([[2]], [[2, 3]]):
         checks.assert_same_len([[1]], same_length)
     # Two top-level elements against one must be rejected
     with pytest.raises(Exception):
         checks.assert_same_len([[1]], [[2], [3]])