def split_into_ranges(self, n=None, range_len=None):
    """Split into `n` ranges each `range_len` long.

    At least one of `range_len` and `n` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.

    Args:
        n (int): Number of ranges to return. When given together with `range_len`,
            `n` window start positions are picked evenly (rounded `np.linspace`)
            among all possible windows.
        range_len (int): Number of rows in each range. Defaults to
            `len(self.index) // n`.

    Returns:
        pd.DataFrame: One column per (original column, range) pair. The column
        labels are built from `self.columns` and the `range_start`/`range_end`
        levels; the rows get a default integer index.

    Raises:
        ValueError: If neither `n` nor `range_len` is set, if `n` or the resulting
            `range_len` is smaller than 1, or if `n` exceeds the number of
            available ranges.

    !!! note
        The datetime-like format of the index will be lost as result of this operation.
        Make sure to store the index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                             a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                             a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0
        ```"""
    if range_len is None and n is None:
        raise ValueError("At least range_len or n must be set")
    if n is not None and n < 1:
        # Guards the integer division below against n=0 and rejects negative counts
        raise ValueError(f"n must be a positive integer, got {n}")
    if range_len is None:
        range_len = len(self.index) // n
    if range_len < 1:
        # A non-positive length would yield empty windows and make the
        # range_end lookup below wrap around to the end of the index
        raise ValueError(f"range_len must be a positive integer, got {range_len}")

    cube = nb.rolling_window_nb(self.to_2d_array(), range_len)
    n_windows = cube.shape[2]
    if n is not None:
        if n > n_windows:
            raise ValueError(f"n cannot be bigger than the maximum number of ranges {n_windows}")
        # Pick n window start positions spread evenly over all windows
        idxs = np.round(np.linspace(0, n_windows - 1, n)).astype(int)
        cube = cube[:, :, idxs]
    else:
        idxs = np.arange(n_windows)
    matrix = np.hstack(cube)

    # Label every range by its first and last (inclusive) index label
    range_starts = pd.Index(self.index[idxs], name='range_start')
    range_ends = pd.Index(self.index[idxs + range_len - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
def build_column_hierarchy(param_list, level_names, ts_columns):
    """Stack one column level per named parameter on top of `ts_columns`.

    Every entry of `param_list` whose counterpart in `level_names` is not `None`
    is turned into an index level via `index_fns.index_from_values`. Multiple
    levels are stacked with `index_fns.stack_indexes`, and the result is combined
    with `ts_columns` through `index_fns.combine_indexes`.

    If no level name is set, `ts_columns` is returned unchanged.
    """
    checks.assert_same_shape(param_list, level_names, axis=0)

    # Parameters without a level name do not get a level of their own
    levels = [
        index_fns.index_from_values(values, name=level_name)
        for values, level_name in zip(param_list, level_names)
        if level_name is not None
    ]

    if len(levels) == 1:
        param_columns = levels[0]
    elif levels:
        param_columns = index_fns.stack_indexes(*levels)
    else:
        param_columns = None

    if param_columns is None:
        return ts_columns
    return index_fns.combine_indexes(param_columns, ts_columns)
def broadcast_index(args, to_shape, index_from=None, axis=0, ignore_sr_names=None, **kwargs):
    """Produce a broadcast index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * 'strict' - ensure that all pandas objects have the same index/columns
            * 'stack' - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * 'ignore' - ignore any index/columns and use a simple range
            * integer - use the index/columns of the i-nth object in `args`
            * None - return None, leaving the original index/columns to the caller
            * everything else is taken as the new index/columns as is

            Any other string raises an error. This includes 'default', which this
            function does not resolve itself (presumably the caller replaces it with
            the value from `vectorbt.settings.broadcasting` beforehand).
        axis (int): Set to 0 for index and 1 for columns.
        ignore_sr_names (bool): Whether to ignore Series names if they are in conflict.

            Conflicting Series names are those that are different but not None.
            Defaults to `ignore_sr_names` in `vectorbt.settings.broadcasting`.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if `index_from` is None.

    Raises:
        TypeError: If `index_from` is an integer and the referenced argument is not a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes differ in strict mode,
            or if indexes cannot be broadcast together.

    !!! note
        Series names are treated as columns with a single element but without a name.
        If a column level without a name loses its meaning, better to convert Series to DataFrames
        with one column prior to broadcasting. If the name of a Series is not that important,
        better to drop it altogether by setting it to None.
    """
    if ignore_sr_names is None:
        # Resolve the default lazily: settings are only needed when the caller
        # did not specify the flag
        from vectorbt import settings

        ignore_sr_names = settings.broadcasting['ignore_sr_names']
    index_str = 'columns' if axis == 1 else 'index'
    to_shape_2d = (to_shape[0], 1) if len(to_shape) == 1 else to_shape
    # maxlen stores the length of the longest index
    maxlen = to_shape_2d[1] if axis == 1 else to_shape_2d[0]
    # Only a real string can select strict mode. Comparing an array-like index_from
    # with == would be evaluated elementwise and either raise an "ambiguous truth
    # value" error or spuriously match a label that happens to equal 'strict'.
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if index_from is not None:
        if isinstance(index_from, int):
            # Take index/columns of the object indexed by index_from
            if not checks.is_pandas(args[index_from]):
                raise TypeError(f"Argument under index {index_from} must be a pandas object")
            new_index = index_fns.get_index(args[index_from], axis)
        elif isinstance(index_from, str):
            if index_from == 'ignore':
                # Ignore index/columns
                new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
            elif index_from in ('stack', 'strict'):
                # Check whether all indexes/columns are equal
                last_index = None  # of type pd.Index
                index_conflict = False
                for arg in args:
                    if checks.is_pandas(arg):
                        index = index_fns.get_index(arg, axis)
                        if last_index is not None and not pd.Index.equals(index, last_index):
                            index_conflict = True
                        last_index = index
                if not index_conflict:
                    new_index = last_index
                else:
                    # If pandas objects have different index/columns, stack them together
                    for arg in args:
                        if not checks.is_pandas(arg):
                            continue
                        index = index_fns.get_index(arg, axis)
                        if axis == 1 and checks.is_series(arg) and ignore_sr_names:
                            # ignore Series name
                            continue
                        if checks.is_default_index(index):
                            # ignore simple ranges without name
                            continue
                        if new_index is None:
                            new_index = index
                            continue
                        if is_strict and not pd.Index.equals(index, new_index):
                            # If pandas objects have different index/columns, raise an exception
                            raise ValueError(
                                f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                            )
                        # Broadcasting index must follow the rules of a regular broadcasting operation
                        # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
                        # 1. rule: if indexes are of the same length, they are simply stacked
                        # 2. rule: if index has one element, it gets repeated and then stacked
                        if pd.Index.equals(index, new_index):
                            continue
                        if len(index) != len(new_index):
                            if len(index) > 1 and len(new_index) > 1:
                                raise ValueError("Indexes could not be broadcast together")
                            if len(index) > len(new_index):
                                new_index = index_fns.repeat_index(new_index, len(index))
                            else:
                                index = index_fns.repeat_index(index, len(new_index))
                        new_index = index_fns.stack_indexes(new_index, index, **kwargs)
            else:
                raise ValueError(f"Invalid value {index_from} for {index_str}_from")
        else:
            new_index = index_from

    if new_index is not None:
        if maxlen > len(new_index):
            if is_strict:
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed when {index_str}_from=strict"
                )
            # This happens only when some numpy object is longer than the new pandas index
            # In this case, new pandas index (one element) should be repeated to match this length.
            if maxlen > 1 and len(new_index) > 1:
                raise ValueError("Indexes could not be broadcast together")
            new_index = index_fns.repeat_index(new_index, maxlen)
    elif index_from is not None:
        # new_index=None can mean two things: 1) take original metadata or 2) reset index/columns
        # In case when index_from is not None, we choose 2)
        new_index = pd.RangeIndex(start=0, stop=maxlen, step=1)
    return new_index
def apply_func(obj_index: tp.Index) -> tp.Index:
    """Stack `index` with `obj_index`, placing `index` first when `on_top` is set.

    `index`, `on_top` and `kwargs` are taken from the enclosing scope; `kwargs`
    is forwarded to `index_fns.stack_indexes`.
    """
    ordered = [index, obj_index] if on_top else [obj_index, index]
    return index_fns.stack_indexes(ordered, **kwargs)
def broadcast_index(args, to_shape, index_from=None, axis=0, **kwargs):
    """Produce a broadcasted index/columns.

    Args:
        args (sequence of array_like): Array-like objects.
        to_shape (tuple): Target shape.
        index_from (None, int, str or array_like): Broadcasting rule for this index/these columns.

            Accepts the following values:

            * `None` - return None, leaving the original index/columns to the caller
            * `int` - use the index/columns of the i-nth object in `args`
            * `'strict'` - ensure that all pandas objects have the same index/columns
            * `'stack'` - stack different indexes/columns using `vectorbt.base.index_fns.stack_indexes`
            * everything else is taken as the new index/columns as is

            Any other string raises an error. This includes `'default'`, which this
            function does not resolve itself (presumably the caller replaces it with
            the value from `vectorbt.defaults.broadcasting` beforehand).
        axis (int): Set to 0 for index and 1 for columns.
        **kwargs: Keyword arguments passed to `vectorbt.base.index_fns.stack_indexes`.

    Returns:
        The new index/columns, or None if no index could be derived.

    Raises:
        TypeError: If `index_from` is an integer and the referenced argument is not a pandas object.
        ValueError: If `index_from` is an unknown string, if indexes differ in strict mode,
            or if indexes cannot be broadcast together.
    """
    index_str = 'columns' if axis == 1 else 'index'
    if axis == 1 and len(to_shape) == 1:
        # A one-dimensional target has exactly one (implicit) column
        to_shape = (to_shape[0], 1)
    # maxlen stores the length of the longest index
    maxlen = to_shape[1] if axis == 1 else to_shape[0]
    # Only a real string can select strict mode. Comparing an array-like index_from
    # with == would be evaluated elementwise and either raise an "ambiguous truth
    # value" error or spuriously match a label that happens to equal 'strict'.
    is_strict = isinstance(index_from, str) and index_from == 'strict'
    new_index = None

    if index_from is None:
        return new_index

    if isinstance(index_from, int):
        # Take index/columns of the object indexed by index_from
        if not checks.is_pandas(args[index_from]):
            raise TypeError(f"Argument under index {index_from} must be a pandas object")
        new_index = index_fns.get_index(args[index_from], axis)
    elif isinstance(index_from, str):
        if index_from not in ('stack', 'strict'):
            raise ValueError(f"Invalid value {index_from} for {index_str}_from")
        # If pandas objects have different index/columns, stack them together
        for arg in args:
            if not checks.is_pandas(arg):
                continue
            index = index_fns.get_index(arg, axis)
            if checks.is_default_index(index):
                # ignore simple ranges without name
                continue
            if new_index is None:
                new_index = index
                continue
            if is_strict and not pd.Index.equals(index, new_index):
                # If pandas objects have different index/columns, raise an exception
                raise ValueError(
                    f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
                )
            # Broadcasting index must follow the rules of a regular broadcasting operation
            # https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html#general-broadcasting-rules
            # 1. rule: if indexes are of the same length, they are simply stacked
            # 2. rule: if index has one element, it gets repeated and then stacked
            if pd.Index.equals(index, new_index):
                continue
            if len(index) != len(new_index):
                if len(index) > 1 and len(new_index) > 1:
                    raise ValueError("Indexes could not be broadcast together")
                if len(index) > len(new_index):
                    new_index = index_fns.repeat_index(new_index, len(index))
                else:
                    index = index_fns.repeat_index(index, len(new_index))
            new_index = index_fns.stack_indexes(new_index, index, **kwargs)
    else:
        new_index = index_from

    if new_index is not None and maxlen > len(new_index):
        if is_strict:
            raise ValueError(
                f"Broadcasting {index_str} is not allowed for {index_str}_from=strict"
            )
        # This happens only when some numpy object is longer than the new pandas index
        # In this case, new pandas index (one element) should be repeated to match this length.
        if maxlen > 1 and len(new_index) > 1:
            raise ValueError("Indexes could not be broadcast together")
        new_index = index_fns.repeat_index(new_index, maxlen)
    return new_index
def split_into_ranges(self, n=None, range_len=None, start_idxs=None, end_idxs=None):
    """Either split into `n` ranges each `range_len` long, or split into ranges between
    `start_idxs` and `end_idxs`.

    At least one of `range_len`, `n`, or `start_idxs` and `end_idxs` must be set.
    If `range_len` is `None`, will split evenly into `n` ranges.
    If `n` is `None`, will return the maximum number of ranges of length `range_len`.
    If `start_idxs` and `end_idxs`, will split into ranges between both arrays.
    Both index arrays must be either NumPy arrays with positions (last exclusive)
    or pandas indexes with labels (last inclusive).

    Created levels `range_start` and `range_end` will contain labels (last inclusive).

    Args:
        n (int): Number of ranges to return. If more ranges are available,
            `n` of them are picked evenly (rounded `np.linspace`).
        range_len (int): Number of rows in each range. Only used when
            `start_idxs` and `end_idxs` are not set. Defaults to `len(self.index) // n`.
        start_idxs (array_like or pd.Index): Start of each range.
        end_idxs (array_like or pd.Index): End of each range.

    Returns:
        pd.DataFrame: One column per (original column, range) pair. The column
        labels are built from `self.columns` and the `range_start`/`range_end`
        levels; the rows get a default integer index.

    Raises:
        ValueError: If the arguments are incomplete, if `n` or `range_len` is out of
            bounds, if there are no ranges, or if ranges differ in length.

    !!! note
        Ranges must have the same length.

        The datetime-like format of the index will be lost as result of this operation.
        Make sure to store the index metadata such as frequency information beforehand.

    Example:
        ```python-repl
        >>> print(df.vbt.split_into_ranges(n=2))
                             a                     b                     c
        range_start 2020-01-01 2020-01-04 2020-01-01 2020-01-04 2020-01-01 2020-01-04
        range_end   2020-01-02 2020-01-05 2020-01-02 2020-01-05 2020-01-02 2020-01-05
        0                  1.0        4.0        5.0        2.0        1.0        2.0
        1                  2.0        5.0        4.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(range_len=4))
                             a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                  1.0        2.0        5.0        4.0        1.0        2.0
        1                  2.0        3.0        4.0        3.0        2.0        3.0
        2                  3.0        4.0        3.0        2.0        3.0        2.0
        3                  4.0        5.0        2.0        1.0        2.0        1.0

        >>> print(df.vbt.split_into_ranges(start_idxs=[0, 1], end_idxs=[4, 5]))
                             a                     b                     c
        range_start 2020-01-01 2020-01-02 2020-01-01 2020-01-02 2020-01-01 2020-01-02
        range_end   2020-01-04 2020-01-05 2020-01-04 2020-01-05 2020-01-04 2020-01-05
        0                    1          2          5          4          1          2
        1                    2          3          4          3          2          3
        2                    3          4          3          2          3          2
        3                    4          5          2          1          2          1

        >>> print(df.vbt.split_into_ranges(
        ...     start_idxs=pd.Index(['2020-01-01', '2020-01-03']),
        ...     end_idxs=pd.Index(['2020-01-02', '2020-01-04'])
        ... ))
                             a                     b                     c
        range_start 2020-01-01 2020-01-03 2020-01-01 2020-01-03 2020-01-01 2020-01-03
        range_end   2020-01-02 2020-01-04 2020-01-02 2020-01-04 2020-01-02 2020-01-04
        0                    1          3          5          3          1          3
        1                    2          4          4          2          2          2
        ```"""
    if n is not None and n < 1:
        # Guards the integer division below against n=0 and rejects negative counts
        raise ValueError(f"n must be a positive integer, got {n}")

    if start_idxs is None and end_idxs is None:
        if range_len is None and n is None:
            raise ValueError("At least range_len, n, or start_idxs and end_idxs must be set")
        index_len = len(self.index)
        if range_len is None:
            range_len = index_len // n
        if range_len < 1 or range_len > index_len:
            # Otherwise the windows would be empty or would not fit into the index
            raise ValueError(
                f"range_len must be between 1 and the index length {index_len}, got {range_len}"
            )
        # Every possible window of range_len rows (end positions are exclusive)
        start_idxs = np.arange(index_len - range_len + 1)
        end_idxs = np.arange(range_len, index_len + 1)
    elif start_idxs is None or end_idxs is None:
        raise ValueError("Both start_idxs and end_idxs must be set")
    else:
        if isinstance(start_idxs, pd.Index):
            # Translate labels into positions
            start_idxs = np.where(self.index.isin(start_idxs))[0]
        else:
            start_idxs = np.asarray(start_idxs)
        if isinstance(end_idxs, pd.Index):
            # Labels are inclusive while positions are exclusive, hence + 1
            end_idxs = np.where(self.index.isin(end_idxs))[0] + 1
        else:
            end_idxs = np.asarray(end_idxs)

    if start_idxs.shape != end_idxs.shape:
        # Prevents silent NumPy broadcasting of mismatching start/end arrays
        raise ValueError("start_idxs and end_idxs must have the same shape")
    if start_idxs.size == 0:
        raise ValueError("start_idxs and end_idxs must contain at least one range")
    range_lens = end_idxs - start_idxs
    if np.any(range_lens != range_lens.item(0)):
        raise ValueError("Ranges must have the same length")

    if n is not None:
        if n > len(start_idxs):
            raise ValueError(
                f"n cannot be bigger than the maximum number of ranges {len(start_idxs)}"
            )
        # Pick n ranges spread evenly over all available ranges
        idxs = np.round(np.linspace(0, len(start_idxs) - 1, n)).astype(int)
        start_idxs = start_idxs[idxs]
        end_idxs = end_idxs[idxs]
    matrix = nb.concat_ranges_nb(self.to_2d_array(), start_idxs, end_idxs)

    # Label every range by its first and last (inclusive) index label
    range_starts = pd.Index(self.index[start_idxs], name='range_start')
    range_ends = pd.Index(self.index[end_idxs - 1], name='range_end')
    range_columns = index_fns.stack_indexes(range_starts, range_ends)
    new_columns = index_fns.combine_indexes(self.columns, range_columns)
    return pd.DataFrame(matrix, columns=new_columns)
def apply_func(obj_index):
    """Stack `index` with `obj_index`, placing `index` first when `on_top` is set.

    `index` and `on_top` are taken from the enclosing scope.
    """
    first, second = (index, obj_index) if on_top else (obj_index, index)
    return index_fns.stack_indexes(first, second)