def _col_idxs_meta(self, col_idxs: tp.Array1d) -> tp.Tuple[tp.Array1d, tp.Array1d]:
    """Resolve metadata for the given column indices.

    Returns a tuple of element indices and the new column array.
    Dispatches automatically: when the records are sorted, the cheaper
    column-range lookup is used; otherwise the column map is consulted."""
    idxs_1d = to_1d_array(col_idxs)
    if self.is_sorted():
        # Sorted records permit the faster range-based selection
        return nb.col_range_select_nb(self.col_range, idxs_1d)
    return nb.col_map_select_nb(self.col_map, idxs_1d)
def get_by_col_idxs(self, col_idxs: tp.Array1d) -> tp.RecordArray:
    """Select the records that belong to the given column indices.

    Returns a new records array. Uses the faster column-range selection
    when the column mapper reports sorted order, and the column-map
    selection otherwise."""
    idxs_1d = to_1d_array(col_idxs)
    mapper = self.col_mapper
    if mapper.is_sorted():
        # Faster path: contiguous per-column ranges
        return nb.record_col_range_select_nb(self.values, mapper.col_range, idxs_1d)
    return nb.record_col_map_select_nb(self.values, mapper.col_map, idxs_1d)
def describe(self,
             percentiles: tp.Optional[tp.ArrayLike] = None,
             ddof: int = 1,
             group_by: tp.GroupByLike = None,
             wrap_kwargs: tp.KwargsLike = None,
             **kwargs) -> tp.SeriesFrame:
    """Return descriptive statistics by column/group.

    Args:
        percentiles: Percentiles to include in the output.
            Defaults to ``[0.25, 0.5, 0.75]``; 0.5 is always included.
        ddof: Delta degrees of freedom used for the standard deviation.
        group_by: Group-by specification forwarded to `reduce`.
        wrap_kwargs: Keyword arguments for wrapping; the result index
            (count/mean/std/min/percentiles/max) is injected here.
        **kwargs: Forwarded to `reduce`.

    Returns:
        Series (one column) or DataFrame (multiple columns/groups) of stats.
    """
    # Normalize percentiles: always a unique, sorted array containing 0.5
    if percentiles is not None:
        percentiles = to_1d_array(percentiles)
    else:
        percentiles = np.array([0.25, 0.5, 0.75])
    percentiles = percentiles.tolist()
    if 0.5 not in percentiles:
        percentiles.append(0.5)
    percentiles = np.unique(percentiles)
    perc_formatted = pd.io.formats.format.format_percentiles(percentiles)
    index = pd.Index(['count', 'mean', 'std', 'min', *perc_formatted, 'max'])
    wrap_kwargs = merge_dicts(dict(name_or_index=index), wrap_kwargs)
    out = self.reduce(generic_nb.describe_reduce_nb, percentiles, ddof,
                      returns_array=True, returns_idx=False,
                      group_by=group_by, wrap_kwargs=wrap_kwargs, **kwargs)
    # Empty columns produce NaN counts; report them as 0 instead.
    if isinstance(out, pd.DataFrame):
        # Assign back rather than `out.loc['count'].fillna(0., inplace=True)`:
        # a row cross-section is a copy for mixed-dtype frames (and under
        # pandas Copy-on-Write), so the in-place fill can silently do nothing.
        out.loc['count'] = out.loc['count'].fillna(0.)
    else:
        if np.isnan(out.loc['count']):
            out.loc['count'] = 0.
    return out
def to_1d_array(self) -> tp.Array1d:
    """Return the wrapped object as a 1-dim NumPy array.

    Delegates to `vectorbt.base.reshape_fns.to_1d`."""
    obj = self.obj
    return reshape_fns.to_1d_array(obj)
def indexing_func_meta(self: ArrayWrapperT,
                       pd_indexing_func: tp.PandasIndexingFunc,
                       index: tp.Optional[tp.IndexLike] = None,
                       columns: tp.Optional[tp.IndexLike] = None,
                       column_only_select: tp.Optional[bool] = None,
                       group_select: tp.Optional[bool] = None,
                       group_by: tp.GroupByLike = None) -> IndexingMetaT:
    """Perform indexing on `ArrayWrapper` and also return indexing metadata.

    Takes into account column grouping.

    Set `column_only_select` to True to index the array wrapper as a Series of columns.
    This way, selection of index (axis 0) can be avoided. Set `group_select` to True
    to select groups rather than columns. Takes effect only if grouping is enabled.

    !!! note
        If `column_only_select` is True, make sure to index the array wrapper
        as a Series of columns rather than a DataFrame. For example, the operation
        `.iloc[:, :2]` should become `.iloc[:2]`. Operations are not allowed if the
        object is already a Series and thus has only one column/group."""
    from vectorbt._settings import settings
    array_wrapper_cfg = settings['array_wrapper']

    # Resolve flags: instance attribute first, then global settings fallback
    if column_only_select is None:
        column_only_select = self.column_only_select
    if column_only_select is None:
        column_only_select = array_wrapper_cfg['column_only_select']
    if group_select is None:
        group_select = self.group_select
    if group_select is None:
        group_select = array_wrapper_cfg['group_select']
    _self = self.regroup(group_by)
    # Group selection only applies when grouping is actually enabled
    group_select = group_select and _self.grouper.is_grouped()
    if index is None:
        index = _self.index
    if not isinstance(index, pd.Index):
        index = pd.Index(index)
    if columns is None:
        if group_select:
            columns = _self.grouper.get_columns()
        else:
            columns = _self.columns
    if not isinstance(columns, pd.Index):
        columns = pd.Index(columns)
    if group_select:
        # Groups as columns
        i_wrapper = ArrayWrapper(index, columns, _self.get_ndim())
    else:
        # Columns as columns
        i_wrapper = ArrayWrapper(index, columns, _self.ndim)
    n_rows = len(index)
    n_cols = len(columns)

    if column_only_select:
        # Treat the object as a Series of columns: only axis-1 selection is allowed
        if i_wrapper.ndim == 1:
            raise IndexingError(
                "Columns only: This object already contains one column of data")
        try:
            # Run the user indexing function on a Series of column positions;
            # the surviving values tell us which columns were selected
            col_mapper = pd_indexing_func(
                i_wrapper.wrap_reduced(np.arange(n_cols), columns=columns))
        except pd.core.indexing.IndexingError as e:
            warnings.warn(
                "Columns only: Make sure to treat this object "
                "as a Series of columns rather than a DataFrame",
                stacklevel=2)
            raise e
        if checks.is_series(col_mapper):
            # Multiple columns survived
            new_columns = col_mapper.index
            col_idxs = col_mapper.values
            new_ndim = 2
        else:
            # A scalar came back -> exactly one column selected
            new_columns = columns[[col_mapper]]
            col_idxs = col_mapper
            new_ndim = 1
        # Index (axis 0) is left untouched in column-only mode
        new_index = index
        idx_idxs = np.arange(len(index))
    else:
        # Probe axis 0: each cell holds its row position, broadcast across columns
        idx_mapper = pd_indexing_func(
            i_wrapper.wrap(
                np.broadcast_to(np.arange(n_rows)[:, None], (n_rows, n_cols)),
                index=index,
                columns=columns))
        if i_wrapper.ndim == 1:
            # Source is a Series: result must still be a Series (no scalars)
            if not checks.is_series(idx_mapper):
                raise IndexingError("Selection of a scalar is not allowed")
            idx_idxs = idx_mapper.values
            col_idxs = 0
        else:
            # Probe axis 1: each cell holds its column position
            col_mapper = pd_indexing_func(
                i_wrapper.wrap(
                    np.broadcast_to(np.arange(n_cols), (n_rows, n_cols)),
                    index=index,
                    columns=columns))
            if checks.is_frame(idx_mapper):
                # 2-dim result: rows come from any column, columns from any row
                idx_idxs = idx_mapper.values[:, 0]
                col_idxs = col_mapper.values[0]
            elif checks.is_series(idx_mapper):
                # 1-dim result: disambiguate which axis collapsed
                one_col = np.all(col_mapper.values == col_mapper.values.item(0))
                one_idx = np.all(idx_mapper.values == idx_mapper.values.item(0))
                if one_col and one_idx:
                    # One index and one column selected, multiple times
                    raise IndexingError(
                        "Must select at least two unique indices in one of both axes")
                elif one_col:
                    # One column selected
                    idx_idxs = idx_mapper.values
                    col_idxs = col_mapper.values[0]
                elif one_idx:
                    # One index selected
                    idx_idxs = idx_mapper.values[0]
                    col_idxs = col_mapper.values
                else:
                    raise IndexingError
            else:
                raise IndexingError("Selection of a scalar is not allowed")
        new_index = index_fns.get_index(idx_mapper, 0)
        if not isinstance(idx_idxs, np.ndarray):
            # One index selected: the result's "columns" come from the index axis
            new_columns = index[[idx_idxs]]
        elif not isinstance(col_idxs, np.ndarray):
            # One column selected
            new_columns = columns[[col_idxs]]
        else:
            new_columns = index_fns.get_index(idx_mapper, 1)
        new_ndim = idx_mapper.ndim

    if _self.grouper.is_grouped():
        # Grouping enabled
        if np.asarray(idx_idxs).ndim == 0:
            raise IndexingError("Flipping index and columns is not allowed")

        if group_select:
            # Selection based on groups
            # Get indices of columns corresponding to selected groups
            group_idxs = col_idxs
            group_idxs_arr = reshape_fns.to_1d_array(group_idxs)
            group_start_idxs = _self.grouper.get_group_start_idxs()[group_idxs_arr]
            group_end_idxs = _self.grouper.get_group_end_idxs()[group_idxs_arr]
            # Expand each [start, end) group span into individual column positions
            ungrouped_col_idxs = get_ranges_arr(group_start_idxs, group_end_idxs)
            ungrouped_columns = _self.columns[ungrouped_col_idxs]
            if new_ndim == 1 and len(ungrouped_columns) == 1:
                ungrouped_ndim = 1
                ungrouped_col_idxs = ungrouped_col_idxs[0]
            else:
                ungrouped_ndim = 2

            # Get indices of selected groups corresponding to the new columns
            # We could do _self.group_by[ungrouped_col_idxs] but indexing operation may have changed the labels
            group_lens = _self.grouper.get_group_lens()[group_idxs_arr]
            ungrouped_group_idxs = np.full(len(ungrouped_columns), 0)
            # NOTE(review): marks positions given by raw group lengths, not their
            # cumulative sums — presumably intended as group boundary offsets;
            # verify against vectorbt upstream for >2 selected groups.
            ungrouped_group_idxs[group_lens[:-1]] = 1
            ungrouped_group_idxs = np.cumsum(ungrouped_group_idxs)

            # Returns: (new wrapper, row indices, group indices, ungrouped column indices)
            return _self.replace(
                index=new_index,
                columns=ungrouped_columns,
                ndim=ungrouped_ndim,
                grouped_ndim=new_ndim,
                group_by=new_columns[ungrouped_group_idxs]
            ), idx_idxs, group_idxs, ungrouped_col_idxs

        # Selection based on columns
        col_idxs_arr = reshape_fns.to_1d_array(col_idxs)
        return _self.replace(
            index=new_index,
            columns=new_columns,
            ndim=new_ndim,
            grouped_ndim=None,
            group_by=_self.grouper.group_by[col_idxs_arr]
        ), idx_idxs, col_idxs, col_idxs

    # Grouping disabled
    return _self.replace(
        index=new_index,
        columns=new_columns,
        ndim=new_ndim,
        grouped_ndim=None,
        group_by=None
    ), idx_idxs, col_idxs, col_idxs