def loc_searchsorted(self,
        values: tp.Any,
        *,
        side_left: bool = True,
        fill_value: tp.Any = np.nan,
        ) -> tp.Union[tp.Hashable, tp.Iterable[tp.Hashable]]:
    '''
    {doc}

    Args:
        {values}
        {side_left}
        {fill_value}
    '''
    # Insertion positions into this index; a position equal to len(self)
    # means the value sorts past the last label.
    positions = self.iloc_searchsorted(values, side_left=side_left)
    size = self.__len__()

    # Scalar result: a single past-the-end position maps to fill_value.
    if positions.ndim == 0 and positions == size:
        return fill_value #type: ignore [no-any-return]

    past_end = positions == size
    if not past_end.any():
        # Every position is in range; translate positions to labels directly.
        return self.values[positions] #type: ignore [no-any-return]

    # Some positions are past the end: allocate an output whose dtype can
    # hold both the index labels and fill_value.
    result = np.empty(len(positions),
            dtype=resolve_dtype(self.dtype, dtype_from_element(fill_value)))
    positions[past_end] = 0 # remap out-of-range positions to a valid index for the gather
    result[:] = self.values[positions]
    result[past_end] = fill_value
    result.flags.writeable = False
    return result #type: ignore [no-any-return]
def pivot_index_map(*,
        index_src: IndexBase,
        depth_level: DepthLevelSpecifier,
        dtypes_src: tp.Optional[tp.Sequence[np.dtype]],
        ) -> PivotIndexMap:
    '''
    Partition the depths of ``index_src`` into "target" depths (selected by
    ``depth_level``, to be moved to the expanded axis) and "group" depths
    (remaining on the contracted axis), and build the mappings needed to
    perform a pivot.

    Args:
        index_src: the index whose depths are being partitioned.
        depth_level: depth(s) of ``index_src`` to treat as pivot targets.
        dtypes_src: must be of length equal to axis
    Returns:
        A populated PivotIndexMap.
    '''
    # We are always moving levels from one axis to another; after application, the expanded axis will always be hierarchical, while the contracted axis may or may not be. From the contract axis, we need to divide the depths into two categories: targets (the depths to be moved and added to expand axis) and groups (unique combinations that remain on the contract axis after removing targets).
    # Unique target labels are added to labels on the expand axis; unique group labels become the new contract axis.
    # Boolean selection per depth: True marks a target depth.
    target_select = np.full(index_src.depth, False)
    target_select[depth_level] = True
    group_select = ~target_select

    group_arrays = []
    target_arrays = []
    for i, v in enumerate(target_select):
        if v:
            target_arrays.append(index_src.values_at_depth(i))
        else:
            group_arrays.append(index_src.values_at_depth(i))

    group_depth = len(group_arrays)
    target_depth = len(target_arrays)
    group_to_dtype: tp.Dict[tp.Optional[tp.Hashable], np.dtype] = {}
    targets_unique: tp.Iterable[tp.Hashable]

    if group_depth == 0:
        # All depths are targets; a single pseudo-group keyed by None holds
        # every target tuple. targets must be a tuple.
        group_to_target_map = {
                None: {v: idx for idx, v in enumerate(zip(*target_arrays))}
                }
        targets_unique = [k for k in group_to_target_map[None]]
        if dtypes_src is not None:
            group_to_dtype[None] = resolve_dtype_iter(dtypes_src)
    else:
        group_to_target_map = defaultdict(dict)
        # A dict is used as an insertion-ordered set here.
        targets_unique = dict() # Store targets in order observed

        for axis_idx, (group, target, dtype) in enumerate(zip(
                zip(*group_arrays), # get tuples of len 1 to depth
                zip(*target_arrays),
                (dtypes_src if dtypes_src is not None else repeat(None)),
                )):
            if group_depth == 1:
                # Unwrap single-depth groups from their 1-tuple.
                group = group[0]
            # targets are transferred labels; groups are the new columns
            group_to_target_map[group][target] = axis_idx
            targets_unique[target] = None #type: ignore

            if dtypes_src is not None:
                # Widen the group dtype as additional members are observed.
                if group in group_to_dtype:
                    group_to_dtype[group] = resolve_dtype(group_to_dtype[group], dtype)
                else:
                    group_to_dtype[group] = dtype

    return PivotIndexMap( #pylint: disable=E1120
            targets_unique=targets_unique,
            target_depth=target_depth,
            target_select=target_select,
            group_to_target_map=group_to_target_map, #type: ignore
            group_depth=group_depth,
            group_select=group_select,
            group_to_dtype=group_to_dtype)
def pivot_records_items_to_blocks(*,
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_fields_iloc: tp.Iterable[tp.Hashable],
        func_single: tp.Optional[UFunc],
        func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
        func_no: bool,
        fill_value: tp.Any,
        fill_value_dtype: np.dtype,
        index_outer: 'IndexBase',
        dtypes: tp.Tuple[tp.Optional[np.dtype]],
        kind: str,
        ) -> tp.List[np.ndarray]:
    '''
    Given a Frame's TypeBlocks and pivot parameters, perform the group-by on
    the group_fields and, within each group, aggregate (or extract) values
    per data field, producing one immutable column array per output column.

    Args:
        func_no: if True, no aggregation function is applied; each group must
            contain exactly one row.
        fill_value: value used for any ``index_outer`` labels not observed in
            the grouping.
    Returns:
        A list of immutable 1D arrays, one per output column, each aligned to
        ``index_outer``.
    Raises:
        RuntimeError: if ``func_no`` is True and a group has more than one row.
    '''
    # NOTE: this delivers results by label, row for use in a Frame.from_records_items constructor
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0] #type: ignore
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # we can use fill_value here, as either it will be completely replaced (and not affect dtype evaluation) or be needed (and already there)
            arrays.append([fill_value] * len(index_outer))
        else:
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # try to use the dtype specified; fill values at end if necessary
    # collect all possible ilocs, and remove as observed; if any remain, we have fill targets
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # each group forms a row, each label a value in the index
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label) #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            if len(part) != 1:
                raise RuntimeError('pivot requires aggregation of values; provide a `func` argument.')
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # this is equivalent to extracting a row, but doing so would force a type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(part._extract_array_column(column_key))
        else:
            # func_map: one output column per (data field, function) pair
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    if iloc_not_found:
        # we did not fill all arrays and have values that need to be filled
        # order does not matter
        fill_targets = list(iloc_not_found)
        # mutate in place then make immutable
        for arrays_key in range(len(arrays)): #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray: # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array # restore new array
            else:
                # widen dtype if fill_value cannot be represented as-is
                dtype_resolved = resolve_dtype(array.dtype, fill_value_dtype) # type: ignore
                if array.dtype != dtype_resolved: # type: ignore
                    array = array.astype(dtype_resolved) #type: ignore
                array[fill_targets] = fill_value
                arrays[arrays_key] = array # re-assign new array
            array.flags.writeable = False # type: ignore
    else:
        for arrays_key in range(len(arrays)): #pylint: disable=C0200
            array = arrays[arrays_key]
            if array.__class__ is not np.ndarray: # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array # re-assign new array
            array.flags.writeable = False
    return arrays
def pivot_items_to_block(*,
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_field_iloc: tp.Hashable,
        func_single: tp.Optional[UFunc],
        dtype: tp.Optional[np.dtype],
        fill_value: tp.Any,
        fill_value_dtype: np.dtype,
        index_outer: 'IndexBase',
        kind: str,
        ) -> np.ndarray:
    '''
    Specialized generator of pairs for when we have only one data_field and one function.

    Returns an immutable 1D array aligned to ``index_outer``; labels not
    observed in the grouping are set to ``fill_value``.
    '''
    from static_frame.core.series import Series

    # A single-depth group key is passed as a scalar, not a 1-tuple.
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[0] #type: ignore

    if func_single and dtype is not None:
        # dtype known up-front: pre-fill with fill_value, then overwrite
        # positions observed in the grouping.
        array = np.full(len(index_outer),
                fill_value,
                dtype=resolve_dtype(dtype, fill_value_dtype),
                )
        for label, _, values in blocks.group_extract(axis=0,
                key=group_key,
                extract=data_field_iloc,
                kind=kind,
                ):
            array[index_outer._loc_to_iloc(label)] = func_single(values)
        array.flags.writeable = False
        return array

    if func_single and dtype is None:
        # dtype unknown: collect results first (via a Series) so the dtype
        # can be discovered from the aggregated values.
        def gen() -> tp.Iterator[tp.Tuple[int, tp.Any]]:
            for label, _, values in blocks.group_extract(axis=0,
                    key=group_key,
                    extract=data_field_iloc,
                    kind=kind,
                    ):
                yield index_outer._loc_to_iloc(label), func_single(values)
        post = Series.from_items(gen())
        if len(post) == len(index_outer):
            # every position will be assigned; no fill needed
            array = np.empty(len(index_outer), dtype=post.dtype)
        else:
            array = np.full(len(index_outer),
                    fill_value,
                    dtype=resolve_dtype(post.dtype, fill_value_dtype),
                    )
        # post's index values are the iloc positions yielded by gen()
        array[post.index.values] = post.values
        array.flags.writeable = False
        return array

    # func_no scenario as no mapping here
    if group_depth == 1:
        labels = [index_outer._loc_to_iloc(label)
                for label in blocks._extract_array_column(group_key)]
    else:
        # NOTE: might replace _extract_array_column with an iterator of tuples
        labels = [index_outer._loc_to_iloc(tuple(label))
                for label in blocks._extract_array(column_key=group_key)]
    values = blocks._extract_array_column(data_field_iloc)
    if len(values) == len(index_outer):
        # NOTE(review): here ``dtype`` may be None, in which case np.empty
        # defaults to float64, unlike the branch below which derives from
        # values.dtype — confirm this asymmetry is intended.
        array = np.empty(len(index_outer), dtype=dtype)
    else:
        array = np.full(len(index_outer),
                fill_value,
                dtype=resolve_dtype(values.dtype, fill_value_dtype),
                )
    array[labels] = values
    array.flags.writeable = False
    return array
def test_resolve_dtype(self, dtype_pair: tp.Tuple[np.dtype, np.dtype]) -> None:
    '''Resolving any pair of dtypes must always produce a np.dtype instance.'''
    resolved = resolve_dtype(*dtype_pair)
    self.assertTrue(isinstance(resolved, np.dtype))