def key_to_ascending_key(key: GetItemKeyType, size: int) -> GetItemKeyType:
    '''
    Return a form of ``key`` whose selection order is ascending.

    Slices are delegated to ``slice_to_ascending_slice``; arrays and lists come back as sorted copies; a Series is sorted by its index and a Frame by its columns. Strings, objects without ``__len__``, and falsy keys (such as an empty list) are handed back unchanged.

    Args:
        key: the selection to normalize.
        size: the length of the container on this axis; only forwarded when ``key`` is a slice.

    Raises:
        RuntimeError: if ``key`` is a truthy, sized object of a type not handled here.
    '''
    from static_frame.core.frame import Frame
    from static_frame.core.series import Series

    if isinstance(key, slice):
        ascending = slice_to_ascending_slice(key, size=size)
    elif isinstance(key, str) or not hasattr(key, '__len__'):
        # a single label: there is nothing to order
        ascending = key
    elif isinstance(key, np.ndarray):
        # arrays must be tested before the truthiness check below, as an array cannot be reliably evaluated as a Boolean
        ascending = np.sort(key, kind=DEFAULT_SORT_KIND)
    elif not key:
        ascending = key
    elif isinstance(key, list):
        ascending = sorted(key)
    elif isinstance(key, Series):
        ascending = key.sort_index()
    elif isinstance(key, Frame):
        # when used in assignment, the columns need to be in sorted order
        ascending = key.sort_columns()
    else:
        raise RuntimeError(f'unhandled key {key}')

    return ascending
def loc_to_iloc(self,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        key_transform: KeyTransformType = None,
        ) -> GetItemKeyType:
    '''
    Translate a label-based key into an integer-based key for this index.

    Note: a Boolean Series whose index does not equal this index is reindexed to this index (missing labels filled with False) and then passed on as a Boolean array.

    Args:
        key: the label selection; an ILoc is unwrapped and returned directly, while Index and Series keys are reduced to their values.
        offset: forwarded to ``LocMap.loc_to_iloc``. A default of None is critical to avoid large overhead in unnecessary application of offsets.
        key_transform: a function that transforms keys to a specialized type; used by IndexDate indices.

    Returns:
        A GetItemKey type that is based on integers, compatible with TypeBlocks.
    '''
    from static_frame.core.series import Series

    if self._recache:
        self._update_array_cache()

    if isinstance(key, ILoc):
        # already positional: hand back the wrapped key
        return key.key

    if isinstance(key, Index):
        # for an Index, only the labels it holds are of interest
        key = key.values
    elif isinstance(key, Series):
        if key.dtype == bool and not key.index.equals(self):
            # align the Boolean selection to this index before taking the array
            key = key.reindex(
                    self,
                    fill_value=False,
                    check_equals=False,
                    ).values
        else:
            # either non-Boolean values used as labels, or a Boolean Series with an equal index
            key = key.values

    if self._map is None:
        # no label mapping is held: loc is iloc
        if isinstance(key, np.ndarray):
            # an array key must be Boolean or integer; anything that is neither Boolean nor the default integer type is cast to the default integer type
            if not key.dtype == bool and key.dtype != DTYPE_INT_DEFAULT:
                key = key.astype(DTYPE_INT_DEFAULT)
        elif isinstance(key, slice):
            key = slice_to_inclusive_slice(key)
        return key

    if key_transform:
        key = key_transform(key)

    return LocMap.loc_to_iloc(
            label_to_pos=self._map,
            labels=self._labels,
            positions=self._positions, # always an np.ndarray
            key=key,
            offset=offset,
            )
def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
    '''
    Select from this Bus by label.

    Args:
        key: a label-based selection, resolved to positions through the index of the underlying Series.

    Returns:
        A new instance of this class wrapping the selected Series and sharing this instance's store and config; or, when the selection resolves to a single element and ``key`` is not an HLoc with multiple keys, that element itself.
    '''
    iloc_key = self._series._index.loc_to_iloc(key) #type: ignore

    # NOTE: the cache update must happen before values are read; the Series is read again from self afterwards rather than held in a local
    self._update_series_cache_iloc(key=iloc_key)
    values = self._series.values[iloc_key]

    if not isinstance(values, np.ndarray):
        # a single element was selected
        if not (isinstance(key, HLoc) and key.has_key_multiple()):
            return values #type: ignore
        # an HLoc selecting multiple keys must produce a container even though only one element matched
        values = np.array(values)
        values.flags.writeable = False

    selected = Series(
            values,
            index=self._series._index.iloc[iloc_key],
            own_index=True,
            name=self._series._name,
            )
    return self.__class__(
            series=selected,
            store=self._store,
            config=self._config,
            )
def key_from_container_key(
        index: IndexBase,
        key: GetItemKeyType,
        expand_iloc: bool = False,
        ) -> GetItemKeyType:
    '''
    Reduce a container-typed key (Index, Series, optionally ILoc) to a basic key.

    Args:
        index: the index the key will be applied to; used to align Boolean Series and to size the array built for an ILoc.
        key: the selection; any key that is not an Index, a Series, or (with ``expand_iloc``) an ILoc is returned unchanged.
        expand_iloc: if True, an ILoc key is realized as a Boolean array of ``len(index)`` that is True at the positions the ILoc selects.

    Returns:
        The values of an Index or Series key, a Boolean array for an expanded ILoc, or the original key.
    '''
    from static_frame.core.index import Index
    from static_frame.core.index import ILoc
    from static_frame.core.series import Series

    if isinstance(key, Index):
        # only the labels held by the Index are needed
        return key.values

    if isinstance(key, Series):
        if key.dtype == bool and not key.index.equals(index):
            # a Boolean Series on a different index is aligned to the target index; labels it does not cover are not selected
            return key.reindex(
                    index,
                    fill_value=False,
                    check_equals=False,
                    ).values
        # A Boolean Series on an equal index needs no alignment. For every other Series the values are used as keys and the index is ignored: requiring a matching index would only be satisfiable when index and values are identical.
        return key.values

    if expand_iloc and isinstance(key, ILoc):
        # realize the positional selection as a Boolean array
        selected = np.full(len(index), False)
        selected[key.key] = True
        return selected

    # NOTE: a Frame given as a key is not detected here and passes through unchanged
    return key
def loc_to_iloc(
        self,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        key_transform: tp.Optional[tp.Callable[[GetItemKeyType], GetItemKeyType]] = None
        ) -> GetItemKeyType:
    '''
    Translate a label-based key into an integer-based key for this index.

    Note: a Boolean Series for which ``_requires_reindex`` reports a mismatch with this index is reindexed to this index (missing labels filled with False) and then passed on as a Boolean array.

    Args:
        key: the label selection; Index and Series keys are reduced to their values.
        offset: forwarded to ``LocMap.loc_to_iloc``. A default of None is critical to avoid large overhead in unnecessary application of offsets.
        key_transform: a function that transforms keys to a specialized type before lookup.

    Returns:
        A GetItemKey type that is based on integers, compatible with TypeBlocks.
    '''
    from static_frame.core.series import Series

    if self._recache:
        self._update_array_cache()

    if isinstance(key, Index):
        # for an Index, only the labels it holds are of interest
        key = key.values

    if isinstance(key, Series):
        if key.dtype == bool and _requires_reindex(key.index, self):
            # align the Boolean selection to this index first
            key = key.reindex(self, fill_value=False)
        # in all cases only the underlying array is carried forward
        key = key.values

    if self._loc_is_iloc:
        # labels are positions: nothing to look up
        return key

    lookup_key = key_transform(key) if key_transform else key

    return LocMap.loc_to_iloc(
            self._map,
            self._positions, # always an np.ndarray
            lookup_key,
            offset,
            )
def _extract_loc(self, key: GetItemKeyType) -> 'Series':
    '''
    Select from this Series by label.

    Compatibility: Pandas accepts iterables of keys in which some keys are absent from the index, returning a Series as if a reindex had been performed. That is undesirable; use reindex() for that purpose instead.

    Args:
        key: a label-based selection, resolved to positions through this Series' index.

    Returns:
        A new instance of this class holding the selected values; or, when the selection resolves to a single element and ``key`` is not an HLoc with multiple keys, that element itself.
    '''
    iloc_key = self._index.loc_to_iloc(key)
    values = self.values[iloc_key]

    if not isinstance(values, np.ndarray):
        # a single element was selected
        if not (isinstance(key, HLoc) and key.has_key_multiple()):
            return values
        # an HLoc selecting multiple keys must produce a Series even though only one element matched
        values = np.array(values)
        values.flags.writeable = False

    selected_index = self._index.iloc[iloc_key]
    return self.__class__(
            values,
            index=selected_index,
            own_index=True,
            name=self._name,
            )
def loc_to_iloc(
        cls,
        *,
        label_to_pos: tp.Dict[tp.Hashable, int],
        labels: np.ndarray,
        positions: np.ndarray,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        partial_selection: bool = False,
        ) -> GetItemKeyType:
    '''
    Map a label-based key to integer positions.

    Note: all SF objects (Series, Index) need to be converted to basic types before being passed as `key` to this function.

    Args:
        label_to_pos: mapping from label to integer position.
        labels: array of the labels; its dtype determines how datetime64 keys are matched.
        positions: array of integer positions, indexed with Boolean keys.
        key: a slice, a single label, a list or array of labels, or a Boolean array.
        offset: in the context of an IndexHierarchical, the iloc positions returned from this function need to be shifted.
        partial_selection: if True and key is an iterable of labels that includes labels not in the mapping, available matches will be returned rather than raising.

    Returns:
        An integer mapped slice, or GetItemKey type that is based on integers, compatible with TypeBlocks.

    Raises:
        KeyError: if a label is not found in ``label_to_pos`` (unless skipped by ``partial_selection``).
    '''
    # NOTE: ILoc is handled prior to this call, in the Index._loc_to_iloc method
    offset_apply = offset is not None

    if key.__class__ is slice:
        if key == NULL_SLICE:
            if offset_apply:
                # when offset is defined (even if it is zero), null slice is not sufficiently specific; need to convert to an explicit slice relative to the offset
                return slice(offset, len(positions) + offset) #type: ignore
            else:
                return NULL_SLICE
        try:
            return slice(*cls.map_slice_args(
                    label_to_pos.get, #type: ignore
                    key,
                    labels,
                    offset))
        except LocEmpty:
            return EMPTY_SLICE

    labels_is_dt64 = labels.dtype.kind == DTYPE_DATETIME_KIND

    if key.__class__ is np.datetime64:
        # if we have a single dt64, convert this to the key's unit and do a Boolean selection if the key is a less-granular unit
        if (labels.dtype == DTYPE_OBJECT
                and np.datetime_data(key.dtype)[0] in DTYPE_OBJECTABLE_DT64_UNITS): #type: ignore
            key = key.astype(DTYPE_OBJECT) #type: ignore
        elif labels_is_dt64 and key.dtype < labels.dtype: #type: ignore
            key = labels.astype(key.dtype) == key #type: ignore
        # if not different type, keep it the same so as to do a direct, single element selection

    is_array = key.__class__ is np.ndarray
    is_list = isinstance(key, list)

    # can be an iterable of labels (keys) or an iterable of Booleans
    if is_array or is_list:
        if is_array and key.dtype.kind == DTYPE_DATETIME_KIND: #type: ignore
            if (labels.dtype == DTYPE_OBJECT
                    and np.datetime_data(key.dtype)[0] in DTYPE_OBJECTABLE_DT64_UNITS): #type: ignore
                # if key is dt64 and labels are object, then for objectable units we can convert key to object to permit matching in the AutoMap
                # NOTE: tolist() is expected to be faster than astype object for smaller collections
                key = key.tolist() #type: ignore
                is_array = False
                is_list = True
            elif labels_is_dt64 and key.dtype < labels.dtype: #type: ignore
                if key.size == 0: #type: ignore
                    # an empty key selects nothing; reduce() without an initial value raises TypeError on an empty iterable, so build the all-False selection directly
                    key = np.zeros(len(labels), dtype=DTYPE_BOOL)
                else:
                    # change the labels to the dt64 dtype, i.e., if the key is years, recast the labels as years, and do a Boolean selection of everything that matches each key
                    labels_ref = labels.astype(key.dtype) # type: ignore
                    # NOTE: this is only correct if both key and labels are dt64, and key is a less granular unit, as the order in the key will not be used
                    key = reduce(OPERATORS['__or__'],
                            (labels_ref == k for k in key)) # type: ignore
                # let Boolean key advance to next branch

        if is_array and key.dtype == DTYPE_BOOL: #type: ignore
            if offset_apply:
                return positions[key] + offset
            return positions[key]

        # map labels to integer positions, return a list of integer positions
        # NOTE: we may miss the opportunity to identify contiguous keys and extract a slice
        # NOTE: we do more branching here to optimize performance
        if partial_selection:
            if offset_apply:
                return [label_to_pos[k] + offset for k in key if k in label_to_pos] #type: ignore
            return [label_to_pos[k] for k in key if k in label_to_pos] # type: ignore
        if offset_apply:
            return [label_to_pos[k] + offset for k in key] #type: ignore
        return [label_to_pos[k] for k in key] # type: ignore

    # if a single element (an integer, string, or date), we just get the integer out of the map
    if offset_apply:
        return label_to_pos[key] + offset #type: ignore
    return label_to_pos[key] #type: ignore
def loc_to_iloc(self, key: GetItemKeyType) -> GetItemKeyType:
    '''
    This is the low-level loc_to_iloc, analogous to LocMap.loc_to_iloc as used by Index. As such, the key at this point should not be a Series or Index object.

    If key is an np.ndarray, a Boolean array will be passed through; otherwise, it will be treated as an iterable of values to be passed to leaf_loc_to_iloc.

    Args:
        key: a slice, an iterable of leaf locs, a single leaf-loc tuple, or an HLoc.

    Returns:
        A slice, a Boolean array, a single iloc, or a list of ilocs.

    Raises:
        KeyError: if an HLoc key matches nothing across all levels.
    '''
    if isinstance(key, slice):
        # given a top-level definition of a slice (and if that slice results in a single value), we can get a value range
        return slice(*LocMap.map_slice_args(self.leaf_loc_to_iloc, key))

    # this should not match tuples that are leaf-locs
    if isinstance(key, KEY_ITERABLE_TYPES):
        if isinstance(key, np.ndarray) and key.dtype == bool:
            return key # keep as Boolean
        return list(map(self.leaf_loc_to_iloc, key))

    if not isinstance(key, HLoc):
        # assume it is a leaf loc tuple
        return self.leaf_loc_to_iloc(key)

    # everything after this is an HLoc: collect the ilocs of all leaf indices matching the HLoc pattern at each depth
    matches = []
    pending = deque([(self, 0, 0)]) # order matters

    while pending:
        level, depth, offset = pending.popleft()
        depth_key = key[depth]
        next_offset = offset + level.offset

        if level.targets is None:
            # a leaf level: positions are shifted by the accumulated offset
            try:
                matches.append(
                        level.index.loc_to_iloc(depth_key, offset=next_offset))
            except KeyError:
                pass
            continue

        # targets is an iterable np.ndarray of IndexLevel
        try:
            target_iloc = level.index.loc_to_iloc(depth_key) # no offset
        except KeyError:
            continue

        level_targets = level.targets[target_iloc] # one or more IndexLevel objects
        next_depth = depth + 1
        if isinstance(level_targets, IndexLevel):
            # not an ndarray: a single IndexLevel was extracted
            pending.append((level_targets, next_depth, next_offset))
        else:
            pending.extend(
                    (lvl, next_depth, next_offset) for lvl in level_targets)

    match_count = len(matches)
    if match_count == 0:
        raise KeyError('no matching keys across all levels')

    if match_count == 1 and not key.has_key_multiple():
        # drop to a single iloc selection
        return matches[0]

    # NOTE: might be able to combine contiguous ilocs into a single slice
    flat = [] # all parts combined into one flat iloc
    length = len(self)
    for part in matches:
        if isinstance(part, slice):
            flat.extend(range(*part.indices(length)))
        elif isinstance(part, INT_TYPES):
            flat.append(part)
        else:
            # anything else is assumed to be an iterable of ilocs
            flat.extend(part)
    return flat