Exemplo n.º 1
0
def key_to_ascending_key(key: GetItemKeyType, size: int) -> GetItemKeyType:
    '''
    Return ``key`` rearranged into ascending order, for every supported key type.

    Args:
        key: a slice, a single element, or a sized container (array, list, Series, or Frame).
        size: the length of the container on this axis
    Returns:
        Slices are delegated to ``slice_to_ascending_slice``; strings, objects without ``__len__``, and empty containers are returned unchanged; arrays and lists are returned as sorted copies; a Series is sorted by its index and a Frame by its columns.
    Raises:
        RuntimeError: if a non-empty sized key is of an unsupported type.
    '''
    from static_frame.core.frame import Frame
    from static_frame.core.series import Series

    # slices have no length, so they must be handled before the scalar check
    if isinstance(key, slice):
        return slice_to_ascending_slice(key, size=size)

    is_single_element = isinstance(key, str) or not hasattr(key, '__len__')
    if is_single_element:
        return key

    # arrays must be checked before truthiness, as a multi-element array is not truthy
    if isinstance(key, np.ndarray):
        return np.sort(key, kind=DEFAULT_SORT_KIND)

    # an empty container is already in order
    if not key:
        return key

    if isinstance(key, list):
        ordered = list(key)
        ordered.sort()
        return ordered

    if isinstance(key, Series):
        return key.sort_index()

    if isinstance(key, Frame):
        # when used in assignment the columns must be sorted
        return key.sort_columns()

    raise RuntimeError(f'unhandled key {key}')
Exemplo n.º 2
0
    def loc_to_iloc(self,
                    key: GetItemKeyType,
                    offset: tp.Optional[int] = None,
                    key_transform: KeyTransformType = None) -> GetItemKeyType:
        '''
        Translate a label-based (loc) key into an integer-based (iloc) key.

        Note: Boolean Series are reindexed to this index, then passed on as Boolean arrays.

        Args:
            key: the selection; an ILoc is unwrapped and returned directly, an Index or Series is reduced to its values.
            offset: A default of None is critical to avoid large overhead in unnecessary application of offsets.
            key_transform: A function that transforms keys to specialized type; used by IndexDate indices.
        Returns:
            Return GetItemKey type that is based on integers, compatible with TypeBlocks
        '''
        from static_frame.core.series import Series

        if self._recache:
            self._update_array_cache()

        # an ILoc already wraps an integer-based key
        if isinstance(key, ILoc):
            return key.key

        if isinstance(key, Index):
            # for an Index, only the values are used
            key = key.values
        elif isinstance(key, Series):
            # only a Boolean Series on a different index needs alignment to this index
            if key.dtype == bool and not key.index.equals(self):
                key = key.reindex(
                    self,
                    fill_value=False,
                    check_equals=False,
                ).values
            else:
                key = key.values

        if self._map is None:  # loc_is_iloc
            if isinstance(key, slice):
                return slice_to_inclusive_slice(key)
            if isinstance(key, np.ndarray):
                # an array key must be Boolean or integer; anything else is cast to the default int
                # could use tolist(), but all keys are expected to be integers
                usable_as_is = key.dtype == bool or key.dtype == DTYPE_INT_DEFAULT
                return key if usable_as_is else key.astype(DTYPE_INT_DEFAULT)
            return key

        lookup_key = key_transform(key) if key_transform else key

        return LocMap.loc_to_iloc(
            label_to_pos=self._map,
            labels=self._labels,
            positions=self._positions,  # always an np.ndarray
            key=lookup_key,
            offset=offset)
Exemplo n.º 3
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Bus':
        '''
        Select by label, returning either a single stored element or a new container of this class over the selection.

        Args:
            key: a loc-style selection, resolved to positions through the index of the wrapped Series.
        Returns:
            The single selected value, or a new instance sharing this instance's store and config.
        '''
        iloc_key = self._series._index.loc_to_iloc(key)  #type: ignore

        # NOTE: the cache is updated before slicing, so both this instance and the returned object see the update
        self._update_series_cache_iloc(key=iloc_key)

        selected = self._series.values[iloc_key]

        if not isinstance(selected, np.ndarray):  # a single element was selected
            force_container = isinstance(key, HLoc) and key.has_key_multiple()
            if not force_container:
                return selected  #type: ignore
            # a container is required even though no array was extracted
            selected = np.array(selected)
            selected.flags.writeable = False

        return self.__class__(
            series=Series(selected,
                          index=self._series._index.iloc[iloc_key],
                          own_index=True,
                          name=self._series._name),
            store=self._store,
            config=self._config,
        )
Exemplo n.º 4
0
def key_from_container_key(
        index: IndexBase,
        key: GetItemKeyType,
        expand_iloc: bool = False,
        ) -> GetItemKeyType:
    '''
    Reduce a container-typed key (Index, Series, or optionally ILoc) to a basic key.

    Args:
        index: the index the key will be applied to; used to align Boolean Series and to size an expanded ILoc.
        key: the selection to normalize; keys of any other type are returned unchanged.
        expand_iloc: if True, an ILoc key is realized as a Boolean array of ``len(index)``.
    Returns:
        The values of an Index or Series, a Boolean array for an expanded ILoc, or the key itself.
    '''
    from static_frame.core.index import Index
    from static_frame.core.index import ILoc
    from static_frame.core.series import Series

    if isinstance(key, Index):
        # for an Index, only the values are used
        return key.values

    if isinstance(key, Series):
        # A Boolean Series on a different index is reindexed to ``index``, filling missing labels with False.
        if key.dtype == bool and not key.index.equals(index):
            return key.reindex(index,
                    fill_value=False,
                    check_equals=False,
                    ).values
        # For all other Series (and Boolean Series on an equal index) the values are used as keys and the Series index is ignored; requiring a matching index would only be useful if index and values were identical.
        return key.values

    if expand_iloc and isinstance(key, ILoc):
        # realize as Boolean array
        mask = np.full(len(index), False)
        mask[key.key] = True
        return mask

    # NOTE: Frame keys are not detected here and pass through unchanged
    return key
Exemplo n.º 5
0
    def loc_to_iloc(
        self,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        key_transform: tp.Optional[tp.Callable[[GetItemKeyType],
                                               GetItemKeyType]] = None
    ) -> GetItemKeyType:
        '''
        Translate a label-based (loc) key into an integer-based (iloc) key.

        Note: Boolean Series are reindexed to this index, then passed on as Boolean arrays.

        Args:
            key: the selection; an Index or Series is reduced to its values before lookup.
            offset: A default of None is critical to avoid large overhead in unnecessary application of offsets.
            key_transform: A function that transforms keys to specialized type; used by Data indices.
        Returns:
            Return GetItemKey type that is based on integers, compatible with TypeBlocks
        '''
        from static_frame.core.series import Series

        if self._recache:
            self._update_array_cache()

        if isinstance(key, Index):
            # for an Index, only the values are used
            key = key.values

        if isinstance(key, Series):
            # only a Boolean Series that needs alignment is reindexed to this index
            if key.dtype == bool and _requires_reindex(key.index, self):
                key = key.reindex(self, fill_value=False).values
            else:
                key = key.values

        # when labels are positions, the key is already usable as-is
        if self._loc_is_iloc:
            return key

        lookup_key = key_transform(key) if key_transform else key

        return LocMap.loc_to_iloc(
            self._map,
            self._positions,  # always an np.ndarray
            lookup_key,
            offset)
Exemplo n.º 6
0
    def _extract_loc(self, key: GetItemKeyType) -> 'Series':
        '''
        Select by label, returning either a single element or a new container of this class over the selection.

        Compatibility:
            Pandas supports taking in iterables of keys, where some keys are not found in the index; a Series is returned as if a reindex operation was performed. This is undesirable. Better instead is to use reindex()

        Args:
            key: a loc-style selection, resolved to positions through this object's index.
        Returns:
            The single selected value, or a new instance holding the selected values and index.
        '''
        iloc_key = self._index.loc_to_iloc(key)
        selected = self.values[iloc_key]

        if not isinstance(selected, np.ndarray): # a single element was selected
            force_container = isinstance(key, HLoc) and key.has_key_multiple()
            if not force_container:
                return selected
            # a container is required even though no array was extracted
            selected = np.array(selected)
            selected.flags.writeable = False

        sub_index = self._index.iloc[iloc_key]
        return self.__class__(selected,
                index=sub_index,
                own_index=True,
                name=self._name)
Exemplo n.º 7
0
    def loc_to_iloc(
        cls,
        *,
        label_to_pos: tp.Dict[tp.Hashable, int],
        labels: np.ndarray,
        positions: np.ndarray,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        partial_selection: bool = False,
    ) -> GetItemKeyType:
        '''
        Map a label-based key to integer positions using ``label_to_pos``.

        Note: all SF objects (Series, Index) need to be converted to basic types before being passed as `key` to this function.

        Args:
            label_to_pos: mapping from label to integer position.
            labels: array of labels; its dtype determines how datetime64 keys are matched.
            positions: array of integer positions, indexed by Boolean keys.
            key: a slice, a single label, an np.datetime64, a list of labels, or an array of labels or Booleans.
            offset: in the context of an IndexHierarchical, the iloc positions returned from this function need to be shifted.
            partial_selection: if True and key is an iterable of labels that includes labels not in the mapping, available matches will be returned rather than raising.
        Returns:
            An integer mapped slice, or GetItemKey type that is based on integers, compatible with TypeBlocks
        Raises:
            KeyError: propagated from ``label_to_pos`` when a label is missing and ``partial_selection`` is False.
        '''
        # NOTE: ILoc is handled prior to this call, in the Index._loc_to_iloc method
        offset_apply = offset is not None

        if key.__class__ is slice:
            if key == NULL_SLICE:
                if offset_apply:
                    # when offset is defined (even if it is zero), null slice is not sufficiently specific; need to convert to an explicit slice relative to the offset
                    return slice(offset,
                                 len(positions) + offset)  #type: ignore
                else:
                    return NULL_SLICE
            try:
                return slice(*cls.map_slice_args(
                    label_to_pos.get,  #type: ignore
                    key,
                    labels,
                    offset))
            except LocEmpty:
                # the slice selects nothing
                return EMPTY_SLICE

        labels_is_dt64 = labels.dtype.kind == DTYPE_DATETIME_KIND

        if key.__class__ is np.datetime64:
            # if we have a single dt64, convert this to the key's unit and do a Boolean selection if the key is a less-granular unit
            if (labels.dtype == DTYPE_OBJECT and np.datetime_data(key.dtype)[0]
                    in DTYPE_OBJECTABLE_DT64_UNITS):  #type: ignore
                key = key.astype(DTYPE_OBJECT)  #type: ignore
            elif labels_is_dt64 and key.dtype < labels.dtype:  #type: ignore
                key = labels.astype(key.dtype) == key  #type: ignore
            # if not different type, keep it the same so as to do a direct, single element selection

        # evaluated after the dt64 handling, which may have replaced a scalar key with a Boolean array
        is_array = key.__class__ is np.ndarray
        is_list = isinstance(key, list)

        # can be an iterable of labels (keys) or an iterable of Booleans
        if is_array or is_list:
            if is_array and key.dtype.kind == DTYPE_DATETIME_KIND:  #type: ignore
                if (labels.dtype == DTYPE_OBJECT
                        and np.datetime_data(key.dtype)[0]
                        in DTYPE_OBJECTABLE_DT64_UNITS):  #type: ignore
                    # if key is dt64 and labels are object, then for objectable units we can convert key to object to permit matching in the AutoMap
                    # NOTE: tolist() is expected to be faster than astype object for smaller collections
                    key = key.tolist()  #type: ignore
                    is_array = False
                    is_list = True
                elif labels_is_dt64 and key.dtype < labels.dtype:  #type: ignore
                    # change the labels to the dt64 dtype, i.e., if the key is years, recast the labels as years, and do a Boolean selection of everything that matches each key
                    labels_ref = labels.astype(key.dtype)  # type: ignore
                    # NOTE: this is only correct if both key and labels are dt64, and key is a less granular unit, as the order in the key will not be used
                    # let Boolean key advance to next branch
                    # an all-False initial value keeps reduce() from raising TypeError on an empty key; an empty key then selects nothing
                    key = reduce(OPERATORS['__or__'],
                                 (labels_ref == k
                                  for k in key),
                                 np.zeros(labels_ref.shape,
                                          dtype=DTYPE_BOOL))  # type: ignore

            if is_array and key.dtype == DTYPE_BOOL:  #type: ignore
                if offset_apply:
                    return positions[key] + offset
                return positions[key]

            # map labels to integer positions, return a list of integer positions
            # NOTE: we may miss the opportunity to identify contiguous keys and extract a slice
            # NOTE: we do more branching here to optimize performance
            if partial_selection:
                if offset_apply:
                    return [
                        label_to_pos[k] + offset for k in key
                        if k in label_to_pos
                    ]  #type: ignore
                return [label_to_pos[k] for k in key
                        if k in label_to_pos]  # type: ignore
            if offset_apply:
                return [label_to_pos[k] + offset for k in key]  #type: ignore
            return [label_to_pos[k] for k in key]  # type: ignore

        # if a single element (an integer, string, or date), we just get the integer out of the map
        if offset_apply:
            return label_to_pos[key] + offset  #type: ignore
        return label_to_pos[key]  #type: ignore
Exemplo n.º 8
0
    def loc_to_iloc(self, key: GetItemKeyType) -> GetItemKeyType:
        '''
        This is the low-level loc_to_iloc, analogous to LocMap.loc_to_iloc as used by Index. As such, the key at this point should not be a Series or Index object.

        If key is an np.ndarray, a Boolean array will be passed through; otherwise, it will be treated as an iterable of values to be passed to leaf_loc_to_iloc.

        Args:
            key: a slice, an iterable of leaf locs, a Boolean array, a single leaf loc, or an HLoc.
        Returns:
            A slice, a Boolean array, a single iloc, or a flat list of ilocs.
        Raises:
            KeyError: if an HLoc key matches nothing across all levels.
        '''
        if isinstance(key, slice):
            # given a top-level definition of a slice (and if that slice results in a single value), we can get a value range
            slice_args = LocMap.map_slice_args(self.leaf_loc_to_iloc, key)
            return slice(*slice_args)

        # this should not match tuples that are leaf-locs
        if isinstance(key, KEY_ITERABLE_TYPES):
            is_boolean_array = isinstance(key, np.ndarray) and key.dtype == bool
            if is_boolean_array:
                return key  # keep as Boolean
            return [self.leaf_loc_to_iloc(leaf) for leaf in key]

        if not isinstance(key, HLoc):
            # assume it is a leaf loc tuple
            return self.leaf_loc_to_iloc(key)

        # from here on, key is an HLoc: visit levels breadth-first, gathering the ilocs of every leaf index that matches
        matches = []
        pending = deque(((self, 0, 0), ))  # order matters

        while pending:
            node, depth, offset = pending.popleft()
            selector = key[depth]
            offset_next = offset + node.offset

            if node.targets is None:
                # leaf level: resolve against the index with the accumulated offset
                try:
                    found = node.index.loc_to_iloc(selector, offset=offset_next)
                except KeyError:
                    # no match at this leaf; other leaves may still match
                    continue
                matches.append(found)
                continue

            # targets is an iterable np.ndarray of child levels
            try:
                position = node.index.loc_to_iloc(selector)  # no offset
            except KeyError:
                # no match on this branch; other branches may still match
                continue

            children = node.targets[position]  # one or more IndexLevel objects
            depth_next = depth + 1
            # if not an ndarray, a single IndexLevel was extracted
            if isinstance(children, IndexLevel):
                pending.append((children, depth_next, offset_next))
            else:
                pending.extend(
                    (child, depth_next, offset_next) for child in children)

        match_count = len(matches)
        if match_count == 0:
            raise KeyError('no matching keys across all levels')

        if match_count == 1 and not key.has_key_multiple():
            # drop to a single iloc selection
            return matches[0]

        # NOTE: might be able to combine contiguous ilocs into a single slice
        flat = []  # all matches merged into one flat iloc
        length = self.__len__()
        for part in matches:
            if isinstance(part, slice):
                flat.extend(range(*part.indices(length)))
            elif isinstance(part, INT_TYPES):
                # a single integer position
                flat.append(part)
            else:  # assume it is an iterable
                flat.extend(part)
        return flat