Exemplo n.º 1
0
    def loc_to_iloc(self,
                    key: GetItemKeyType,
                    offset: tp.Optional[int] = None,
                    key_transform: KeyTransformType = None) -> GetItemKeyType:
        '''
        Convert a label-based (loc) key into an integer-based (iloc) key for this index.

        Note: a Boolean Series whose index differs from this index is reindexed to this index (missing labels filled with False) and then handled as a plain Boolean array.

        Args:
            key: the selection; an ILoc, Index, Series, array, slice, or any value accepted by LocMap.loc_to_iloc.
            offset: forwarded to LocMap.loc_to_iloc; keep the default of None to avoid the overhead of applying offsets when none is needed.
            key_transform: optional callable applied to the key before label lookup; used by IndexDate indices to convert keys to a specialized type. Not applied when this index has no label mapping.
        Returns:
            A GetItemKey type based on integers, compatible with TypeBlocks.
        '''
        from static_frame.core.series import Series

        if self._recache:
            self._update_array_cache()

        # an ILoc already carries an integer-based key; unwrap it and finish
        if isinstance(key, ILoc):
            return key.key

        # reduce SF containers to plain arrays
        if isinstance(key, Index):
            # for an Index, the labels themselves are the selection
            key = key.values
        elif isinstance(key, Series):
            misaligned = key.dtype == bool and not key.index.equals(self)
            if misaligned:
                # align the Boolean selection to this index before extracting the array
                key = key.reindex(
                    self,
                    fill_value=False,
                    check_equals=False,
                )
            key = key.values

        if self._map is None:  # loc_is_iloc: labels are the positions
            if isinstance(key, np.ndarray):
                # arrays must be Boolean or integer; anything that is not already one of the pass-through dtypes is cast to the default integer type
                passthrough = key.dtype == bool or key.dtype == DTYPE_INT_DEFAULT
                return key if passthrough else key.astype(DTYPE_INT_DEFAULT)
            if isinstance(key, slice):
                return slice_to_inclusive_slice(key)
            return key

        if key_transform:
            key = key_transform(key)

        return LocMap.loc_to_iloc(
            label_to_pos=self._map,
            labels=self._labels,
            positions=self._positions,  # always an np.ndarray
            key=key,
            offset=offset,
        )
Exemplo n.º 2
0
    def loc_to_iloc(
        cls,
        *,
        label_to_pos: tp.Dict[tp.Hashable, int],
        labels: np.ndarray,
        positions: np.ndarray,
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        partial_selection: bool = False,
    ) -> GetItemKeyType:
        '''
        Translate a label-based (loc) key into an integer-based (iloc) key.

        Note: all SF objects (Series, Index) need to be converted to basic types before being passed as `key` to this function.

        Args:
            label_to_pos: mapping from label to integer position.
            labels: array of the labels; its dtype determines how datetime64 keys are matched.
            positions: array of integer positions; indexed directly when the key is (or becomes) a Boolean array.
            key: a slice, a single label, a list or array of labels, or a Boolean array.
            offset: in the context of an IndexHierarchical, the iloc positions returned from this function need to be shifted. When not None (even if zero), it is added to every returned position.
            partial_selection: if True and key is an iterable of labels that includes labels not in the mapping, available matches will be returned rather than raising.
        Returns:
            An integer mapped slice, or GetItemKey type that is based on integers, compatible with TypeBlocks
        Raises:
            Whatever `label_to_pos` raises on a failed lookup (KeyError for a dict) when a label is missing and `partial_selection` is False.
        '''
        # NOTE: ILoc is handled prior to this call, in the Index._loc_to_iloc method
        offset_apply = offset is not None

        if key.__class__ is slice:
            if key == NULL_SLICE:
                if offset_apply:
                    # when offset is defined (even if it is zero), null slice is not sufficiently specific; need to convert to an explicit slice relative to the offset
                    return slice(offset,
                                 len(positions) + offset)  #type: ignore
                return NULL_SLICE
            try:
                return slice(*cls.map_slice_args(
                    label_to_pos.get,  #type: ignore
                    key,
                    labels,
                    offset))
            except LocEmpty:
                return EMPTY_SLICE

        labels_is_dt64 = labels.dtype.kind == DTYPE_DATETIME_KIND

        if key.__class__ is np.datetime64:
            # if we have a single dt64, convert this to the key's unit and do a Boolean selection if the key is a less-granular unit
            if (labels.dtype == DTYPE_OBJECT and np.datetime_data(key.dtype)[0]
                    in DTYPE_OBJECTABLE_DT64_UNITS):  #type: ignore
                key = key.astype(DTYPE_OBJECT)  #type: ignore
            elif labels_is_dt64 and key.dtype < labels.dtype:  #type: ignore
                key = labels.astype(key.dtype) == key  #type: ignore
            # if not different type, keep it the same so as to do a direct, single element selection

        # NOTE: evaluated after the single-dt64 branch, as that branch may have replaced key with a Boolean array
        is_array = key.__class__ is np.ndarray
        is_list = isinstance(key, list)

        # can be an iterable of labels (keys) or an iterable of Booleans
        if is_array or is_list:
            if is_array and key.dtype.kind == DTYPE_DATETIME_KIND:  #type: ignore
                if (labels.dtype == DTYPE_OBJECT
                        and np.datetime_data(key.dtype)[0]
                        in DTYPE_OBJECTABLE_DT64_UNITS):  #type: ignore
                    # if key is dt64 and labels are object, then for objectable units we can convert key to object to permit matching in the AutoMap
                    # NOTE: tolist() is expected to be faster than astype object for smaller collections
                    key = key.tolist()  #type: ignore
                    is_array = False
                    is_list = True
                elif labels_is_dt64 and key.dtype < labels.dtype:  #type: ignore
                    # change the labels to the dt64 dtype, i.e., if the key is years, recast the labels as years, and do a Boolean selection of everything that matches each key
                    labels_ref = labels.astype(key.dtype)  # type: ignore
                    # NOTE: this is only correct if both key and labels are dt64, and key is a less granular unit, as the order in the key will not be used
                    # seed the fold with an all-False mask so that an empty key yields an empty selection, consistent with other empty keys, instead of reduce() raising TypeError on an empty iterable
                    # let Boolean key advance to next branch
                    key = reduce(OPERATORS['__or__'],
                                 (labels_ref == k for k in key),  # type: ignore
                                 np.zeros(labels_ref.shape, dtype=DTYPE_BOOL))

            if is_array and key.dtype == DTYPE_BOOL:  #type: ignore
                if offset_apply:
                    return positions[key] + offset
                return positions[key]

            # map labels to integer positions, return a list of integer positions
            # NOTE: we may miss the opportunity to identify contiguous keys and extract a slice
            # NOTE: we do more branching here to optimize performance
            if partial_selection:
                if offset_apply:
                    return [
                        label_to_pos[k] + offset for k in key
                        if k in label_to_pos
                    ]  #type: ignore
                return [label_to_pos[k] for k in key
                        if k in label_to_pos]  # type: ignore
            if offset_apply:
                return [label_to_pos[k] + offset for k in key]  #type: ignore
            return [label_to_pos[k] for k in key]  # type: ignore

        # if a single element (an integer, string, or date), we just get the integer out of the map
        if offset_apply:
            return label_to_pos[key] + offset  #type: ignore
        return label_to_pos[key]  #type: ignore