Example #1
    def test_setdiff2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        post = util.setdiff2d(arrays[0], arrays[1], assume_unique=False)
        self.assertTrue(post.ndim == 2)
        self.assertTrue(
            len(post) == len(
                set(util.array2d_to_tuples(arrays[0])).difference(
                    set(util.array2d_to_tuples(arrays[1])))))
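All of these tests use util.array2d_to_tuples to turn the rows of a 2D array into hashable tuples so they can be compared through Python sets. A minimal sketch of that behavior, assuming only NumPy (array2d_to_tuples_sketch is a hypothetical stand-in, not the library's implementation):

    import typing as tp

    import numpy as np

    def array2d_to_tuples_sketch(array: np.ndarray) -> tp.Iterator[tp.Tuple[tp.Hashable, ...]]:
        # yield one tuple per row so the results are hashable and set-comparable
        for row in array:
            yield tuple(row)

    rows = np.array([[1, 2], [3, 4], [1, 2]])
    assert len(set(array2d_to_tuples_sketch(rows))) == 2  # the duplicate row collapses in a set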
Example #2
    def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        if datetime64_not_aligned(arrays[0], arrays[1]):
            return

        post = util.intersect2d(arrays[0], arrays[1], assume_unique=False)
        self.assertTrue(post.ndim == 2)
        self.assertTrue(
            len(post) == len(
                set(util.array2d_to_tuples(arrays[0]))
                & set(util.array2d_to_tuples(arrays[1]))))
Example #3
    def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        post = util.intersect2d(arrays[0], arrays[1], assume_unique=False)
        if post.dtype == object:
            self.assertTrue(post.ndim == 1)
        else:
            self.assertTrue(post.ndim == 2)

        self.assertTrue(
            len(post) == len(
                set(util.array2d_to_tuples(arrays[0]))
                & set(util.array2d_to_tuples(arrays[1]))))
Example #4
    def test_setdiff2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        if datetime64_not_aligned(arrays[0], arrays[1]):
            return

        for array in arrays:
            if array.dtype.kind in ('f', 'c') and np.isnan(array).any():
                return

        post = util.setdiff2d(arrays[0], arrays[1], assume_unique=False)
        self.assertTrue(post.ndim == 2)
        self.assertTrue(
            len(post) == len(
                set(util.array2d_to_tuples(arrays[0])).difference(
                    set(util.array2d_to_tuples(arrays[1])))))
Example #5
    def test_union2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
        if datetime64_not_aligned(arrays[0], arrays[1]):
            return

        post = util.union2d(arrays[0], arrays[1], assume_unique=False)
        self.assertTrue(post.ndim == 2)

        if post.dtype.kind in ('f', 'c') and np.isnan(post).any():
            return

        self.assertTrue(
            len(post) == len(
                set(util.array2d_to_tuples(arrays[0]))
                | set(util.array2d_to_tuples(arrays[1]))))
Example #6
    def _update_array_cache(self):
        # extract all features from self._levels
        self._depth = next(self._levels.depths())
        # store both the NP array of labels and a KeysView of hashable tuples
        self._labels = self._levels.get_labels()
        # note: this does not retain order in Python 3.5
        self._keys = KeysView._from_iterable(array2d_to_tuples(self._labels))
        # once we have the labels, taking their length is faster than calling self._levels.__len__()
        self._length = len(self._labels)
        self._recache = False
Example #7
File: series.py  Project: CrepeGoat/FEHnt
    def to_pairs(self) -> tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]]:
        '''
        Return a tuple of tuples, where each inner tuple is a pair of index label, value.
        '''
        if isinstance(self._index, IndexHierarchy):
            index_values = list(array2d_to_tuples(self._index.values))
        else:
            index_values = self._index.values

        return tuple(zip(index_values, self.values))
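A brief usage sketch of to_pairs, assuming a static_frame Series built from a plain sequence and a flat index (the exact constructor arguments here are illustrative):

    import static_frame as sf

    s = sf.Series((10, 20, 30), index=('a', 'b', 'c'))
    # to_pairs zips index labels with values into a tuple of (label, value) pairs
    assert s.to_pairs() == (('a', 10), ('b', 20), ('c', 30))

When the index is an IndexHierarchy, the branch above converts each label to a tuple first, so the label in every pair is itself a tuple.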
Example #8
    def __init__(self,
            labels: IndexInitializer,
            *,
            loc_is_iloc: bool = False,
            name: tp.Hashable = None,
            dtype: DtypeSpecifier = None
            ) -> None:

        self._recache = False
        self._map = None
        positions = None

        # resolve the targeted labels dtype by looking at the class attr _DTYPE and/or the passed dtype argument
        if dtype is None:
            dtype_extract = self._DTYPE # set in some specialized Index classes
        else: # passed dtype is not None
            if self._DTYPE is not None and dtype != self._DTYPE:
                raise RuntimeError('invalid dtype argument for this Index',
                        dtype, self._DTYPE)
            # self._DTYPE is None, passed dtype is not None, use dtype
            dtype_extract = dtype

        # handle all Index subclasses
        # check isinstance(labels, IndexBase)
        if issubclass(labels.__class__, IndexBase):
            if labels._recache:
                labels._update_array_cache()
            if name is None and labels.name is not None:
                name = labels.name # immutable, so no copy necessary
            if labels.depth == 1: # not an IndexHierarchy
                if labels.STATIC: # can take the map
                    self._map = labels._map
                # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date
                positions = labels._positions
                loc_is_iloc = labels._loc_is_iloc
                labels = labels._labels
            else: # IndexHierarchy
                # will be a generator of tuples; already updated caches
                labels = array2d_to_tuples(labels._labels)
        elif hasattr(labels, 'values'):
            # it is a Series or similar
            array = labels.values
            if array.ndim == 1:
                labels = array
            else:
                labels = array2d_to_tuples(array)

        if self._DTYPE is not None:
            # do not need to check arrays here, as they will be checked to match dtype_extract in _extract_labels
            if not isinstance(labels, np.ndarray):
                # for now, assume that if _DTYPE is defined, we have a date
                labels = (to_datetime64(v, dtype_extract) for v in labels)
            else: # coerce to target type
                labels = labels.astype(dtype_extract)

        self._name = name if name is None else name_filter(name)

        if self._map is None:
            self._map = self._get_map(labels, positions)

        # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
        self._labels = self._extract_labels(self._map, labels, dtype_extract)
        self._positions = self._extract_positions(self._map, positions)

        if self._DTYPE and self._labels.dtype != self._DTYPE:
            raise RuntimeError('invalid label dtype for this Index',
                    self._labels.dtype, self._DTYPE)
        if len(self._map) != len(self._labels):
            raise KeyError(f'labels ({len(self._labels)}) have non-unique values ({len(self._map)})')

        # NOTE: automatic discovery is possible, but not yet implemented
        self._loc_is_iloc = loc_is_iloc
Example #9
    def flat(self):
        '''Return a flat, one-dimensional index of tuples for each level.
        '''
        return self._INDEX_CONSTRUCTOR(array2d_to_tuples(self.__iter__()))
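As a rough illustration of flat, a two-level hierarchy over ('a', 'b') and (1, 2) should collapse into a one-dimensional index whose labels are the four tuples of the product (IndexHierarchy.from_product and its iteration order are assumed here):

    import static_frame as sf

    ih = sf.IndexHierarchy.from_product(('a', 'b'), (1, 2))
    # flat() yields a single Index whose labels are tuples drawn from every level
    assert tuple(ih.flat()) == (('a', 1), ('a', 2), ('b', 1), ('b', 2))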
Example #10
    def from_correspondence(cls, src_index: 'Index',
                            dst_index: 'Index') -> 'IndexCorrespondence':
        '''
        Return an IndexCorrespondence instance from the correspondence of two Index or IndexHierarchy objects.
        '''
        mixed_depth = False
        if src_index.depth == dst_index.depth:
            depth = src_index.depth
        else:
            # if dimensions are mixed, the only way there can be a match is if the 1D index is of object type (so it can hold a tuple); otherwise, there can be no matches;
            if src_index.depth == 1 and src_index.values.dtype.kind == 'O':
                depth = dst_index.depth
                mixed_depth = True
            elif dst_index.depth == 1 and dst_index.values.dtype.kind == 'O':
                depth = src_index.depth
                mixed_depth = True
            else:
                depth = 0

        # need to use lower-level array methods to get the intersection, rather than Index methods, as we need arrays, not Index objects
        if depth == 1:
            # NOTE: this can fail in some cases: comparing two object arrays with NaNs and strings.
            common_labels = intersect1d(src_index.values,
                                        dst_index.values,
                                        assume_unique=True)
            has_common = len(common_labels) > 0
            assert not mixed_depth
        elif depth > 1:
            # if either values array is an object array, we have to convert all values to tuples
            common_labels = intersect2d(src_index.values,
                                        dst_index.values,
                                        assume_unique=True)
            if mixed_depth:
                # when mixed, on the 1D index we have to use loc_to_iloc with tuples
                common_labels = list(array2d_to_tuples(common_labels))
            has_common = len(common_labels) > 0
        else:
            has_common = False

        size = len(dst_index.values)

        # either a reordering or a subset
        if has_common:

            if len(common_labels) == len(dst_index):
                # use new index to retain order
                values_dst = dst_index.values
                if values_dst.dtype == DTYPE_BOOL:
                    # if the index values are a Boolean array, loc_to_iloc will try to do a Boolean selection, which is incorrect. Using a list avoids this problem.
                    iloc_src = src_index.loc_to_iloc(values_dst.tolist())
                else:
                    iloc_src = src_index.loc_to_iloc(values_dst)
                iloc_dst = np.arange(size)
                return cls(has_common=has_common,
                           is_subset=True,
                           iloc_src=iloc_src,
                           iloc_dst=iloc_dst,
                           size=size)

            # these will be equal sized
            iloc_src = src_index.loc_to_iloc(common_labels)
            iloc_dst = dst_index.loc_to_iloc(common_labels)

            # if iloc_src.dtype != int:
            #     import ipdb; ipdb.set_trace()
            return cls(has_common=has_common,
                       is_subset=False,
                       iloc_src=iloc_src,
                       iloc_dst=iloc_dst,
                       size=size)

        return cls(has_common=has_common,
                   is_subset=False,
                   iloc_src=None,
                   iloc_dst=None,
                   size=size)
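For depth-1 indices, the core of from_correspondence is an intersection of the two label arrays followed by a lookup of each common label's position on both sides. A rough NumPy-only illustration of that idea, using np.intersect1d with return_indices in place of the library's intersect1d and loc_to_iloc helpers:

    import numpy as np

    src = np.array(['a', 'b', 'c', 'd'])
    dst = np.array(['c', 'a', 'x'])

    # common labels plus their positions in src and dst
    common, iloc_src, iloc_dst = np.intersect1d(
            src, dst, assume_unique=True, return_indices=True)
    assert common.tolist() == ['a', 'c']
    assert src[iloc_src].tolist() == ['a', 'c']
    assert dst[iloc_dst].tolist() == ['a', 'c']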
Example #11
    def __init__(self,
                 labels: IndexInitializer,
                 *,
                 loc_is_iloc: bool = False,
                 name: NameType = NAME_DEFAULT,
                 dtype: DtypeSpecifier = None) -> None:

        self._recache: bool = False
        self._map: tp.Optional[FrozenAutoMap] = None

        positions = None
        is_typed = self._DTYPE is not None

        # resolve the targeted labels dtype by looking at the class attr _DTYPE and/or the passed dtype argument
        if dtype is None:
            dtype_extract = self._DTYPE  # set in some specialized Index classes
        else:  # passed dtype is not None
            if is_typed and dtype != self._DTYPE:
                # NOTE: should never get to this branch, as derived Index classes that set _DTYPE remove dtype from __init__
                raise ErrorInitIndex('invalid dtype argument for this Index',
                                     dtype, self._DTYPE)  #pragma: no cover
            # self._DTYPE is None, passed dtype is not None, use dtype
            dtype_extract = dtype

        #-----------------------------------------------------------------------
        # handle all Index subclasses
        if isinstance(labels, IndexBase):
            if labels._recache:
                labels._update_array_cache()
            if name is NAME_DEFAULT:
                name = labels.name  # immutable, so no copy necessary
            if isinstance(labels, Index):  # not an IndexHierarchy
                if (labels.STATIC and self.STATIC and dtype is None):
                    if not is_typed or (is_typed
                                        and self._DTYPE == labels.dtype):
                        # can take the map if static and if types in the dict are the same as those in the labels (or to become the labels after conversion)
                        self._map = labels._map
                # get a reference to the immutable arrays, even if this is an IndexGO index, we can take the cached arrays, assuming they are up to date; for datetime64 indices, we might need to translate to a different type
                positions = labels._positions
                loc_is_iloc = labels._map is None
                labels = labels._labels
            else:  # IndexHierarchy
                # will be a generator of tuples; already updated caches
                labels = array2d_to_tuples(labels.__iter__())
        elif isinstance(labels, ContainerOperand):
            # it is a Series or similar
            array = labels.values
            if array.ndim == 1:
                labels = array
            else:
                labels = array2d_to_tuples(array)
        # else: assume an iterable suitable for labels usage

        #-----------------------------------------------------------------------
        if is_typed:
            # do not need to check arrays here, as they will be checked to match dtype_extract in _extract_labels
            if not isinstance(labels, np.ndarray):
                # for now, assume that if _DTYPE is defined, we have a date
                labels = (to_datetime64(v, dtype_extract) for v in labels)
            # coerce to target type
            elif labels.dtype != dtype_extract:
                labels = labels.astype(dtype_extract)
                labels.flags.writeable = False  #type: ignore

        self._name = None if name is NAME_DEFAULT else name_filter(name)

        if self._map is None:  # if _map not shared from another Index
            if not loc_is_iloc:
                try:
                    self._map = FrozenAutoMap(
                        labels) if self.STATIC else AutoMap(labels)
                except ValueError:  # AutoMap will raise ValueError if non-unique values are encountered
                    pass
                if self._map is None:
                    raise ErrorInitIndex(
                        f'labels ({len(tuple(labels))}) have non-unique values ({len(set(labels))})'
                    )
                size = len(self._map)
            else:  # must assume labels are unique
                # labels must not be a generator, but we assume that internal clients that provided loc_is_iloc will not give a generator
                size = len(labels)  #type: ignore
                if positions is None:
                    positions = PositionsAllocator.get(size)
        else:  # map shared from another Index
            size = len(self._map)

        # this might be NP array, or a list, depending on if static or grow only; if an array, dtype will be compared with passed dtype_extract
        self._labels = self._extract_labels(self._map, labels, dtype_extract)
        self._positions = self._extract_positions(size, positions)

        if self._DTYPE and self._labels.dtype != self._DTYPE:
            raise ErrorInitIndex(
                'invalid label dtype for this Index',  #pragma: no cover
                self._labels.dtype,
                self._DTYPE)
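The uniqueness guarantee in this newer __init__ comes from the mapping type itself: as the comment in the except branch notes, AutoMap and FrozenAutoMap raise ValueError when they encounter a duplicate label, which Index then reports as ErrorInitIndex. A small sketch of that behavior, assuming the automap package that static_frame depends on:

    from automap import FrozenAutoMap

    m = FrozenAutoMap(('a', 'b', 'c'))
    assert m['b'] == 1  # labels map to their integer positions

    try:
        FrozenAutoMap(('a', 'a'))  # duplicate label
    except ValueError:
        pass  # non-unique labels are rejected; Index surfaces this as ErrorInitIndex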