def test_iterable_to_array_a(self, array: np.ndarray) -> None:
    # Round-trip the list form of the given array through
    # util.iterable_to_array and compare the result to that list.
    expected = array.tolist()

    # dtype left for iterable_to_array to determine
    inferred, _ = util.iterable_to_array(expected)
    self.assertAlmostEqualValues(inferred, expected)

    # explicitly giving object dtype
    as_object, _ = util.iterable_to_array(expected, dtype=util.DTYPE_OBJECT)
    self.assertAlmostEqualValues(as_object, expected)
def test_iterable_to_array_a(self):
    # Check both the values and the uniqueness flag returned by
    # iterable_to_array for a set, a dict, and a generator.

    from_set, unique = iterable_to_array({3, 4, 5})
    self.assertTrue(unique)
    self.assertEqual(set(from_set.tolist()), {3, 4, 5})

    # the expected values below are the dict's keys
    from_dict, unique = iterable_to_array({None: 3, 'f': 4, 39: 0})
    self.assertTrue(unique)
    self.assertEqual(set(from_dict.tolist()), {None, 'f', 39})

    # a generator is reported as not known to be unique
    from_gen, unique = iterable_to_array((x * 10 for x in range(1, 4)))
    self.assertFalse(unique)
    self.assertEqual(from_gen.tolist(), [10, 20, 30])
def test_iterable_to_array_d(self) -> None:
    # Check the dtype (or values) of the array produced for Boolean,
    # mixed-type, and nested-tuple inputs.

    # all-Boolean values resolve to a bool array
    from_bools, _ = iterable_to_array((True, False, True))
    self.assertEqual(from_bools.dtype, np.dtype('bool'))

    # an explicitly requested bool dtype is used for integer values
    forced_bool, _ = iterable_to_array((0, 1, 0), dtype=bool)
    self.assertEqual(forced_bool.dtype, np.dtype('bool'))

    # mixed integers and strings resolve to object
    mixed, _ = iterable_to_array((1, 2, 'w'))
    self.assertEqual(mixed.dtype, np.dtype('O'))

    # tuples are kept as elements
    pairs, _ = iterable_to_array(((2, 3), (3, 2)))
    self.assertEqual(pairs.tolist(), [(2, 3), (3, 2)])
def test_iterable_to_array_b(self) -> None:
    # For a range of container types, the array built from the container
    # and from an iterator over it must hold the same set of values.
    sources = (
        [1, 2, 3],
        dict(a=1, b=2, c=3).values(),
        dict(a=1, b=2, c=3).keys(),
        {1, 2, 3},
        frozenset((1, 2, 3)),
        ('a', 3, None),
        (1, 2, 'e', 1.1),
    )

    source: tp.Iterable[tp.Any]
    for source in sources:  # type: ignore
        expected = set(source)

        from_iterable, _ = iterable_to_array(source)
        self.assertEqual(set(from_iterable), expected)

        from_iterator, _ = iterable_to_array(iter(source))
        self.assertEqual(set(from_iterator), expected)
def isin(self, other: tp.Iterable[tp.Any]) -> np.ndarray:
    '''Return a Boolean array showing True where a label is found in other. If other is a multidimensional array, it is flattened.

    Args:
        other: an iterable of candidate labels; it is converted to an array with ``iterable_to_array``.

    Returns:
        A Boolean array with one entry per label of this index.
    '''
    if self._recache:
        self._update_array_cache()

    v, assume_unique = iterable_to_array(other)
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0. Both
    # flatten the test values; np.isin keeps the shape of its first argument.
    # NOTE(review): assumes self._labels is 1D, so the result matches np.in1d -- confirm.
    return np.isin(self._labels, v, assume_unique=assume_unique)
def isin(self, other: tp.Iterable[tp.Iterable[tp.Hashable]]) -> np.ndarray:
    '''Return a Boolean array showing True where one or more of the passed in iterable of labels is found in the index.

    Args:
        other: an iterable of iterables; each inner iterable is one candidate label. Candidates whose length differs from ``self.depth`` are ignored.

    Returns:
        An immutable Boolean array of length ``self._length``.

    Raises:
        RuntimeError: if an element of ``other`` is not itself iterable.
    '''
    if self._recache:
        self._update_array_cache()

    matches = []
    for seq in other:
        if not hasattr(seq, '__iter__'):
            raise RuntimeError(
                    'must provide one or more iterables within an iterable')
        # must use iterable to array to properly handle heterogenous types, or if already an array
        v, _ = iterable_to_array(seq)
        # if seq is a tuple, could check self.keys()
        if len(v) == self.depth:
            matches.append(v)

    array = np.full(self._length, False, dtype=bool)
    if matches:
        values = self._labels
        # NOTE: when doing 2d to 1d comparison, np.isin does elementwise comparison, as does ==, but == is shown to be faster in this context
        for match in matches:
            array |= (values == match).all(axis=1)

    # the result is immutable whether or not any candidate was usable
    array.flags.writeable = False
    return array
def test_iterable_to_array_c(self) -> None:
    # With an explicit dtype, the array built from each container and from
    # an iterator over it must hold the same set of values as the container.
    cases = (
        ([1, 2, 3], int),
        (dict(a=1, b=2, c=3).values(), int),
        (dict(a=1, b=2, c=3).keys(), str),
        ({1, 2, 3}, int),
        (frozenset((1, 2, 3)), int),
        (('a', 3, None), object),
        ((1, 2, 'e', 1.1), object),
    )

    source: tp.Iterable[tp.Any]
    for source, dtype in cases:  # type: ignore
        expected = set(source)

        from_iterable, _ = iterable_to_array(source, dtype=dtype)
        self.assertEqual(set(from_iterable), expected)

        from_iterator, _ = iterable_to_array(iter(source), dtype=dtype)
        self.assertEqual(set(from_iterator), expected)
def test_resolve_type_iter_i(self) -> None:
    # A range passed to resolve_type_iter is handed back as the same object
    # with no resolved type; iterable_to_array still converts it to an array.
    source = range(3, 7)

    resolved, _, values = resolve_type_iter(source)
    # a copy is not made
    self.assertEqual(id(source), id(values))
    self.assertEqual(resolved, None)

    array, _ = iterable_to_array(source)
    self.assertEqual(array.tolist(), [3, 4, 5, 6])
def isin(self, other) -> 'Series':
    '''
    Return a same-sized Boolean Series that shows if the same-positioned element is in the iterable passed to the function.

    Args:
        other: an iterable of candidate values; it is converted to an array with ``iterable_to_array``.

    Returns:
        A new instance of this class holding an immutable Boolean array, on this Series' index.
    '''
    # cannot use assume_unique because do not know if values is unique
    v, _ = iterable_to_array(other)
    # NOTE: could identify empty iterable and create False array
    # np.isin replaces np.in1d, which is deprecated since NumPy 2.0.
    # NOTE(review): assumes self.values is 1D, so the result matches np.in1d -- confirm.
    array = np.isin(self.values, v)
    array.flags.writeable = False
    return self.__class__(array, index=self._index)
def test_iterable_to_array_a(self) -> None:
    # Exercise iterable_to_array over sets, dicts, generators, explicit
    # dtypes, sequences of tuples, object arrays, and numeric resolution.

    from_set, unique = iterable_to_array({3, 4, 5})
    self.assertTrue(unique)
    self.assertEqual(set(from_set.tolist()), {3, 4, 5})

    # the expected values below are the dict's keys
    from_dict, unique = iterable_to_array({None: 3, 'f': 4, 39: 0})
    self.assertTrue(unique)
    self.assertEqual(set(from_dict.tolist()), {None, 'f', 39})

    # a generator is reported as not known to be unique
    from_gen, unique = iterable_to_array((x * 10 for x in range(1, 4)))
    self.assertFalse(unique)
    self.assertEqual(from_gen.tolist(), [10, 20, 30])

    # explicit dtype instances
    typed_int, unique = iterable_to_array({3, 4, 5}, dtype=np.dtype(int))
    self.assertEqual(set(typed_int.tolist()), {3, 4, 5})

    typed_obj, unique = iterable_to_array((3, 4, 5), dtype=np.dtype(object))
    self.assertTrue(typed_obj.dtype == object)
    self.assertEqual(typed_obj.tolist(), [3, 4, 5])

    # dtype given positionally
    pairs = [(0, 0), (0, 1), (0, 2), (0, 3)]
    from_pairs, unique = iterable_to_array(pairs, np.dtype(object))
    self.assertEqual(from_pairs.tolist(), [(0, 0), (0, 1), (0, 2), (0, 3)])

    # must get an array of tuples back
    pairs = [(0, 0), (0, 1), (0, 2), (0, 3)]
    from_pairs_iter, unique = iterable_to_array(iter(pairs))
    self.assertEqual(
            from_pairs_iter.tolist(),
            [(0, 0), (0, 1), (0, 2), (0, 3)])

    # an object array given as input
    source = np.array([np.nan, 0j], dtype=object)
    converted, _ = iterable_to_array(source)
    self.assertAlmostEqualValues(source, converted)

    # mixed int and float values resolve to float64
    int_then_float, _ = iterable_to_array((1, 1.1))
    self.assertEqual(int_then_float.dtype, np.dtype('float64'))

    float_then_int, _ = iterable_to_array((1.1, 0, -29))
    self.assertEqual(float_then_int.dtype, np.dtype('float64'))
def test_resolve_type_iter_h(self) -> None:
    # A generator yielding ints, None and a tuple resolves to object,
    # reports a tuple, and is fully collected into a list of values.

    def source() -> tp.Iterator[tp.Any]:
        yield 10
        yield None
        yield from range(3)
        yield (3, 4)

    expected = [10, None, 0, 1, 2, (3, 4)]

    resolved, has_tuple, values = resolve_type_iter(source())
    self.assertEqual(values, expected)
    self.assertEqual(resolved, object)
    # we stop evaluation after finding object
    self.assertEqual(has_tuple, True)

    # a fresh generator is needed: the first one has been consumed
    array, _ = iterable_to_array(source())
    self.assertEqual(array.tolist(), expected)
def _extract_labels(mapping, labels, dtype=None) -> np.ndarray:
    '''Derive labels, a cache of the mapping keys in a sequence type (either an ndarray or a list).

    If the labels passed at instantiation are an ndarray, they are used after immutable filtering. Otherwise, the mapping keys are used to create an ndarray.

    This method is overridden in the derived class.

    Args:
        mapping: maps each label to its integer position; only consulted when ``labels`` has no ``__len__``.
        labels: might be an expired Generator, but if it is an immutable ndarray, we can use it without a copy.
        dtype: optional dtype for the resulting array; when None, the mapping-based path builds an object array.

    Returns:
        An ndarray of labels. Arrays built in this function are marked non-writeable; an ndarray input is returned via ``immutable_filter``.

    Raises:
        RuntimeError: if ``labels`` is an ndarray whose dtype differs from a given ``dtype``.
    '''
    # pre-fetching labels for faster get_item construction
    if isinstance(labels, np.ndarray):
        if dtype is not None and dtype != labels.dtype:
            raise RuntimeError('invalid label dtype for this Index')
        return immutable_filter(labels)

    if hasattr(labels, '__len__'):  # not a generator, not an array
        # resolving the dtype is expensive
        labels, _ = iterable_to_array(labels, dtype=dtype)
    else:
        # labels may be an expired generator, must use the mapping
        # TODO: explore why this does not work
        # if dtype is None:
        #     labels = np.array(list(mapping.keys()), dtype=object)
        # else:
        #     labels = np.fromiter(mapping.keys(), count=len(mapping), dtype=dtype)
        labels_len = len(mapping)
        if labels_len == 0:
            labels = EMPTY_ARRAY
        else:
            # Compare against None explicitly: an np.dtype instance can
            # evaluate as falsy (its length is its field count, zero for
            # non-structured dtypes), which would silently discard a
            # requested dtype in favor of object.
            labels = np.empty(
                    labels_len,
                    dtype=dtype if dtype is not None else object)
            # each mapping value is the position of its key
            for k, v in mapping.items():
                labels[v] = k

    labels.flags.writeable = False
    return labels
def test_iterable_to_array_b(self, labels: tp.Iterable[tp.Any]) -> None:
    # The array built from the given labels must hold (almost) equal values
    # and must be an ndarray.
    array, _ = util.iterable_to_array(labels)
    self.assertAlmostEqualValues(array, labels)

    is_ndarray = isinstance(array, np.ndarray)
    self.assertTrue(is_ndarray)