Exemplo n.º 1
0
    def test_get_indexer_non_unique_nas(self, nulls_fixture):
        # even though this isn't non-unique, this should still work
        index = Index(["a", "b", nulls_fixture])
        indexer, missing = index.get_indexer_non_unique([nulls_fixture])

        expected_indexer = np.array([2], dtype=np.intp)
        expected_missing = np.array([], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, expected_missing)

        # actually non-unique
        index = Index(["a", nulls_fixture, "b", nulls_fixture])
        indexer, missing = index.get_indexer_non_unique([nulls_fixture])

        expected_indexer = np.array([1, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(indexer, expected_indexer)
        tm.assert_numpy_array_equal(missing, expected_missing)

        # matching-but-not-identical nans
        if is_matching_na(nulls_fixture, float("NaN")):
            index = Index(["a", float("NaN"), "b", float("NaN")])
            match_but_not_identical = True
        elif is_matching_na(nulls_fixture, Decimal("NaN")):
            index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
            match_but_not_identical = True
        else:
            match_but_not_identical = False

        if match_but_not_identical:
            indexer, missing = index.get_indexer_non_unique([nulls_fixture])

            expected_indexer = np.array([1, 3], dtype=np.intp)
            tm.assert_numpy_array_equal(indexer, expected_indexer)
            tm.assert_numpy_array_equal(missing, expected_missing)
Exemplo n.º 2
0
    def test_get_indexer_non_unique(self):
        np.random.seed(123456789)

        ci = CategoricalIndex(list("aabbca"),
                              categories=list("cab"),
                              ordered=False)
        oidx = Index(np.array(ci))

        for n in [1, 2, 5, len(ci)]:
            finder = oidx[np.random.randint(0, len(ci), size=n)]
            expected = oidx.get_indexer_non_unique(finder)[0]

            actual = ci.get_indexer(finder)
            tm.assert_numpy_array_equal(expected, actual)

        # see gh-17323
        #
        # Even when indexer is equal to the
        # members in the index, we should
        # respect duplicates instead of taking
        # the fast-track path.
        for finder in [list("aabbca"), list("aababca")]:
            expected = oidx.get_indexer_non_unique(finder)[0]

            actual = ci.get_indexer(finder)
            tm.assert_numpy_array_equal(expected, actual)
Exemplo n.º 3
0
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # Test for flat index and CategoricalIndex with same/different cats:
        for dtype in [None, "category", key.dtype]:
            idx = Index(idx_values, dtype=dtype)
            expected, exp_miss = idx.get_indexer_non_unique(key_values)
            result, res_miss = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(expected, result)
            tm.assert_numpy_array_equal(exp_miss, res_miss)
Exemplo n.º 4
0
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))
        # Test for flat index and CategoricalIndex with same/different cats:
        for dtype in None, 'category', key.dtype:
            idx = Index(idx_values, dtype=dtype)
            expected, exp_miss = idx.get_indexer_non_unique(key_values)
            result, res_miss = idx.get_indexer_non_unique(key)

            tm.assert_numpy_array_equal(expected, result)
            tm.assert_numpy_array_equal(exp_miss, res_miss)
Exemplo n.º 5
0
def test_get_indexer_non_unique_nans_in_object_dtype_target(nulls_fixture):
    idx = Index([1.0, 2.0])
    target = Index([1, nulls_fixture], dtype="object")

    result_idx, result_missing = idx.get_indexer_non_unique(target)
    tm.assert_numpy_array_equal(result_idx, np.array([0, -1], dtype=np.intp))
    tm.assert_numpy_array_equal(result_missing, np.array([1], dtype=np.intp))
Exemplo n.º 6
0
    def test_get_indexer_non_unique(self, idx_values, key_values, key_class, dtype):
        # GH 21448
        key = key_class(key_values, categories=range(1, 5))

        if dtype == "key":
            dtype = key.dtype

        # Test for flat index and CategoricalIndex with same/different cats:
        idx = Index(idx_values, dtype=dtype)
        expected, exp_miss = idx.get_indexer_non_unique(key_values)
        result, res_miss = idx.get_indexer_non_unique(key)

        tm.assert_numpy_array_equal(expected, result)
        tm.assert_numpy_array_equal(exp_miss, res_miss)

        exp_unique = idx.unique().get_indexer(key_values)
        res_unique = idx.unique().get_indexer(key)
        tm.assert_numpy_array_equal(res_unique, exp_unique)
Exemplo n.º 7
0
 def test_get_indexer_non_unique_np_nats(self, np_nat_fixture,
                                         np_nat_fixture2):
     expected_missing = np.array([], dtype=np.intp)
     # matching-but-not-identical nats
     if is_matching_na(np_nat_fixture, np_nat_fixture2):
         # ensure nats are different objects
         index = Index(
             np.array(
                 [
                     "2021-10-02",
                     np_nat_fixture.copy(),
                     np_nat_fixture2.copy()
                 ],
                 dtype=object,
             ),
             dtype=object,
         )
         # pass as index to prevent target from being casted to DatetimeIndex
         indexer, missing = index.get_indexer_non_unique(
             Index([np_nat_fixture], dtype=object))
         expected_indexer = np.array([1, 2], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)
     # dt64nat vs td64nat
     else:
         index = Index(
             np.array(
                 [
                     "2021-10-02",
                     np_nat_fixture,
                     np_nat_fixture2,
                     np_nat_fixture,
                     np_nat_fixture2,
                 ],
                 dtype=object,
             ),
             dtype=object,
         )
         # pass as index to prevent target from being casted to DatetimeIndex
         indexer, missing = index.get_indexer_non_unique(
             Index([np_nat_fixture], dtype=object))
         expected_indexer = np.array([1, 3], dtype=np.intp)
         tm.assert_numpy_array_equal(indexer, expected_indexer)
         tm.assert_numpy_array_equal(missing, expected_missing)
Exemplo n.º 8
0
    def test_get_indexer_numeric_vs_bool(self):
        left = Index([1, 2, 3])
        right = Index([True, False])

        res = left.get_indexer(right)
        expected = -1 * np.ones(len(right), dtype=np.intp)
        tm.assert_numpy_array_equal(res, expected)

        res = right.get_indexer(left)
        expected = -1 * np.ones(len(left), dtype=np.intp)
        tm.assert_numpy_array_equal(res, expected)

        res = left.get_indexer_non_unique(right)[0]
        expected = -1 * np.ones(len(right), dtype=np.intp)
        tm.assert_numpy_array_equal(res, expected)

        res = right.get_indexer_non_unique(left)[0]
        expected = -1 * np.ones(len(left), dtype=np.intp)
        tm.assert_numpy_array_equal(res, expected)
Exemplo n.º 9
0
 def _make_indexer(self, self_indexer: Index, other_indexer: Index):
     """Compute and store the positional mapping from ``self_indexer`` to ``other_indexer``.

     Nothing is returned; the results are written to attributes:

     * ``flat_indexer`` - one position per entry of ``self_indexer``, as
       produced by the pandas ``get_indexer`` / ``get_indexer_non_unique``
       calls below (or a plain ``arange`` when both indexes are equal).
     * ``missing_indices`` - the "missing" array returned by
       ``get_indexer_non_unique``, or an empty array on the fast paths.
     * ``other_grouper`` - only set when ``self.aggregation_required`` is
       true: the integer codes from ``other_indexer.factorize()``.

     All arrays built here use ``np.intp`` so that every branch yields the
     same dtype as the pandas indexer methods, regardless of platform.
     """
     if self.aggregation_required:
         # Map onto the unique groups of ``other_indexer`` instead of its rows.
         group_ints, group_order = other_indexer.factorize()
         self.other_grouper = group_ints
         self.flat_indexer, self.missing_indices = (
             group_order.get_indexer_non_unique(self_indexer)
         )
         return

     # Performance-tuned fast paths for constructing indexers
     if self_indexer.equals(other_indexer):  # Indexers are identical
         self.flat_indexer = np.arange(len(other_indexer), dtype=np.intp)
         self.missing_indices = np.array([], dtype=np.intp)
     elif len(self_indexer.difference(other_indexer)) == 0:  # No missing values
         # Taking the difference is faster than `all(.isin())`
         self.missing_indices = np.array([], dtype=np.intp)
         self.flat_indexer = other_indexer.get_indexer(self_indexer)
     else:  # All other cases
         self.flat_indexer, self.missing_indices = (
             other_indexer.get_indexer_non_unique(self_indexer)
         )