Пример #1
0
 def duplicated(self, keep="first"):
     """
     Return a boolean mask flagging duplicated entries of this object.

     Parameters
     ----------
     keep : optional, default "first"
         Forwarded unchanged to the module-level ``duplicated`` helper.

     Returns
     -------
     numpy.ndarray or container built by ``self._constructor``
         For index objects: an all-False boolean array when the index is
         unique, otherwise whatever the ``duplicated`` helper returns.
         For other objects: the helper's result wrapped with
         ``self._constructor``, aligned on ``self.index`` and finalized
         from ``self``.
     """
     if isinstance(self, ABCIndexClass):
         if self.is_unique:
             # A unique index cannot contain duplicates, so skip the helper.
             # Builtin ``bool`` is used instead of ``np.bool``: that alias
             # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
             return np.zeros(len(self), dtype=bool)
         return duplicated(self, keep=keep)

     mask = duplicated(self, keep=keep)
     return self._constructor(mask, index=self.index).__finalize__(self)
Пример #2
0
 def duplicated(self, keep='first'):
     """
     Return a boolean mask flagging duplicated entries of this object.

     Parameters
     ----------
     keep : optional, default 'first'
         Forwarded unchanged to ``pandas.core.algorithms.duplicated``.

     Returns
     -------
     numpy.ndarray or container built by ``self._constructor``
         For index objects: an all-False boolean array when the index is
         unique, otherwise the result of the algorithms helper.
         For other objects: the helper's result wrapped with
         ``self._constructor``, aligned on ``self.index`` and finalized
         from ``self``.
     """
     # Imported locally, as in the original; inside this function the name
     # refers to the algorithms helper, not to this method.
     from pandas.core.algorithms import duplicated

     if isinstance(self, ABCIndexClass):
         if self.is_unique:
             # A unique index cannot contain duplicates, so skip the helper.
             # Builtin ``bool`` is used instead of ``np.bool``: that alias
             # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
             return np.zeros(len(self), dtype=bool)
         return duplicated(self, keep=keep)

     mask = duplicated(self, keep=keep)
     return self._constructor(mask, index=self.index).__finalize__(self)
Пример #3
0
 def duplicated(self, keep='first'):
     """
     Return a boolean mask flagging duplicated entries of this object.

     Parameters
     ----------
     keep : optional, default 'first'
         Forwarded unchanged to ``pandas.core.algorithms.duplicated``.

     Returns
     -------
     numpy.ndarray or container built by ``self._constructor``
         For index objects: an all-False boolean array when the index is
         unique, otherwise the result of the algorithms helper.
         For other objects: the helper's result wrapped with
         ``self._constructor``, aligned on ``self.index`` and finalized
         from ``self``.
     """
     # Imported locally, as in the original; inside this function the name
     # refers to the algorithms helper, not to this method.
     from pandas.core.algorithms import duplicated

     if isinstance(self, ABCIndexClass):
         if self.is_unique:
             # A unique index cannot contain duplicates, so skip the helper.
             # Builtin ``bool`` is used instead of ``np.bool``: that alias
             # was deprecated in NumPy 1.20 and removed in NumPy 1.24.
             return np.zeros(len(self), dtype=bool)
         return duplicated(self, keep=keep)

     mask = duplicated(self, keep=keep)
     return self._constructor(mask, index=self.index).__finalize__(self)
Пример #4
0
    def test_datetime_likes(self):
        """
        Exercise ``duplicated`` on datetime-like data.

        One ten-element pattern (containing two ``NaT`` entries) is checked
        as a raw array, as an Index and as a Series, for each ``keep`` mode.
        """
        date_strs = ['2011-01-01', '2011-01-02', '2011-01-01', 'NaT',
                     '2011-01-03', '2011-01-02', '2011-01-04', '2011-01-01',
                     'NaT', '2011-01-06']
        delta_strs = ['1 days', '2 days', '1 days', 'NaT', '3 days',
                      '2 days', '4 days', '1 days', 'NaT', '6 days']

        naive_stamps = np.array([pd.Timestamp(s) for s in date_strs])
        aware_stamps = np.array(
            [pd.Timestamp(s, tz='US/Eastern') for s in date_strs])
        periods = np.array([pd.Period(s, freq='D') for s in date_strs])
        datetime64s = np.array([np.datetime64(s) for s in date_strs])
        timedeltas = np.array([pd.Timedelta(s) for s in delta_strs])
        cases = [naive_stamps, aware_stamps, periods, datetime64s, timedeltas]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        # keep=False is expected to flag everything either mode flags.
        expected_by_keep = [('first', exp_first),
                            ('last', exp_last),
                            (False, exp_first | exp_last)]

        for case in cases:
            # raw array through the algorithms entry point
            for keep, expected in expected_by_keep:
                outcome = algos.duplicated(case, keep=keep)
                tm.assert_numpy_array_equal(outcome, expected)

            # index
            index_variants = [pd.Index(case),
                              pd.Index(case, dtype='category'),
                              pd.Index(case, dtype=object)]
            for idx in index_variants:
                for keep, expected in expected_by_keep:
                    outcome = idx.duplicated(keep=keep)
                    tm.assert_numpy_array_equal(outcome, expected)

            # series
            series_variants = [pd.Series(case),
                               pd.Series(case, dtype='category'),
                               pd.Series(case, dtype=object)]
            for s in series_variants:
                for keep, expected in expected_by_keep:
                    outcome = s.duplicated(keep=keep)
                    tm.assert_series_equal(outcome, pd.Series(expected))
Пример #5
0
    def test_numeric_object_likes(self):
        """
        Exercise ``duplicated`` on numeric and object data.

        Integer, float (with NaN), complex, object-string and uint64 arrays
        that share one duplication pattern are checked as raw arrays, as
        Index objects and as Series, for each ``keep`` mode.
        """
        int_case = np.array([1, 2, 1, 5, 3,
                             2, 4, 1, 5, 6])
        float_case = np.array([1.1, 2.2, 1.1, np.nan, 3.3,
                               2.2, 4.4, 1.1, np.nan, 6.6])
        complex_case = np.array([1 + 1j, 2 + 2j, 1 + 1j, 5 + 5j, 3 + 3j,
                                 2 + 2j, 4 + 4j, 1 + 1j, 5 + 5j, 6 + 6j])
        object_case = np.array(['a', 'b', 'a', 'e', 'c',
                                'b', 'd', 'a', 'e', 'f'], dtype=object)
        # 2**63 does not fit in int64, so this case needs the uint64 dtype.
        uint_case = np.array([1, 2**63, 1, 3**5, 10,
                              2**63, 39, 1, 3**5, 7], dtype=np.uint64)
        cases = [int_case, float_case, complex_case, object_case, uint_case]

        exp_first = np.array([False, False, True, False, False,
                              True, False, True, True, False])
        exp_last = np.array([True, True, True, True, False,
                             False, False, False, False, False])
        # keep=False is expected to flag everything either mode flags.
        expected_by_keep = [('first', exp_first),
                            ('last', exp_last),
                            (False, exp_first | exp_last)]

        for case in cases:
            # raw array through the algorithms entry point
            for keep, expected in expected_by_keep:
                outcome = algos.duplicated(case, keep=keep)
                tm.assert_numpy_array_equal(outcome, expected)

            # index
            index_variants = [pd.Index(case),
                              pd.Index(case, dtype='category')]
            for idx in index_variants:
                for keep, expected in expected_by_keep:
                    outcome = idx.duplicated(keep=keep)
                    tm.assert_numpy_array_equal(outcome, expected)

            # series
            series_variants = [pd.Series(case),
                               pd.Series(case, dtype='category')]
            for s in series_variants:
                for keep, expected in expected_by_keep:
                    outcome = s.duplicated(keep=keep)
                    tm.assert_series_equal(outcome, pd.Series(expected))
Пример #6
0
    def test_duplicated_with_nas(self):
        """
        Check ``algos.duplicated`` on object arrays containing NaN.

        The first half uses scalar keys; the second half uses tuple keys
        whose members may themselves be NaN.
        """
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        # (keyword arguments, expected mask); the empty dict exercises the
        # default ``keep``.
        scalar_checks = [
            ({}, [False, False, False, True, False, True]),
            ({'keep': 'first'}, [False, False, False, True, False, True]),
            ({'keep': 'last'}, [True, False, True, False, False, False]),
            ({'keep': False}, [True, False, True, True, False, True]),
        ]
        for kwargs, mask in scalar_checks:
            result = algos.duplicated(keys, **kwargs)
            tm.assert_numpy_array_equal(result, np.array(mask))

        # Fill element by element so each slot holds a tuple object.
        left = [0, 0, np.nan, np.nan] * 2
        right = [0, np.nan, 0, np.nan] * 2
        keys = np.empty(8, dtype=object)
        for pos, pair in enumerate(zip(left, right)):
            keys[pos] = pair

        falses = [False] * 4
        trues = [True] * 4
        tuple_checks = [
            ({}, falses + trues),
            ({'keep': 'last'}, trues + falses),
            ({'keep': False}, trues + trues),
        ]
        for kwargs, mask in tuple_checks:
            result = algos.duplicated(keys, **kwargs)
            tm.assert_numpy_array_equal(result, np.array(mask))
Пример #7
0
    def test_duplicated_with_nas(self):
        """
        Check ``algos.duplicated`` on object arrays containing NaN.

        Scalar keys are covered first, then tuple keys whose members may
        themselves be NaN.
        """
        keys = np.array([0, 1, np.nan, 0, 2, np.nan], dtype=object)

        # (keyword arguments, expected mask); the empty dict exercises the
        # default ``keep``.
        scalar_checks = [
            ({}, [False, False, False, True, False, True]),
            ({'keep': 'first'}, [False, False, False, True, False, True]),
            ({'keep': 'last'}, [True, False, True, False, False, False]),
            ({'keep': False}, [True, False, True, True, False, True]),
        ]
        for kwargs, mask in scalar_checks:
            result = algos.duplicated(keys, **kwargs)
            tm.assert_numpy_array_equal(result, np.array(mask))

        # Fill element by element so each slot holds a tuple object.
        left = [0, 0, np.nan, np.nan] * 2
        right = [0, np.nan, 0, np.nan] * 2
        keys = np.empty(8, dtype=object)
        for pos, pair in enumerate(zip(left, right)):
            keys[pos] = pair

        falses = [False] * 4
        trues = [True] * 4
        tuple_checks = [
            ({}, falses + trues),
            ({'keep': 'last'}, trues + falses),
            ({'keep': False}, trues + trues),
        ]
        for kwargs, mask in tuple_checks:
            result = algos.duplicated(keys, **kwargs)
            tm.assert_numpy_array_equal(result, np.array(mask))
Пример #8
0
 def duplicated(self, keep="first"):
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     return duplicated(values, keep=keep)
Пример #9
0
 def _duplicated(
     self, keep: Literal["first", "last", False] = "first"
 ) -> np.ndarray:
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     return duplicated(values, keep=keep)
Пример #10
0
 def _duplicated(
     self, keep: Union[str, bool] = "first"
 ) -> np.ndarray:
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     return duplicated(values, keep=keep)
Пример #11
0
 def _duplicated(
     self, keep: Literal["first", "last", False] = "first"
 ) -> npt.NDArray[np.bool_]:
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     return duplicated(values, keep=keep)
Пример #12
0
 def duplicated(
     self, keep: Union[str, bool] = "first"
 ) -> np.ndarray:
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     # The type checker reports on the call below:
     # error: Value of type variable "ArrayLike" of "duplicated" cannot be
     # "Union[ExtensionArray, ndarray]"
     return duplicated(values, keep=keep)  # type: ignore[type-var]
Пример #13
0
 def _duplicated(
     self, keep: str | bool = "first"
 ) -> np.ndarray:
     """
     Apply the module-level ``duplicated`` helper to ``self._values``.

     ``keep`` is forwarded unchanged and the helper's result is returned
     as is.
     """
     values = self._values
     return duplicated(values, keep=keep)