示例#1
0
文件: test_ops.py 项目: sduzjp/Python
    def test_duplicated_drop_duplicates_index(self):
        """Check ``duplicated`` / ``drop_duplicates`` on every object in ``self.objs``.

        Index objects and all other objects (handled as Series) go through
        separate branches.  Each branch first checks the object as given,
        which is expected to be duplicate-free, and then a copy extended with
        repeats of the elements at positions 5 and 3, using the default
        ``keep``, ``keep="last"`` and ``keep=False``.
        """
        # GH 4060
        for original in self.objs:
            if isinstance(original, Index):

                # special case: for a boolean index only the de-duplicated
                # result is checked, then the remaining checks are skipped
                if original.is_boolean():
                    result = original.drop_duplicates()
                    expected = Index([False, True], name="a")
                    tm.assert_index_equal(result, expected)
                    continue

                # original doesn't have duplicates
                expected = np.array([False] * len(original), dtype=bool)
                duplicated = original.duplicated()
                tm.assert_numpy_array_equal(duplicated, expected)
                assert duplicated.dtype == bool
                result = original.drop_duplicates()
                tm.assert_index_equal(result, original)
                # a new object must be returned even when nothing is dropped
                assert result is not original

                # has_duplicates
                assert not original.has_duplicates

                # create repeated values: the elements at positions 5 and 3
                # are appended again, so those two values now occur twice
                idx = original[list(range(len(original))) + [5, 3]]
                # default keep: only the two appended copies are flagged
                expected = np.array([False] * len(original) + [True, True], dtype=bool)
                duplicated = idx.duplicated()
                tm.assert_numpy_array_equal(duplicated, expected)
                assert duplicated.dtype == bool
                tm.assert_index_equal(idx.drop_duplicates(), original)

                # keep="last": the earlier occurrences (positions 3 and 5)
                # are the ones flagged instead
                base = [False] * len(idx)
                base[3] = True
                base[5] = True
                expected = np.array(base)

                duplicated = idx.duplicated(keep="last")
                tm.assert_numpy_array_equal(duplicated, expected)
                assert duplicated.dtype == bool
                result = idx.drop_duplicates(keep="last")
                tm.assert_index_equal(result, idx[~expected])

                # keep=False: every occurrence is flagged, i.e. positions 3
                # and 5 as well as the two appended copies
                base = [False] * len(original) + [True, True]
                base[3] = True
                base[5] = True
                expected = np.array(base)

                duplicated = idx.duplicated(keep=False)
                tm.assert_numpy_array_equal(duplicated, expected)
                assert duplicated.dtype == bool
                result = idx.drop_duplicates(keep=False)
                tm.assert_index_equal(result, idx[~expected])

                # passing ``inplace`` to the Index method must be rejected
                # as an unexpected keyword argument
                with pytest.raises(
                    TypeError,
                    match=r"drop_duplicates\(\) got an unexpected keyword argument",
                ):
                    idx.drop_duplicates(inplace=True)

            else:
                # non-Index objects: same scenarios, compared as Series
                expected = Series(
                    [False] * len(original), index=original.index, name="a"
                )
                tm.assert_series_equal(original.duplicated(), expected)
                result = original.drop_duplicates()
                tm.assert_series_equal(result, original)
                # a new object must be returned even when nothing is dropped
                assert result is not original

                # repeat the entries at positions 5 and 3 in both the index
                # and the values
                idx = original.index[list(range(len(original))) + [5, 3]]
                values = original._values[list(range(len(original))) + [5, 3]]
                s = Series(values, index=idx, name="a")

                # default keep: only the two appended copies are flagged
                expected = Series(
                    [False] * len(original) + [True, True], index=idx, name="a"
                )
                tm.assert_series_equal(s.duplicated(), expected)
                tm.assert_series_equal(s.drop_duplicates(), original)

                # keep="last": positions 3 and 5 are flagged instead
                base = [False] * len(idx)
                base[3] = True
                base[5] = True
                expected = Series(base, index=idx, name="a")

                tm.assert_series_equal(s.duplicated(keep="last"), expected)
                tm.assert_series_equal(
                    s.drop_duplicates(keep="last"), s[~np.array(base)]
                )

                # keep=False: every occurrence is flagged
                base = [False] * len(original) + [True, True]
                base[3] = True
                base[5] = True
                expected = Series(base, index=idx, name="a")

                tm.assert_series_equal(s.duplicated(keep=False), expected)
                tm.assert_series_equal(
                    s.drop_duplicates(keep=False), s[~np.array(base)]
                )

                # here ``inplace=True`` is accepted and modifies ``s`` itself
                s.drop_duplicates(inplace=True)
                tm.assert_series_equal(s, original)
示例#2
0
 def test_basic(self, categories, ordered):
     c1 = CategoricalDtype(categories, ordered=ordered)
     tm.assert_index_equal(c1.categories, pd.Index(categories))
     assert c1.ordered is ordered
示例#3
0
 def test_categorical_categories(self):
     # GH17884
     c1 = CategoricalDtype(Categorical(["a", "b"]))
     tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))
     c1 = CategoricalDtype(CategoricalIndex(["a", "b"]))
     tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))
示例#4
0
    def test_concatlike_dtypes_coercion(self, item, item2):
        """Check the result dtype when appending/concatenating mixed-dtype data.

        ``item`` and ``item2`` are each unpacked into a dtype name and the
        values for that dtype (presumably supplied by fixtures or
        parametrization - confirm against the test module).  Two- and
        three-way combinations are checked for ``Index.append``,
        ``Series._append`` and ``pd.concat``.
        """
        # GH 13660
        typ1, vals1 = item
        typ2, vals2 = item2

        # the third operand simply reuses the second set of values
        vals3 = vals2

        # basically infer
        exp_index_dtype = None
        exp_series_dtype = None

        if typ1 == typ2:
            # same dtype is tested in test_concatlike_same_dtypes
            return
        elif typ1 == "category" or typ2 == "category":
            # The `vals1 + vals2` below fails bc one of these is a Categorical
            #  instead of a list; we have separate dedicated tests for categorical
            return

        # warning class handed to assert_produces_warning below; it stays
        # None unless bool is mixed with int64/float64
        warn = None
        # specify expected dtype
        if typ1 == "bool" and typ2 in ("int64", "float64"):
            # series coerces to numeric based on numpy rule
            # index doesn't because bool is object dtype
            exp_series_dtype = typ2
            warn = FutureWarning
        elif typ2 == "bool" and typ1 in ("int64", "float64"):
            exp_series_dtype = typ1
            warn = FutureWarning
        elif (typ1 == "datetime64[ns, US/Eastern]"
              or typ2 == "datetime64[ns, US/Eastern]"
              or typ1 == "timedelta64[ns]" or typ2 == "timedelta64[ns]"):
            # tz-aware datetimes and timedeltas mixed with another dtype are
            # expected to come out as object for both Index and Series
            exp_index_dtype = object
            exp_series_dtype = object

        # expected payloads for the two- and three-operand cases
        exp_data = vals1 + vals2
        exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3, dtype=exp_index_dtype)
        tm.assert_index_equal(res, exp)

        # ----- Series ----- #

        # series._append
        with tm.assert_produces_warning(warn,
                                        match="concatenating bool-dtype"):
            # GH#39817
            res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        with tm.assert_produces_warning(warn,
                                        match="concatenating bool-dtype"):
            # GH#39817
            res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        with tm.assert_produces_warning(warn,
                                        match="concatenating bool-dtype"):
            # GH#39817
            res = Series(vals1)._append(
                [Series(vals2), Series(vals3)], ignore_index=True)
        exp = Series(exp_data3, dtype=exp_series_dtype)
        tm.assert_series_equal(res, exp)

        # 3 elements via concat, compared with the same expected Series
        with tm.assert_produces_warning(warn,
                                        match="concatenating bool-dtype"):
            # GH#39817
            res = pd.concat(
                [Series(vals1), Series(vals2),
                 Series(vals3)],
                ignore_index=True,
            )
        tm.assert_series_equal(res, exp)
示例#5
0
    def test_factorize(self):
        """``DatetimeIndex.factorize`` returns the expected codes and uniques.

        Covers input that is already ordered and input that is not, with and
        without ``sort=True``, a tz-localized index, and a tz-aware monthly
        ``date_range`` whose uniques must equal the range itself.
        """
        ordered_dti = DatetimeIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"])
        want_codes = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        want_uniques = DatetimeIndex(["2014-01", "2014-02", "2014-03"])

        # the input is already ordered, so sort=True gives the same answer
        for options in ({}, {"sort": True}):
            codes, uniques = ordered_dti.factorize(**options)
            tm.assert_numpy_array_equal(codes, want_codes)
            tm.assert_index_equal(uniques, want_uniques)

        # tz must be preserved
        ordered_dti = ordered_dti.tz_localize("Asia/Tokyo")
        want_uniques = want_uniques.tz_localize("Asia/Tokyo")
        codes, uniques = ordered_dti.factorize()
        tm.assert_numpy_array_equal(codes, want_codes)
        tm.assert_index_equal(uniques, want_uniques)

        shuffled_dti = pd.DatetimeIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"])

        # sorted first, then in order of first appearance
        scenarios = (
            ({"sort": True}, [2, 2, 1, 0, 2, 0],
             ["2014-01", "2014-02", "2014-03"]),
            ({}, [0, 0, 1, 2, 0, 2],
             ["2014-03", "2014-02", "2014-01"]),
        )
        for options, code_values, unique_labels in scenarios:
            want_codes = np.array(code_values, dtype=np.intp)
            want_uniques = DatetimeIndex(unique_labels)
            codes, uniques = shuffled_dti.factorize(**options)
            tm.assert_numpy_array_equal(codes, want_codes)
            tm.assert_index_equal(uniques, want_uniques)

        # freq must be preserved
        monthly = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo")
        want_codes = np.array([0, 1, 2, 3], dtype=np.intp)
        codes, uniques = monthly.factorize()
        tm.assert_numpy_array_equal(codes, want_codes)
        tm.assert_index_equal(uniques, monthly)
示例#6
0
 def test_astype_from_object(self):
     """Casting an object-dtype index of floats (with NaN) to ``float``."""
     values = [1.0, np.nan, 0.2]
     expected = Float64Index(values)
     result = Index(values, dtype="object").astype(float)
     assert result.dtype == expected.dtype
     tm.assert_index_equal(result, expected)
示例#7
0
    def test_join_non_int_index(self):
        """Join the index from ``create_index`` with an object-dtype index.

        Outer and inner joins must give the same result in both directions;
        left and right joins must return the side they are anchored on.
        """
        index = self.create_index()
        other = Index([3, 6, 7, 8, 10], dtype=object)

        symmetric_cases = (
            ("outer", [0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]),
            ("inner", [6, 8, 10]),
        )
        for how, labels in symmetric_cases:
            forward = index.join(other, how=how)
            backward = other.join(index, how=how)
            expected = Index(labels)
            tm.assert_index_equal(forward, backward)
            tm.assert_index_equal(forward, expected)

        # left joins keep the caller's labels
        tm.assert_index_equal(
            index.join(other, how="left"), index.astype(object))
        tm.assert_index_equal(other.join(index, how="left"), other)

        # right joins keep the argument's labels
        tm.assert_index_equal(index.join(other, how="right"), other)
        tm.assert_index_equal(
            other.join(index, how="right"), index.astype(object))
示例#8
0
    def test_pivot_integer_bug(self):
        df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")])

        result = df.pivot(index=1, columns=0, values=2)
        repr(result)
        tm.assert_index_equal(result.columns, Index(["A", "B"], name=0))
示例#9
0
 def test_astype_column_metadata(self, dtype):
     """``DataFrame.astype`` must leave a named UInt64Index of columns unchanged."""
     # GH#19920
     columns = UInt64Index([100, 200, 300], name="foo")
     data = np.arange(15).reshape(5, 3)
     converted = DataFrame(data, columns=columns).astype(dtype)
     tm.assert_index_equal(converted.columns, columns)
 def test_constructor_iso(self):
     # GH #21877
     expected = timedelta_range("1s", periods=9, freq="s")
     durations = [f"P0DT0H0M{i}S" for i in range(1, 10)]
     result = to_timedelta(durations)
     tm.assert_index_equal(result, expected)
 def test_float64_unit_conversion(self):
     """Float input with ``unit="D"`` is interpreted as fractional days."""
     # GH#23539
     expected = TimedeltaIndex(
         [Timedelta(days=days) for days in (1.5, 2.25)]
     )
     tdi = TimedeltaIndex([1.5, 2.25], unit="D")
     tm.assert_index_equal(tdi, expected)
示例#12
0
    def test_partial_set_empty_frame(self):
        """Check assignments into empty DataFrames.

        Covers three groups of cases: ``.loc`` assignments that must raise
        ``ValueError``, column assignments that leave the frame empty, and
        assignments of a Series that give the frame its first rows.
        """

        # partially set with an empty object
        # frame
        df = DataFrame()

        # none of these .loc assignments is allowed on a completely empty frame
        with pytest.raises(ValueError):
            df.loc[1] = 1

        with pytest.raises(ValueError):
            df.loc[1] = Series([1], index=["foo"])

        with pytest.raises(ValueError):
            df.loc[:, 1] = 1

        # these work as they don't really change
        # anything but the index
        # GH5632
        expected = DataFrame(columns=["foo"], index=Index([], dtype="object"))

        # ``f`` is redefined for each scenario below; every variant must
        # build a frame equal to the current ``expected``
        def f():
            df = DataFrame(index=Index([], dtype="object"))
            df["foo"] = Series([], dtype="object")
            return df

        tm.assert_frame_equal(f(), expected)

        def f():
            df = DataFrame()
            df["foo"] = Series(df.index)
            return df

        tm.assert_frame_equal(f(), expected)

        def f():
            df = DataFrame()
            df["foo"] = df.index
            return df

        tm.assert_frame_equal(f(), expected)

        # with an empty int64 index the new column is expected to be float64
        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
        expected["foo"] = expected["foo"].astype("float64")

        def f():
            df = DataFrame(index=Index([], dtype="int64"))
            df["foo"] = []
            return df

        tm.assert_frame_equal(f(), expected)

        def f():
            df = DataFrame(index=Index([], dtype="int64"))
            df["foo"] = Series(np.arange(len(df)), dtype="float64")
            return df

        tm.assert_frame_equal(f(), expected)

        def f():
            df = DataFrame(index=Index([], dtype="int64"))
            df["foo"] = range(len(df))
            return df

        # ``expected`` is rebuilt with the same definition as above
        expected = DataFrame(columns=["foo"], index=Index([], dtype="int64"))
        expected["foo"] = expected["foo"].astype("float64")
        tm.assert_frame_equal(f(), expected)

        # assigning a Series via __setitem__ and via .loc[:, col] must give
        # the same one-row frame
        df = DataFrame()
        tm.assert_index_equal(df.columns, Index([], dtype=object))
        df2 = DataFrame()
        df2[1] = Series([1], index=["foo"])
        df.loc[:, 1] = Series([1], index=["foo"])
        tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1]))
        tm.assert_frame_equal(df, df2)

        # no index to start
        expected = DataFrame({0: Series(1, index=range(4))},
                             columns=["A", "B", 0])

        df = DataFrame(columns=["A", "B"])
        df[0] = Series(1, index=range(4))
        # results discarded - presumably smoke checks that these do not raise
        df.dtypes
        str(df)
        tm.assert_frame_equal(df, expected)

        # same scenario through .loc
        df = DataFrame(columns=["A", "B"])
        df.loc[:, 0] = Series(1, index=range(4))
        # results discarded - presumably smoke checks that these do not raise
        df.dtypes
        str(df)
        tm.assert_frame_equal(df, expected)
示例#13
0
def test_dropna():
    """``MultiIndex.dropna`` honours ``how`` and rejects unknown options."""
    # GH 6194
    arrays = [
        [1, np.nan, 3, np.nan, 5],
        [1, 2, np.nan, np.nan, 5],
        ["a", "b", "c", np.nan, "e"],
    ]
    idx = pd.MultiIndex.from_arrays(arrays)

    # default and how="any": only entries without any missing value remain
    complete_only = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]])
    for options in ({}, {"how": "any"}):
        tm.assert_index_equal(idx.dropna(**options), complete_only)

    # how="all": only the entry that is missing on every level is removed
    not_all_missing = pd.MultiIndex.from_arrays(
        [[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), not_all_missing)

    with pytest.raises(ValueError, match="invalid how option: xxx"):
        idx.dropna(how="xxx")

    # GH26408
    # test if missing values are dropped for multiindex constructed
    # from codes and values
    idx = MultiIndex(
        levels=[
            [np.nan, None, pd.NaT, "128", 2],
            [np.nan, None, pd.NaT, "128", 2],
        ],
        codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]],
    )
    complete_only = MultiIndex.from_arrays([["128", 2], ["128", 2]])
    for options in ({}, {"how": "any"}):
        tm.assert_index_equal(idx.dropna(**options), complete_only)

    not_all_missing = MultiIndex.from_arrays(
        [[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]
    )
    tm.assert_index_equal(idx.dropna(how="all"), not_all_missing)
示例#14
0
 def test_astype_uint(self):
     """Casting a period range to ``uint64`` or ``uint32`` gives the same UInt64Index."""
     arr = period_range("2000", periods=2)
     expected = UInt64Index(np.array([10957, 10958], dtype="uint64"))
     for target_dtype in ("uint64", "uint32"):
         tm.assert_index_equal(arr.astype(target_dtype), expected)
示例#15
0
 def test_repr_roundtrip(self, indices):
     """Evaluating the repr of an index must rebuild an equal index."""
     # eval is applied only to the repr of the index under test
     rebuilt = eval(repr(indices))
     tm.assert_index_equal(rebuilt, indices)
示例#16
0
    def test_index(self):
        """Round-trip an integer Index through ``ujson`` for every ``orient``.

        Each encoding is decoded twice, once with the default decoder and
        once with ``numpy=True``; the rebuilt Index must equal the original.
        """
        source = Index([23, 45, 18, 98, 43, 11], name="index")
        decoder_options = ({}, {"numpy": True})

        # Column indexed.
        for options in decoder_options:
            decoded = ujson.decode(ujson.encode(source), **options)
            tm.assert_index_equal(source, Index(decoded, name="index"))

        # for orient="split" the cleaned mapping is expanded straight into
        # the Index constructor, and the name must survive as well
        for options in decoder_options:
            encoded = ujson.encode(source, orient="split")
            fields = _clean_dict(ujson.decode(encoded, **options))
            rebuilt = Index(**fields)

            tm.assert_index_equal(source, rebuilt)
            assert source.name == rebuilt.name

        # the remaining orients are rebuilt with the name passed explicitly
        for orient in ("values", "records", "index"):
            for options in decoder_options:
                encoded = ujson.encode(source, orient=orient)
                decoded = ujson.decode(encoded, **options)
                tm.assert_index_equal(source, Index(decoded, name="index"))
示例#17
0
 def test_type_coercion_valid(self, float_dtype):
     # There is no Float32Index, so we always
     # generate Float64Index.
     i = Index([1, 2, 3.5], dtype=float_dtype)
     tm.assert_index_equal(i, Index([1, 2, 3.5]))
示例#18
0
 def test_constructor_from_series_dt64(self, klass):
     stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")]
     expected = DatetimeIndex(stamps)
     ser = Series(stamps)
     result = klass(ser)
     tm.assert_index_equal(result, expected)
示例#19
0
 def test_insert(self, nulls_fixture):
     """Inserting the null value at position 1 must put NaN at that position."""
     # GH 18295 (test missing)
     index = self.create_index()
     first, remainder = index[0], list(index[1:])
     expected = Float64Index([first, np.nan] + remainder)
     result = index.insert(1, nulls_fixture)
     tm.assert_index_equal(result, expected)
示例#20
0
 def test_construction_list_tuples_nan(self, na_value, vtype):
     # GH#18505 : valid tuples containing NaN
     values = [(1, "two"), (3.0, na_value)]
     result = Index(vtype(values))
     expected = MultiIndex.from_tuples(values)
     tm.assert_index_equal(result, expected)
示例#21
0
def test_drop(idx):
    """Exercise ``drop`` on the ``idx`` MultiIndex with full and partial keys.

    Covers tuple keys, a MultiIndex of keys, first-level labels, keys that
    are absent (``KeyError``), ``errors="ignore"`` and mixtures of all of
    these.  The positional expectations assume ``idx`` is a six-entry
    MultiIndex fixture defined elsewhere - confirm against the fixture.
    """
    dropped = idx.drop([("foo", "two"), ("qux", "one")])

    # the same keys given as a MultiIndex must behave identically
    index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")])
    dropped2 = idx.drop(index)

    expected = idx[[0, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
    tm.assert_index_equal(dropped2, expected)

    # a bare first-level label, as a list and as a scalar
    dropped = idx.drop(["bar"])
    expected = idx[[0, 1, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop("foo")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # a key that is not present raises KeyError, whether given as a list of
    # tuples or as a MultiIndex
    index = MultiIndex.from_tuples([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop([("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(index)
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(["foo", "two"])

    # partially correct argument
    mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")])
    with pytest.raises(KeyError, match=r"^10$"):
        idx.drop(mixed_index)

    # error='ignore'
    # the missing key is skipped, so nothing is dropped here
    dropped = idx.drop(index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # only the key that exists is dropped
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[0, 1, 2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    dropped = idx.drop(["foo", "two"], errors="ignore")
    expected = idx[[2, 3, 4, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop
    dropped = idx.drop(["foo", ("qux", "one")])
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)

    # mixed partial / full drop / error='ignore'
    mixed_index = ["foo", ("qux", "one"), "two"]
    with pytest.raises(KeyError, match=r"^'two'$"):
        idx.drop(mixed_index)
    dropped = idx.drop(mixed_index, errors="ignore")
    expected = idx[[2, 3, 5]]
    tm.assert_index_equal(dropped, expected)
示例#22
0
 def test_constructor_infer_periodindex(self):
     xp = period_range("2012-1-1", freq="M", periods=3)
     rs = Index(xp)
     tm.assert_index_equal(rs, xp)
     assert isinstance(rs, PeriodIndex)
示例#23
0
    def test_concatlike_same_dtypes(self, item):
        """Check append/concat when every operand holds the same values.

        ``item`` is unpacked into a dtype name and the values for it
        (presumably supplied by a fixture or parametrization - confirm).
        Covers ``Index.append``, ``Series._append`` and ``pd.concat`` with
        two and three operands, name handling, and rejection of operands
        of the wrong type.
        """
        # GH 13660
        typ1, vals1 = item

        # every operand reuses the same values
        vals2 = vals1
        vals3 = vals1

        if typ1 == "category":
            # the values are converted to lists before joining and the
            # expected data is rebuilt as a Categorical
            exp_data = Categorical(list(vals1) + list(vals2))
            exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3))
        else:
            exp_data = vals1 + vals2
            exp_data3 = vals1 + vals2 + vals3

        # ----- Index ----- #

        # index.append
        res = Index(vals1).append(Index(vals2))
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # 3 elements
        res = Index(vals1).append([Index(vals2), Index(vals3)])
        exp = Index(exp_data3)
        tm.assert_index_equal(res, exp)

        # index.append name mismatch
        # the expected result carries no name
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="y")
        res = i1.append(i2)
        exp = Index(exp_data)
        tm.assert_index_equal(res, exp)

        # index.append name match
        # the shared name is kept
        i1 = Index(vals1, name="x")
        i2 = Index(vals2, name="x")
        res = i1.append(i2)
        exp = Index(exp_data, name="x")
        tm.assert_index_equal(res, exp)

        # cannot append non-index
        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append(vals2)

        with pytest.raises(TypeError, match="all inputs must be Index"):
            Index(vals1).append([Index(vals2), vals3])

        # ----- Series ----- #

        # series._append
        res = Series(vals1)._append(Series(vals2), ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # concat
        res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # 3 elements
        res = Series(vals1)._append(
            [Series(vals2), Series(vals3)], ignore_index=True)
        exp = Series(exp_data3)
        tm.assert_series_equal(res, exp)

        res = pd.concat(
            [Series(vals1), Series(vals2),
             Series(vals3)],
            ignore_index=True,
        )
        tm.assert_series_equal(res, exp)

        # name mismatch
        # the expected result carries no name
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="y")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data)
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # name match
        # the shared name is kept
        s1 = Series(vals1, name="x")
        s2 = Series(vals2, name="x")
        res = s1._append(s2, ignore_index=True)
        exp = Series(exp_data, name="x")
        tm.assert_series_equal(res, exp, check_index_type=True)

        res = pd.concat([s1, s2], ignore_index=True)
        tm.assert_series_equal(res, exp, check_index_type=True)

        # cannot append or concat the raw values; a TypeError with the
        # message below is expected
        msg = (r"cannot concatenate object of type '.+'; "
               "only Series and DataFrame objs are valid")
        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append(vals2)

        with pytest.raises(TypeError, match=msg):
            Series(vals1)._append([Series(vals2), vals3])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), vals2])

        with pytest.raises(TypeError, match=msg):
            pd.concat([Series(vals1), Series(vals2), vals3])
示例#24
0
    def test_drop_duplicates(self):

        idx = CategoricalIndex([0, 0, 0], name="foo")
        expected = CategoricalIndex([0], name="foo")
        tm.assert_index_equal(idx.drop_duplicates(), expected)
        tm.assert_index_equal(idx.unique(), expected)
示例#25
0
 def test_unique(self, arr, expected):
     result = arr.unique()
     tm.assert_index_equal(result, expected)
     # GH 21737
     # Ensure the underlying data is consistent
     assert result[0] == expected[0]
示例#26
0
    def test_method_delegation(self):
        """Category methods called on a CategoricalIndex return a CategoricalIndex.

        Covers ``set_categories``, ``rename_categories`` (list and callable),
        ``add_categories``, ``remove_categories``, ``as_unordered`` and
        ``as_ordered``, then checks that ``inplace=True`` is rejected.
        """
        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cab"))
        tm.assert_index_equal(wide.set_categories(list("cab")), expected)

        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("ffggef"), categories=list("efg"))
        tm.assert_index_equal(narrow.rename_categories(list("efg")), expected)

        # GH18862 (let rename_categories take callables)
        renamed = narrow.rename_categories(lambda label: label.upper())
        expected = CategoricalIndex(list("AABBCA"), categories=list("CAB"))
        tm.assert_index_equal(renamed, expected)

        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(list("aabbca"), categories=list("cabd"))
        tm.assert_index_equal(narrow.add_categories(["d"]), expected)

        # dropping category "c" leaves NaN where "c" used to be
        narrow = CategoricalIndex(list("aabbca"), categories=list("cab"))
        expected = CategoricalIndex(
            list("aabb") + [np.nan] + ["a"], categories=list("ab"))
        tm.assert_index_equal(narrow.remove_categories(["c"]), expected)

        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        tm.assert_index_equal(wide.as_unordered(), wide)

        wide = CategoricalIndex(list("aabbca"), categories=list("cabdef"))
        expected = CategoricalIndex(
            list("aabbca"), categories=list("cabdef"), ordered=True)
        tm.assert_index_equal(wide.as_ordered(), expected)

        # invalid
        with pytest.raises(
                ValueError, match="cannot use inplace with CategoricalIndex"):
            wide.set_categories(list("cab"), inplace=True)
示例#27
0
 def test_categories(self):
     result = CategoricalDtype(["a", "b", "c"])
     tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"]))
     assert result.ordered is False
示例#28
0
    def test_join_non_int_index(self, index_large):
        """Join ``index_large`` with an object-dtype index of values above 2**63.

        Outer and inner joins must give the same result in both directions;
        left and right joins must return the side they are anchored on.
        """
        other = Index(2**63 + np.array([1, 5, 7, 10, 20], dtype="uint64"),
                      dtype=object)

        symmetric_cases = (
            ("outer", [0, 1, 5, 7, 10, 15, 20, 25]),
            ("inner", [10, 20]),
        )
        for how, offsets in symmetric_cases:
            forward = index_large.join(other, how=how)
            backward = other.join(index_large, how=how)
            expected = Index(2**63 + np.array(offsets, dtype="uint64"))
            tm.assert_index_equal(forward, backward)
            tm.assert_index_equal(forward, expected)

        # left joins keep the caller's labels
        tm.assert_index_equal(
            index_large.join(other, how="left"), index_large.astype(object))
        tm.assert_index_equal(other.join(index_large, how="left"), other)

        # right joins keep the argument's labels
        tm.assert_index_equal(index_large.join(other, how="right"), other)
        tm.assert_index_equal(
            other.join(index_large, how="right"), index_large.astype(object))
示例#29
0
    def test_dups_fancy_indexing(self):
        """Check list-based selection on frames with duplicate labels.

        Covers duplicate column names, duplicate row labels selected with
        ``.loc``, and the ``KeyError`` raised when a requested label is
        missing.
        """

        # GH 3455

        # selecting ["b", "a"] returns both columns named "a"
        df = tm.makeCustomDataframe(10, 3)
        df.columns = ["a", "a", "b"]
        result = df[["b", "a"]].columns
        expected = Index(["b", "a", "a"])
        tm.assert_index_equal(result, expected)

        # across dtypes
        df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]],
                       columns=list("aaaaaaa"))
        # results discarded - presumably smoke checks that these do not raise
        df.head()
        str(df)
        # same data, with the duplicate labels assigned after construction
        result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]])
        result.columns = list("aaaaaaa")

        # TODO(wesm): unused?
        df_v = df.iloc[:, 4]  # noqa
        res_v = result.iloc[:, 4]  # noqa

        tm.assert_frame_equal(df, result)

        # GH 3561, dups not in selected order
        df = DataFrame(
            {
                "test": [5, 7, 9, 11],
                "test1": [4.0, 5, 6, 7],
                "other": list("abcd")
            },
            index=["A", "A", "B", "C"],
        )
        rows = ["C", "B"]
        expected = DataFrame(
            {
                "test": [11, 9],
                "test1": [7.0, 6],
                "other": ["d", "c"]
            },
            index=rows)
        result = df.loc[rows]
        tm.assert_frame_equal(result, expected)

        # the same selection with the labels wrapped in an Index
        result = df.loc[Index(rows)]
        tm.assert_frame_equal(result, expected)

        # "E" is not in the index, so the whole lookup must fail
        rows = ["C", "B", "E"]
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[rows]

        # see GH5553, make sure we use the right indexer
        rows = ["F", "G", "H", "C", "B", "E"]
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[rows]

        # List containing only missing label
        dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD"))
        with pytest.raises(
                KeyError,
                match=re.escape(
                    "\"None of [Index(['E'], dtype='object')] are in the [index]\""
                ),
        ):
            dfnu.loc[["E"]]

        # ToDo: check_index_type can be True after GH 11497

        # GH 4619; duplicate indexer with missing label
        df = DataFrame({"A": [0, 1, 2]})
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[[0, 8, 0]]

        # same indexer against a column of strings
        df = DataFrame({"A": list("abc")})
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[[0, 8, 0]]

        # non unique with non unique selector
        df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"])
        with pytest.raises(KeyError, match="with any missing labels"):
            df.loc[["A", "A", "E"]]
示例#30
0
文件: test_ops.py 项目: sduzjp/Python
    def test_value_counts_datetime64(self, index_or_series):
        """Check ``value_counts``, ``unique`` and ``nunique`` on datetime-like data.

        ``index_or_series`` is the constructor under test; the ``isinstance``
        branches below distinguish an Index result from any other result
        (presumably a Series - confirm against the fixture).  Datetime data
        is checked with and without ``NaT``, followed by timedelta data.
        """
        klass = index_or_series

        # GH 3002, datetime64[ns]
        # don't test names though
        txt = "\n".join(
            [
                "xxyyzz20100101PIE",
                "xxyyzz20100101GUM",
                "xxyyzz20100101EGG",
                "xxyyww20090101EGG",
                "foofoo20080909PIE",
                "foofoo20080909GUM",
            ]
        )
        f = StringIO(txt)
        # fixed-width columns: 6 chars person_id, 8 chars date, 3 chars food
        df = pd.read_fwf(
            f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]
        )

        s = klass(df["dt"].copy())
        s.name = None
        # counts from the six rows above: 2010-01-01 x3, 2008-09-09 x2,
        # 2009-01-01 x1
        idx = pd.to_datetime(
            ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"]
        )
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        # unique values are listed in order of first appearance in the data
        expected = np_array_datetime64_compat(
            ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"],
            dtype="datetime64[ns]",
        )
        if isinstance(s, Index):
            tm.assert_index_equal(s.unique(), DatetimeIndex(expected))
        else:
            tm.assert_numpy_array_equal(s.unique(), expected)

        assert s.nunique() == 3

        # with NaT
        s = df["dt"].copy()
        s = klass(list(s.values) + [pd.NaT])

        # by default the NaT entry is not counted
        result = s.value_counts()
        assert result.index.dtype == "datetime64[ns]"
        tm.assert_series_equal(result, expected_s)

        # dropna=False adds one NaT row; expected_s is extended in place
        result = s.value_counts(dropna=False)
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        assert unique.dtype == "datetime64[ns]"

        # numpy_array_equal cannot compare pd.NaT
        if isinstance(s, Index):
            exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT])
            tm.assert_index_equal(unique, exp_idx)
        else:
            tm.assert_numpy_array_equal(unique[:3], expected)
            assert pd.isna(unique[3])

        assert s.nunique() == 3
        assert s.nunique(dropna=False) == 4

        # timedelta64[ns]
        # column minus itself is zero, so every one of the six rows is 1 day
        td = df.dt - df.dt + timedelta(1)
        td = klass(td, name="dt")

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta("1day")], name="dt")
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(["1 days"], name="dt")
        if isinstance(td, Index):
            tm.assert_index_equal(td.unique(), expected)
        else:
            tm.assert_numpy_array_equal(td.unique(), expected.values)

        # same data with the operands of the addition swapped
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2, name="dt")
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)