def test_duplicated_drop_duplicates_index(self): # GH 4060 for original in self.objs: if isinstance(original, Index): # special case if original.is_boolean(): result = original.drop_duplicates() expected = Index([False, True], name="a") tm.assert_index_equal(result, expected) continue # original doesn't have duplicates expected = np.array([False] * len(original), dtype=bool) duplicated = original.duplicated() tm.assert_numpy_array_equal(duplicated, expected) assert duplicated.dtype == bool result = original.drop_duplicates() tm.assert_index_equal(result, original) assert result is not original # has_duplicates assert not original.has_duplicates # create repeated values, 3rd and 5th values are duplicated idx = original[list(range(len(original))) + [5, 3]] expected = np.array([False] * len(original) + [True, True], dtype=bool) duplicated = idx.duplicated() tm.assert_numpy_array_equal(duplicated, expected) assert duplicated.dtype == bool tm.assert_index_equal(idx.drop_duplicates(), original) base = [False] * len(idx) base[3] = True base[5] = True expected = np.array(base) duplicated = idx.duplicated(keep="last") tm.assert_numpy_array_equal(duplicated, expected) assert duplicated.dtype == bool result = idx.drop_duplicates(keep="last") tm.assert_index_equal(result, idx[~expected]) base = [False] * len(original) + [True, True] base[3] = True base[5] = True expected = np.array(base) duplicated = idx.duplicated(keep=False) tm.assert_numpy_array_equal(duplicated, expected) assert duplicated.dtype == bool result = idx.drop_duplicates(keep=False) tm.assert_index_equal(result, idx[~expected]) with pytest.raises( TypeError, match=r"drop_duplicates\(\) got an unexpected keyword argument", ): idx.drop_duplicates(inplace=True) else: expected = Series( [False] * len(original), index=original.index, name="a" ) tm.assert_series_equal(original.duplicated(), expected) result = original.drop_duplicates() tm.assert_series_equal(result, original) assert result is not original idx = original.index[list(range(len(original))) + [5, 3]] values = original._values[list(range(len(original))) + [5, 3]] s = Series(values, index=idx, name="a") expected = Series( [False] * len(original) + [True, True], index=idx, name="a" ) tm.assert_series_equal(s.duplicated(), expected) tm.assert_series_equal(s.drop_duplicates(), original) base = [False] * len(idx) base[3] = True base[5] = True expected = Series(base, index=idx, name="a") tm.assert_series_equal(s.duplicated(keep="last"), expected) tm.assert_series_equal( s.drop_duplicates(keep="last"), s[~np.array(base)] ) base = [False] * len(original) + [True, True] base[3] = True base[5] = True expected = Series(base, index=idx, name="a") tm.assert_series_equal(s.duplicated(keep=False), expected) tm.assert_series_equal( s.drop_duplicates(keep=False), s[~np.array(base)] ) s.drop_duplicates(inplace=True) tm.assert_series_equal(s, original)
def test_basic(self, categories, ordered): c1 = CategoricalDtype(categories, ordered=ordered) tm.assert_index_equal(c1.categories, pd.Index(categories)) assert c1.ordered is ordered
def test_categorical_categories(self): # GH17884 c1 = CategoricalDtype(Categorical(["a", "b"])) tm.assert_index_equal(c1.categories, pd.Index(["a", "b"])) c1 = CategoricalDtype(CategoricalIndex(["a", "b"])) tm.assert_index_equal(c1.categories, pd.Index(["a", "b"]))
def test_concatlike_dtypes_coercion(self, item, item2): # GH 13660 typ1, vals1 = item typ2, vals2 = item2 vals3 = vals2 # basically infer exp_index_dtype = None exp_series_dtype = None if typ1 == typ2: # same dtype is tested in test_concatlike_same_dtypes return elif typ1 == "category" or typ2 == "category": # The `vals1 + vals2` below fails bc one of these is a Categorical # instead of a list; we have separate dedicated tests for categorical return warn = None # specify expected dtype if typ1 == "bool" and typ2 in ("int64", "float64"): # series coerces to numeric based on numpy rule # index doesn't because bool is object dtype exp_series_dtype = typ2 warn = FutureWarning elif typ2 == "bool" and typ1 in ("int64", "float64"): exp_series_dtype = typ1 warn = FutureWarning elif (typ1 == "datetime64[ns, US/Eastern]" or typ2 == "datetime64[ns, US/Eastern]" or typ1 == "timedelta64[ns]" or typ2 == "timedelta64[ns]"): exp_index_dtype = object exp_series_dtype = object exp_data = vals1 + vals2 exp_data3 = vals1 + vals2 + vals3 # ----- Index ----- # # index.append res = Index(vals1).append(Index(vals2)) exp = Index(exp_data, dtype=exp_index_dtype) tm.assert_index_equal(res, exp) # 3 elements res = Index(vals1).append([Index(vals2), Index(vals3)]) exp = Index(exp_data3, dtype=exp_index_dtype) tm.assert_index_equal(res, exp) # ----- Series ----- # # series._append with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): # GH#39817 res = Series(vals1)._append(Series(vals2), ignore_index=True) exp = Series(exp_data, dtype=exp_series_dtype) tm.assert_series_equal(res, exp, check_index_type=True) # concat with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): # GH#39817 res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) tm.assert_series_equal(res, exp, check_index_type=True) # 3 elements with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): # GH#39817 res = Series(vals1)._append( [Series(vals2), Series(vals3)], ignore_index=True) exp = Series(exp_data3, dtype=exp_series_dtype) tm.assert_series_equal(res, exp) with tm.assert_produces_warning(warn, match="concatenating bool-dtype"): # GH#39817 res = pd.concat( [Series(vals1), Series(vals2), Series(vals3)], ignore_index=True, ) tm.assert_series_equal(res, exp)
def test_factorize(self): idx1 = DatetimeIndex( ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"]) exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) # tz must be preserved idx1 = idx1.tz_localize("Asia/Tokyo") exp_idx = exp_idx.tz_localize("Asia/Tokyo") arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) idx2 = pd.DatetimeIndex( ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"]) exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) exp_idx = DatetimeIndex(["2014-01", "2014-02", "2014-03"]) arr, idx = idx2.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = DatetimeIndex(["2014-03", "2014-02", "2014-01"]) arr, idx = idx2.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) # freq must be preserved idx3 = date_range("2000-01", periods=4, freq="M", tz="Asia/Tokyo") exp_arr = np.array([0, 1, 2, 3], dtype=np.intp) arr, idx = idx3.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, idx3)
def test_astype_from_object(self): index = Index([1.0, np.nan, 0.2], dtype="object") result = index.astype(float) expected = Float64Index([1.0, np.nan, 0.2]) assert result.dtype == expected.dtype tm.assert_index_equal(result, expected)
def test_join_non_int_index(self): index = self.create_index() other = Index([3, 6, 7, 8, 10], dtype=object) outer = index.join(other, how="outer") outer2 = other.join(index, how="outer") expected = Index([0, 2, 3, 4, 6, 7, 8, 10, 12, 14, 16, 18]) tm.assert_index_equal(outer, outer2) tm.assert_index_equal(outer, expected) inner = index.join(other, how="inner") inner2 = other.join(index, how="inner") expected = Index([6, 8, 10]) tm.assert_index_equal(inner, inner2) tm.assert_index_equal(inner, expected) left = index.join(other, how="left") tm.assert_index_equal(left, index.astype(object)) left2 = other.join(index, how="left") tm.assert_index_equal(left2, other) right = index.join(other, how="right") tm.assert_index_equal(right, other) right2 = other.join(index, how="right") tm.assert_index_equal(right2, index.astype(object))
def test_pivot_integer_bug(self): df = DataFrame(data=[("A", "1", "A1"), ("B", "2", "B2")]) result = df.pivot(index=1, columns=0, values=2) repr(result) tm.assert_index_equal(result.columns, Index(["A", "B"], name=0))
def test_astype_column_metadata(self, dtype): # GH#19920 columns = UInt64Index([100, 200, 300], name="foo") df = DataFrame(np.arange(15).reshape(5, 3), columns=columns) df = df.astype(dtype) tm.assert_index_equal(df.columns, columns)
def test_constructor_iso(self): # GH #21877 expected = timedelta_range("1s", periods=9, freq="s") durations = [f"P0DT0H0M{i}S" for i in range(1, 10)] result = to_timedelta(durations) tm.assert_index_equal(result, expected)
def test_float64_unit_conversion(self): # GH#23539 tdi = TimedeltaIndex([1.5, 2.25], unit="D") expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) tm.assert_index_equal(tdi, expected)
def test_partial_set_empty_frame(self): # partially set with an empty object # frame df = DataFrame() with pytest.raises(ValueError): df.loc[1] = 1 with pytest.raises(ValueError): df.loc[1] = Series([1], index=["foo"]) with pytest.raises(ValueError): df.loc[:, 1] = 1 # these work as they don't really change # anything but the index # GH5632 expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) def f(): df = DataFrame(index=Index([], dtype="object")) df["foo"] = Series([], dtype="object") return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame() df["foo"] = Series(df.index) return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame() df["foo"] = df.index return df tm.assert_frame_equal(f(), expected) expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = [] return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = Series(np.arange(len(df)), dtype="float64") return df tm.assert_frame_equal(f(), expected) def f(): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = range(len(df)) return df expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") tm.assert_frame_equal(f(), expected) df = DataFrame() tm.assert_index_equal(df.columns, Index([], dtype=object)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) tm.assert_frame_equal(df, df2) # no index to start expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) df = DataFrame(columns=["A", "B"]) df[0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected) df = DataFrame(columns=["A", "B"]) df.loc[:, 0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected)
def test_dropna(): # GH 6194 idx = pd.MultiIndex.from_arrays([ [1, np.nan, 3, np.nan, 5], [1, 2, np.nan, np.nan, 5], ["a", "b", "c", np.nan, "e"], ]) exp = pd.MultiIndex.from_arrays([[1, 5], [1, 5], ["a", "e"]]) tm.assert_index_equal(idx.dropna(), exp) tm.assert_index_equal(idx.dropna(how="any"), exp) exp = pd.MultiIndex.from_arrays([[1, np.nan, 3, 5], [1, 2, np.nan, 5], ["a", "b", "c", "e"]]) tm.assert_index_equal(idx.dropna(how="all"), exp) msg = "invalid how option: xxx" with pytest.raises(ValueError, match=msg): idx.dropna(how="xxx") # GH26408 # test if missing values are dropped for multiindex constructed # from codes and values idx = MultiIndex( levels=[[np.nan, None, pd.NaT, "128", 2], [np.nan, None, pd.NaT, "128", 2]], codes=[[0, -1, 1, 2, 3, 4], [0, -1, 3, 3, 3, 4]], ) expected = MultiIndex.from_arrays([["128", 2], ["128", 2]]) tm.assert_index_equal(idx.dropna(), expected) tm.assert_index_equal(idx.dropna(how="any"), expected) expected = MultiIndex.from_arrays([[np.nan, np.nan, "128", 2], ["128", "128", "128", 2]]) tm.assert_index_equal(idx.dropna(how="all"), expected)
def test_astype_uint(self): arr = period_range("2000", periods=2) expected = UInt64Index(np.array([10957, 10958], dtype="uint64")) tm.assert_index_equal(arr.astype("uint64"), expected) tm.assert_index_equal(arr.astype("uint32"), expected)
def test_repr_roundtrip(self, indices): tm.assert_index_equal(eval(repr(indices)), indices)
def test_index(self): i = Index([23, 45, 18, 98, 43, 11], name="index") # Column indexed. output = Index(ujson.decode(ujson.encode(i)), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i), numpy=True), name="index") tm.assert_index_equal(i, output) dec = _clean_dict(ujson.decode(ujson.encode(i, orient="split"))) output = Index(**dec) tm.assert_index_equal(i, output) assert i.name == output.name dec = _clean_dict( ujson.decode(ujson.encode(i, orient="split"), numpy=True)) output = Index(**dec) tm.assert_index_equal(i, output) assert i.name == output.name output = Index(ujson.decode(ujson.encode(i, orient="values")), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i, orient="values"), numpy=True), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i, orient="records")), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i, orient="records"), numpy=True), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i, orient="index")), name="index") tm.assert_index_equal(i, output) output = Index(ujson.decode(ujson.encode(i, orient="index"), numpy=True), name="index") tm.assert_index_equal(i, output)
def test_type_coercion_valid(self, float_dtype): # There is no Float32Index, so we always # generate Float64Index. i = Index([1, 2, 3.5], dtype=float_dtype) tm.assert_index_equal(i, Index([1, 2, 3.5]))
def test_constructor_from_series_dt64(self, klass): stamps = [Timestamp("20110101"), Timestamp("20120101"), Timestamp("20130101")] expected = DatetimeIndex(stamps) ser = Series(stamps) result = klass(ser) tm.assert_index_equal(result, expected)
def test_insert(self, nulls_fixture): # GH 18295 (test missing) index = self.create_index() expected = Float64Index([index[0], np.nan] + list(index[1:])) result = index.insert(1, nulls_fixture) tm.assert_index_equal(result, expected)
def test_construction_list_tuples_nan(self, na_value, vtype): # GH#18505 : valid tuples containing NaN values = [(1, "two"), (3.0, na_value)] result = Index(vtype(values)) expected = MultiIndex.from_tuples(values) tm.assert_index_equal(result, expected)
def test_drop(idx): dropped = idx.drop([("foo", "two"), ("qux", "one")]) index = MultiIndex.from_tuples([("foo", "two"), ("qux", "one")]) dropped2 = idx.drop(index) expected = idx[[0, 2, 3, 5]] tm.assert_index_equal(dropped, expected) tm.assert_index_equal(dropped2, expected) dropped = idx.drop(["bar"]) expected = idx[[0, 1, 3, 4, 5]] tm.assert_index_equal(dropped, expected) dropped = idx.drop("foo") expected = idx[[2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) index = MultiIndex.from_tuples([("bar", "two")]) with pytest.raises(KeyError, match=r"^10$"): idx.drop([("bar", "two")]) with pytest.raises(KeyError, match=r"^10$"): idx.drop(index) with pytest.raises(KeyError, match=r"^'two'$"): idx.drop(["foo", "two"]) # partially correct argument mixed_index = MultiIndex.from_tuples([("qux", "one"), ("bar", "two")]) with pytest.raises(KeyError, match=r"^10$"): idx.drop(mixed_index) # error='ignore' dropped = idx.drop(index, errors="ignore") expected = idx[[0, 1, 2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) dropped = idx.drop(mixed_index, errors="ignore") expected = idx[[0, 1, 2, 3, 5]] tm.assert_index_equal(dropped, expected) dropped = idx.drop(["foo", "two"], errors="ignore") expected = idx[[2, 3, 4, 5]] tm.assert_index_equal(dropped, expected) # mixed partial / full drop dropped = idx.drop(["foo", ("qux", "one")]) expected = idx[[2, 3, 5]] tm.assert_index_equal(dropped, expected) # mixed partial / full drop / error='ignore' mixed_index = ["foo", ("qux", "one"), "two"] with pytest.raises(KeyError, match=r"^'two'$"): idx.drop(mixed_index) dropped = idx.drop(mixed_index, errors="ignore") expected = idx[[2, 3, 5]] tm.assert_index_equal(dropped, expected)
def test_constructor_infer_periodindex(self): xp = period_range("2012-1-1", freq="M", periods=3) rs = Index(xp) tm.assert_index_equal(rs, xp) assert isinstance(rs, PeriodIndex)
def test_concatlike_same_dtypes(self, item): # GH 13660 typ1, vals1 = item vals2 = vals1 vals3 = vals1 if typ1 == "category": exp_data = Categorical(list(vals1) + list(vals2)) exp_data3 = Categorical(list(vals1) + list(vals2) + list(vals3)) else: exp_data = vals1 + vals2 exp_data3 = vals1 + vals2 + vals3 # ----- Index ----- # # index.append res = Index(vals1).append(Index(vals2)) exp = Index(exp_data) tm.assert_index_equal(res, exp) # 3 elements res = Index(vals1).append([Index(vals2), Index(vals3)]) exp = Index(exp_data3) tm.assert_index_equal(res, exp) # index.append name mismatch i1 = Index(vals1, name="x") i2 = Index(vals2, name="y") res = i1.append(i2) exp = Index(exp_data) tm.assert_index_equal(res, exp) # index.append name match i1 = Index(vals1, name="x") i2 = Index(vals2, name="x") res = i1.append(i2) exp = Index(exp_data, name="x") tm.assert_index_equal(res, exp) # cannot append non-index with pytest.raises(TypeError, match="all inputs must be Index"): Index(vals1).append(vals2) with pytest.raises(TypeError, match="all inputs must be Index"): Index(vals1).append([Index(vals2), vals3]) # ----- Series ----- # # series.append res = Series(vals1)._append(Series(vals2), ignore_index=True) exp = Series(exp_data) tm.assert_series_equal(res, exp, check_index_type=True) # concat res = pd.concat([Series(vals1), Series(vals2)], ignore_index=True) tm.assert_series_equal(res, exp, check_index_type=True) # 3 elements res = Series(vals1)._append( [Series(vals2), Series(vals3)], ignore_index=True) exp = Series(exp_data3) tm.assert_series_equal(res, exp) res = pd.concat( [Series(vals1), Series(vals2), Series(vals3)], ignore_index=True, ) tm.assert_series_equal(res, exp) # name mismatch s1 = Series(vals1, name="x") s2 = Series(vals2, name="y") res = s1._append(s2, ignore_index=True) exp = Series(exp_data) tm.assert_series_equal(res, exp, check_index_type=True) res = pd.concat([s1, s2], ignore_index=True) tm.assert_series_equal(res, exp, check_index_type=True) # name match s1 = Series(vals1, name="x") s2 = Series(vals2, name="x") res = s1._append(s2, ignore_index=True) exp = Series(exp_data, name="x") tm.assert_series_equal(res, exp, check_index_type=True) res = pd.concat([s1, s2], ignore_index=True) tm.assert_series_equal(res, exp, check_index_type=True) # cannot append non-index msg = (r"cannot concatenate object of type '.+'; " "only Series and DataFrame objs are valid") with pytest.raises(TypeError, match=msg): Series(vals1)._append(vals2) with pytest.raises(TypeError, match=msg): Series(vals1)._append([Series(vals2), vals3]) with pytest.raises(TypeError, match=msg): pd.concat([Series(vals1), vals2]) with pytest.raises(TypeError, match=msg): pd.concat([Series(vals1), Series(vals2), vals3])
def test_drop_duplicates(self): idx = CategoricalIndex([0, 0, 0], name="foo") expected = CategoricalIndex([0], name="foo") tm.assert_index_equal(idx.drop_duplicates(), expected) tm.assert_index_equal(idx.unique(), expected)
def test_unique(self, arr, expected): result = arr.unique() tm.assert_index_equal(result, expected) # GH 21737 # Ensure the underlying data is consistent assert result[0] == expected[0]
def test_method_delegation(self): ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) result = ci.set_categories(list("cab")) tm.assert_index_equal( result, CategoricalIndex(list("aabbca"), categories=list("cab"))) ci = CategoricalIndex(list("aabbca"), categories=list("cab")) result = ci.rename_categories(list("efg")) tm.assert_index_equal( result, CategoricalIndex(list("ffggef"), categories=list("efg"))) # GH18862 (let rename_categories take callables) result = ci.rename_categories(lambda x: x.upper()) tm.assert_index_equal( result, CategoricalIndex(list("AABBCA"), categories=list("CAB"))) ci = CategoricalIndex(list("aabbca"), categories=list("cab")) result = ci.add_categories(["d"]) tm.assert_index_equal( result, CategoricalIndex(list("aabbca"), categories=list("cabd"))) ci = CategoricalIndex(list("aabbca"), categories=list("cab")) result = ci.remove_categories(["c"]) tm.assert_index_equal( result, CategoricalIndex(list("aabb") + [np.nan] + ["a"], categories=list("ab")), ) ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) result = ci.as_unordered() tm.assert_index_equal(result, ci) ci = CategoricalIndex(list("aabbca"), categories=list("cabdef")) result = ci.as_ordered() tm.assert_index_equal( result, CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=True), ) # invalid msg = "cannot use inplace with CategoricalIndex" with pytest.raises(ValueError, match=msg): ci.set_categories(list("cab"), inplace=True)
def test_categories(self): result = CategoricalDtype(["a", "b", "c"]) tm.assert_index_equal(result.categories, pd.Index(["a", "b", "c"])) assert result.ordered is False
def test_join_non_int_index(self, index_large): other = Index(2**63 + np.array([1, 5, 7, 10, 20], dtype="uint64"), dtype=object) outer = index_large.join(other, how="outer") outer2 = other.join(index_large, how="outer") expected = Index( 2**63 + np.array([0, 1, 5, 7, 10, 15, 20, 25], dtype="uint64")) tm.assert_index_equal(outer, outer2) tm.assert_index_equal(outer, expected) inner = index_large.join(other, how="inner") inner2 = other.join(index_large, how="inner") expected = Index(2**63 + np.array([10, 20], dtype="uint64")) tm.assert_index_equal(inner, inner2) tm.assert_index_equal(inner, expected) left = index_large.join(other, how="left") tm.assert_index_equal(left, index_large.astype(object)) left2 = other.join(index_large, how="left") tm.assert_index_equal(left2, other) right = index_large.join(other, how="right") tm.assert_index_equal(right, other) right2 = other.join(index_large, how="right") tm.assert_index_equal(right2, index_large.astype(object))
def test_dups_fancy_indexing(self): # GH 3455 df = tm.makeCustomDataframe(10, 3) df.columns = ["a", "a", "b"] result = df[["b", "a"]].columns expected = Index(["b", "a", "a"]) tm.assert_index_equal(result, expected) # across dtypes df = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]], columns=list("aaaaaaa")) df.head() str(df) result = DataFrame([[1, 2, 1.0, 2.0, 3.0, "foo", "bar"]]) result.columns = list("aaaaaaa") # TODO(wesm): unused? df_v = df.iloc[:, 4] # noqa res_v = result.iloc[:, 4] # noqa tm.assert_frame_equal(df, result) # GH 3561, dups not in selected order df = DataFrame( { "test": [5, 7, 9, 11], "test1": [4.0, 5, 6, 7], "other": list("abcd") }, index=["A", "A", "B", "C"], ) rows = ["C", "B"] expected = DataFrame( { "test": [11, 9], "test1": [7.0, 6], "other": ["d", "c"] }, index=rows) result = df.loc[rows] tm.assert_frame_equal(result, expected) result = df.loc[Index(rows)] tm.assert_frame_equal(result, expected) rows = ["C", "B", "E"] with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] # see GH5553, make sure we use the right indexer rows = ["F", "G", "H", "C", "B", "E"] with pytest.raises(KeyError, match="with any missing labels"): df.loc[rows] # List containing only missing label dfnu = DataFrame(np.random.randn(5, 3), index=list("AABCD")) with pytest.raises( KeyError, match=re.escape( "\"None of [Index(['E'], dtype='object')] are in the [index]\"" ), ): dfnu.loc[["E"]] # ToDo: check_index_type can be True after GH 11497 # GH 4619; duplicate indexer with missing label df = DataFrame({"A": [0, 1, 2]}) with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] df = DataFrame({"A": list("abc")}) with pytest.raises(KeyError, match="with any missing labels"): df.loc[[0, 8, 0]] # non unique with non unique selector df = DataFrame({"test": [5, 7, 9, 11]}, index=["A", "A", "B", "C"]) with pytest.raises(KeyError, match="with any missing labels"): df.loc[["A", "A", "E"]]
def test_value_counts_datetime64(self, index_or_series): klass = index_or_series # GH 3002, datetime64[ns] # don't test names though txt = "\n".join( [ "xxyyzz20100101PIE", "xxyyzz20100101GUM", "xxyyzz20100101EGG", "xxyyww20090101EGG", "foofoo20080909PIE", "foofoo20080909GUM", ] ) f = StringIO(txt) df = pd.read_fwf( f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"] ) s = klass(df["dt"].copy()) s.name = None idx = pd.to_datetime( ["2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00"] ) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat( ["2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00"], dtype="datetime64[ns]", ) if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) assert s.nunique() == 3 # with NaT s = df["dt"].copy() s = klass(list(s.values) + [pd.NaT]) result = s.value_counts() assert result.index.dtype == "datetime64[ns]" tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() assert unique.dtype == "datetime64[ns]" # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isna(unique[3]) assert s.nunique() == 3 assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name="dt") result = td.value_counts() expected_s = Series([6], index=[Timedelta("1day")], name="dt") tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(["1 days"], name="dt") if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)