def test_typecast_on_join_indexes_matching_categorical():
    """Inner-join a category-dtype index against a str-dtype index.

    Both frames carry the same five labels in ``join_col`` (the left built
    from a ``category`` Series, the right from a ``str`` Series) and are
    indexed on that column.  The joined frame is compared against a frame
    indexed by the plain label list with the suffixed ``B_x`` / ``B_y``
    value columns.
    """
    labels = ["a", "b", "c", "d", "e"]
    values = [1, 2, 3, 4, 5]

    # Left side: categorical join key, promoted to the index.
    left = DataFrame(
        {"join_col": Series(labels, dtype="category"), "B": values}
    ).set_index("join_col")

    # Right side: string join key, promoted to the index.
    right = DataFrame(
        {"join_col": Series(labels, dtype="str"), "B": values}
    ).set_index("join_col")

    expect = DataFrame(
        {
            "join_col": ["a", "b", "c", "d", "e"],
            "B_x": [1, 2, 3, 4, 5],
            "B_y": [1, 2, 3, 4, 5],
        }
    ).set_index("join_col")

    got = left.join(right, how="inner", lsuffix="_x", rsuffix="_y")

    assert_eq(expect, got)
def test_dataframe_join_cats():
    """Join two frames indexed by a categorical column and compare to pandas.

    ``lhs`` and ``rhs`` are each indexed on a categorical column ``a`` with
    categories ``a``/``b``/``c`` and carry one integer payload column
    (``b`` and ``c`` respectively).  The default ``join`` result is checked
    against the same join performed on the ``to_pandas()`` conversions, and
    then with a few coarse sanity assertions.
    """
    lhs = DataFrame()
    lhs["a"] = pd.Categorical(list("aababcabbc"), categories=list("abc"))
    lhs["b"] = bb = np.arange(len(lhs))
    lhs = lhs.set_index("a")

    rhs = DataFrame()
    rhs["a"] = pd.Categorical(list("abcac"), categories=list("abc"))
    rhs["c"] = cc = np.arange(len(rhs))
    rhs = rhs.set_index("a")

    got = lhs.join(rhs)
    expect = lhs.to_pandas().join(rhs.to_pandas())

    # Note: pandas makes an object Index after joining, so the index is
    # dropped on both sides before comparing.
    # Use the public ``pd.testing`` namespace: ``pd.util.testing`` was
    # deprecated in pandas 1.0 and removed in pandas 2.0.
    pd.testing.assert_frame_equal(
        got.sort_values(by="b")
        .to_pandas()
        .sort_index()
        .reset_index(drop=True),
        expect.reset_index(drop=True),
    )

    # Just do some rough checking here.
    assert list(got.columns) == ["b", "c"]
    assert len(got) > 0
    assert set(got.index.to_pandas()) & set("abc")
    assert set(got["b"]) & set(bb)
    assert set(got["c"]) & set(cc)
def test_typecast_on_join_multiindices():
    """Inner-join two frames on a three-level index with differing dtypes.

    The left frame's key columns are int8 / float32 / float32 and the right
    frame's are int32 / int32 / float64.  Only the first two rows have equal
    values in all three key columns, so the expected frame holds those two
    rows, with keys given as int32 / float64 / float64 and the payload
    column suffixed into ``B_x`` / ``B_y``.
    """
    key_names = ("join_col_0", "join_col_1", "join_col_2")
    payload = ["a", "b", "c", "d", "e"]

    left = DataFrame(
        {
            "join_col_0": Series([1, 2, 3, 4, 5], dtype="int8"),
            "join_col_1": Series([2, 3, 4.1, 5.9, 6], dtype="float32"),
            "join_col_2": Series([7, 8, 9, 0, 1], dtype="float32"),
            "B": payload,
        }
    ).set_index(list(key_names))

    right = DataFrame(
        {
            "join_col_0": Series([1, 2, 3, 4, 5], dtype="int32"),
            "join_col_1": Series([2, 3, 4, 5, 6], dtype="int32"),
            "join_col_2": Series([7, 8, 9, 0, 0], dtype="float64"),
            "B": payload,
        }
    ).set_index(list(key_names))

    # The same Series object backs both suffixed payload columns.
    matched_payload = Series(["a", "b"])
    expect = DataFrame(
        {
            "join_col_0": Series([1, 2], dtype="int32"),
            "join_col_1": Series([2, 3], dtype="float64"),
            "join_col_2": Series([7, 8], dtype="float64"),
            "B_x": matched_payload,
            "B_y": matched_payload,
        }
    ).set_index(list(key_names))

    got = left.join(right, how="inner", lsuffix="_x", rsuffix="_y")

    assert_eq(expect, got)