def test_onehost_get_dummies_dummy_na(nan_as_null, dummy_na):
    """get_dummies with dummy_na must match pandas, modulo null-column naming."""
    host_df = pd.DataFrame({"a": [0, 1, np.nan]})
    device_df = DataFrame.from_pandas(host_df, nan_as_null=nan_as_null)

    expected = pd.get_dummies(host_df, dummy_na=dummy_na, columns=["a"])
    got = cudf.get_dummies(device_df, dummy_na=dummy_na, columns=["a"])

    # cuDF labels the null indicator column "a_null" while pandas uses
    # "a_nan"; align names and column order before comparing.
    if dummy_na and nan_as_null:
        got = got.rename(columns={"a_null": "a_nan"})[expected.columns]

    utils.assert_eq(expected, got)
def test_from_pandas_with_index():
    """Round-trip a pandas frame with a custom integer index through cuDF.

    FIX: ``pd.util.testing`` was deprecated in pandas 1.0 and removed in
    later releases; use the public ``pd.testing`` module instead.
    """
    pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
    pdf = pdf.set_index(np.asarray([4, 3, 2, 1]))
    df = DataFrame.from_pandas(pdf)

    # Check columns
    np.testing.assert_array_equal(df.a.to_array(fillna="pandas"), pdf.a)
    np.testing.assert_array_equal(df.b.to_array(fillna="pandas"), pdf.b)
    # Check index
    np.testing.assert_array_equal(df.index.values, pdf.index.values)
    # Check again using pandas testing tool on frames
    pd.testing.assert_frame_equal(df.to_pandas(), pdf)
def test_from_pandas_with_index():
    """from_pandas must preserve both column data and a custom index."""
    host = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
    host = host.set_index(np.asarray([4, 3, 2, 1]))
    device = DataFrame.from_pandas(host)

    # Column contents survive the conversion.
    assert_eq(device.a, host.a)
    assert_eq(device.b, host.b)
    # So does the index.
    assert_eq(device.index.values, host.index.values)
    # And the frame as a whole.
    assert_eq(device, host)
def test_groupby_std():
    """groupby().std() agrees with pandas, nulls in keys and values included."""
    raw_data = {
        "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2],
        "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9],
    }
    pdf = pd.DataFrame(raw_data)
    gdf = DataFrame.from_pandas(pdf)

    expected = pdf.groupby("x").std()
    got = gdf.groupby("x").std()

    assert_groupby_results_equal(expected, got)
def test_from_pandas_ex1():
    """Basic from_pandas round trip, including NaN propagation in floats."""
    pdf = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0.1, 0.2, None, 0.3]})
    print(pdf)
    df = DataFrame.from_pandas(pdf)
    print(df)

    assert tuple(df.columns) == tuple(pdf.columns)
    assert np.all(df["a"].to_array() == pdf["a"])

    equal_mask = df["b"].to_array(fillna="pandas") == pdf["b"]
    # Element 2 compares unequal because (nan == nan) is False.
    assert np.all(equal_mask == [True, True, False, True])
    # Both sides hold NaN at position 2.
    assert np.isnan(df["b"].to_array(fillna="pandas")[2])
    assert np.isnan(pdf["b"][2])
def test_dataframe_with_nulls_where_with_scalars(fill_value):
    """DataFrame.where with a scalar replacement must match pandas on null data."""
    host = pd.DataFrame(
        {
            "A": [-1, 2, -3, None, 5, 6, -7, 0],
            "B": [4, -2, 3, None, 7, 6, 8, 0],
        }
    )
    device = DataFrame.from_pandas(host)

    # Keep multiples of three, replace everything else with the scalar.
    expect = host.where(host % 3 == 0, fill_value)
    got = device.where(device % 3 == 0, fill_value)

    assert_eq(expect, got)
def test_dataframe_clip(lower, upper, inplace):
    """clip must match pandas for both in-place and copying invocations."""
    host = pd.DataFrame(
        {"a": [1, 2, 3, 4, 5], "b": [7.1, 7.24, 7.5, 7.8, 8.11]}
    )
    device = DataFrame.from_pandas(host)

    result = device.clip(lower=lower, upper=upper, inplace=inplace)
    expect = host.clip(lower=lower, upper=upper, axis=1)

    # With inplace=True the frame itself is mutated, so compare the frame;
    # otherwise compare the returned copy.
    target = device if inplace is True else result
    assert_eq(expect, target)
def test_categorical_index():
    """Setting a categorical column as the index must produce a
    CategoricalIndex that round-trips against pandas, whether the frame was
    built via from_pandas or constructed column-by-column in cuDF."""
    pdf = pd.DataFrame()
    pdf["a"] = [1, 2, 3]
    pdf["index"] = pd.Categorical(["a", "b", "c"])
    initial_df = DataFrame.from_pandas(pdf)
    pdf = pdf.set_index("index")
    gdf1 = DataFrame.from_pandas(pdf)
    gdf2 = DataFrame()
    gdf2["a"] = [1, 2, 3]
    gdf2["index"] = pd.Categorical(["a", "b", "c"])
    # Before set_index, both frames carry the same default index.
    assert_eq(initial_df.index, gdf2.index)
    gdf2 = gdf2.set_index("index")

    # Frame built through from_pandas.
    assert isinstance(gdf1.index, CategoricalIndex)
    assert_eq(pdf, gdf1)
    assert_eq(pdf.index, gdf1.index)
    assert_eq(pdf.index.codes, gdf1.index.codes.to_array())

    # Frame built directly in cuDF.
    assert isinstance(gdf2.index, CategoricalIndex)
    assert_eq(pdf, gdf2)
    assert_eq(pdf.index, gdf2.index)
    assert_eq(pdf.index.codes, gdf2.index.codes.to_array())
def test_dataframe_category_clip(lower, upper, inplace):
    """clip on a categorical column matches pandas clip on plain strings."""
    values = ["a", "b", "c", "d", "e"]
    host = pd.DataFrame({"a": values})
    device = DataFrame.from_pandas(host)
    device["a"] = device["a"].astype("category")

    expect = host.clip(lower=lower, upper=upper)
    got = device.clip(lower=lower, upper=upper, inplace=inplace)

    # The cuDF side is categorical, so cast back to string for comparison;
    # with inplace=True the mutated frame is the result.
    result = device if inplace is True else got
    assert_eq(expect, result.astype("str"))
def test_multiindex_clip(lower, upper, inplace):
    """MultiIndex.clip must mirror DataFrame.clip on the underlying columns."""
    df = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 2, 3, 4, 5]})
    gdf = DataFrame.from_pandas(df)
    index = gdf.set_index(["a", "b"]).index
    # With inplace=True, pandas mutates `df` (and cuDF mutates `index`) and
    # both calls return None, so the comparison goes through the mutated
    # objects rather than the return values.
    expected = df.clip(lower=lower, upper=upper, inplace=inplace, axis=1)
    got = index.clip(lower=lower, upper=upper, inplace=inplace)
    if inplace is True:
        assert_eq(df, index.to_frame(index=False))
    else:
        assert_eq(expected, got.to_frame(index=False))
def test_safe_merging_with_left_empty():
    """Merging with an empty left frame must not crash and yields no rows."""
    import numpy as np
    import pandas as pd

    from cudf import DataFrame

    np.random.seed(0)

    left_cols, right_cols = "bcd", "b"
    pdf_left = pd.DataFrame()
    pdf_right = pd.DataFrame()
    # Left frame has the columns but zero rows; right frame has data.
    for name in left_cols:
        pdf_left[name] = np.random.randint(0, 10, 0)
    for name in right_cols:
        pdf_right[name] = np.random.randint(0, 10, 5)
    gdf_left = DataFrame.from_pandas(pdf_left)
    gdf_right = DataFrame.from_pandas(pdf_right)

    pdf_result = pdf_left.merge(pdf_right)
    gdf_result = gdf_left.merge(gdf_right)
    # Simplify test because pandas does not consider empty Index and RangeIndex
    # to be equivalent. TODO: Allow empty Index objects to have equivalence.
    assert len(pdf_result) == len(gdf_result)
def test_rank_error_arguments(self):
    """rank() with an invalid ``method`` or ``na_option`` must raise the
    same exception in cuDF as in pandas."""
    pdf = pd.DataFrame(index=self.index)
    pdf["col1"] = self.col1
    pdf["col2"] = self.col2
    gdf = DataFrame.from_pandas(pdf)

    # Unknown ranking method.
    assert_exceptions_equal(
        lfunc=pdf["col1"].rank,
        rfunc=gdf["col1"].rank,
        lfunc_args_and_kwargs=(
            [],
            {
                "method": "randomname",
                "na_option": "keep",
                "ascending": True,
                "pct": True,
            },
        ),
        rfunc_args_and_kwargs=(
            [],
            {
                "method": "randomname",
                "na_option": "keep",
                "ascending": True,
                "pct": True,
            },
        ),
    )

    # Unknown NA-handling option.
    assert_exceptions_equal(
        lfunc=pdf["col1"].rank,
        rfunc=gdf["col1"].rank,
        lfunc_args_and_kwargs=(
            [],
            {
                "method": "first",
                "na_option": "randomname",
                "ascending": True,
                "pct": True,
            },
        ),
        rfunc_args_and_kwargs=(
            [],
            {
                "method": "first",
                "na_option": "randomname",
                "ascending": True,
                "pct": True,
            },
        ),
    )
def test_string_join_values_nulls():
    """Left merge on a string key must match pandas, including left rows
    whose key has no match on the right (null-filled columns)."""
    left_dict = [
        {"b": "MATCH 1", "a": 1.0},
        {"b": "MATCH 1", "a": 1.0},
        {"b": "LEFT NO MATCH 1", "a": -1.0},
        {"b": "MATCH 2", "a": 2.0},
        {"b": "MATCH 2", "a": 2.0},
        {"b": "MATCH 1", "a": 1.0},
        {"b": "MATCH 1", "a": 1.0},
        {"b": "MATCH 2", "a": 2.0},
        {"b": "MATCH 2", "a": 2.0},
        {"b": "LEFT NO MATCH 2", "a": -2.0},
        {"b": "MATCH 3", "a": 3.0},
        {"b": "MATCH 3", "a": 3.0},
    ]

    right_dict = [
        {"b": "RIGHT NO MATCH 1", "c": -1.0},
        {"b": "MATCH 3", "c": 3.0},
        {"b": "MATCH 2", "c": 2.0},
        {"b": "RIGHT NO MATCH 2", "c": -2.0},
        {"b": "RIGHT NO MATCH 3", "c": -3.0},
        {"b": "MATCH 1", "c": 1.0},
    ]

    left_pdf = pd.DataFrame(left_dict)
    right_pdf = pd.DataFrame(right_dict)

    left_gdf = DataFrame.from_pandas(left_pdf)
    right_gdf = DataFrame.from_pandas(right_pdf)
    expect = left_pdf.merge(right_pdf, how="left", on="b")
    got = left_gdf.merge(right_gdf, how="left", on="b")

    # Join output row order is not guaranteed to match between libraries,
    # so sort both results before comparing.
    expect = expect.sort_values(by=["a", "b", "c"]).reset_index(drop=True)
    got = got.sort_values(by=["a", "b", "c"]).reset_index(drop=True)

    assert_eq(expect, got)
def test_dataframe_join_mismatch_cats(how):
    """Index join on categorical keys with differing category sets must line
    up with pandas, modulo the dtype/order differences documented inline."""
    pdf1 = pd.DataFrame(
        {
            "join_col": ["a", "b", "c", "d", "e"],
            "data_col_left": [10, 20, 30, 40, 50],
        }
    )
    pdf2 = pd.DataFrame(
        {"join_col": ["c", "e", "f"], "data_col_right": [6, 7, 8]}
    )

    pdf1["join_col"] = pdf1["join_col"].astype("category")
    pdf2["join_col"] = pdf2["join_col"].astype("category")

    gdf1 = DataFrame.from_pandas(pdf1)
    gdf2 = DataFrame.from_pandas(pdf2)

    gdf1 = gdf1.set_index("join_col")
    gdf2 = gdf2.set_index("join_col")

    pdf1 = pdf1.set_index("join_col")
    pdf2 = pdf2.set_index("join_col")
    join_gdf = gdf1.join(gdf2, how=how, sort=True, method="hash")
    join_pdf = pdf1.join(pdf2, how=how)

    got = join_gdf.to_pandas()
    expect = join_pdf.fillna(-1)  # note: cudf join doesn't mask NA

    # We yield a categorical here whereas pandas gives Object.
    expect.index = expect.index.astype("category")
    # cudf creates the columns in different order than pandas for right join
    if how == "right":
        got = got[["data_col_left", "data_col_right"]]

    # After the -1 fill the pandas columns are float; force back to int64
    # to match the cuDF result dtype.
    expect.data_col_right = expect.data_col_right.astype(np.int64)
    expect.data_col_left = expect.data_col_left.astype(np.int64)
    assert_eq(expect, got)
def test_dataframe_replace():
    """DataFrame.replace must match pandas for scalar, list, and dict
    argument forms, on both numerical and categorical data."""
    # numerical: scalar -> scalar
    pdf1 = pd.DataFrame({"a": [0, 1, 2, 3], "b": [0, 1, 2, 3]})
    gdf1 = DataFrame.from_pandas(pdf1)
    pdf2 = pdf1.replace(0, 4)
    gdf2 = gdf1.replace(0, 4)
    pd.testing.assert_frame_equal(gdf2.to_pandas(), pdf2)

    # categorical: scalar -> scalar
    pdf4 = pd.DataFrame(
        {"a": ["one", "two", "three"], "b": ["one", "two", "three"]},
        dtype="category",
    )
    gdf4 = DataFrame.from_pandas(pdf4)
    pdf5 = pdf4.replace("two", "three")
    gdf5 = gdf4.replace("two", "three")
    pd.testing.assert_frame_equal(gdf5.to_pandas(), pdf5)

    # list input: list -> list (element-wise)
    pdf6 = pdf1.replace([0, 1], [4, 5])
    gdf6 = gdf1.replace([0, 1], [4, 5])
    pd.testing.assert_frame_equal(gdf6.to_pandas(), pdf6)

    # list input: list -> scalar (broadcast)
    pdf7 = pdf1.replace([0, 1], 4)
    gdf7 = gdf1.replace([0, 1], 4)
    pd.testing.assert_frame_equal(gdf7.to_pandas(), pdf7)

    # dict input: per-column to-replace and value
    pdf8 = pdf1.replace({"a": 0, "b": 0}, {"a": 4, "b": 5})
    gdf8 = gdf1.replace({"a": 0, "b": 0}, {"a": 4, "b": 5})
    pd.testing.assert_frame_equal(gdf8.to_pandas(), pdf8)

    # dict input: single column only
    pdf9 = pdf1.replace({"a": 0}, {"a": 4})
    gdf9 = gdf1.replace({"a": 0}, {"a": 4})
    pd.testing.assert_frame_equal(gdf9.to_pandas(), pdf9)
def test_query_with_index_keyword(query, a_val, b_val, c_val):
    """query() expressions referencing the index must match pandas.

    BUG FIX: the original called ``pdf.set_index("a")`` without assigning
    the result — ``set_index`` returns a new frame by default — so the
    frame kept its RangeIndex and the test never exercised an "a" index.
    """
    pdf = pd.DataFrame(
        {
            "a": [1, None, 3, 4, 5],
            "b": [5, 4, 3, 2, 1],
            "c": [12, 15, 17, 19, 27],
        }
    )
    pdf = pdf.set_index("a")

    gdf = DataFrame.from_pandas(pdf)

    out = gdf.query(query)
    expect = pdf.query(query)

    assert_eq(out, expect)
def test_multiindex_sample_basic(n, frac, replace, axis):
    """MultiIndex.sample must mirror pandas sample, including raising the
    same exception type and message when the arguments are invalid."""
    # as we currently don't support column with same name
    if axis == 1 and replace:
        return
    pdf = pd.DataFrame(
        {
            "a": [1, 2, 3, 4, 5],
            "float": [0.05, 0.2, 0.3, 0.2, 0.25],
            "int": [1, 3, 5, 4, 2],
        },
    )
    mul_index = cudf.Index(DataFrame.from_pandas(pdf))
    random_state = 0

    # Run pandas first and capture any exception it raises, so the cuDF
    # call can be checked against the same exception type and message.
    kind = None
    try:
        pout = pdf.sample(
            n=n,
            frac=frac,
            replace=replace,
            random_state=random_state,
            axis=axis,
        )
    except BaseException as e:
        kind = type(e)
        msg = str(e)

    if kind is not None:
        # pandas raised: cuDF must raise the same thing.
        with pytest.raises(kind, match=msg):
            gout = mul_index.sample(
                n=n,
                frac=frac,
                replace=replace,
                random_state=random_state,
                axis=axis,
            )
    else:
        gout = mul_index.sample(
            n=n,
            frac=frac,
            replace=replace,
            random_state=random_state,
            axis=axis,
        )
    if kind is not None:
        return
    # Only shape is compared: sampling order is not expected to match.
    assert pout.shape == gout.shape
def test_from_pandas():
    """from_pandas must preserve column names, dtypes, and lengths."""
    host = pd.DataFrame()
    host["a"] = np.arange(10, dtype=np.int32)
    host["b"] = np.arange(10, 20, dtype=np.float64)

    device = DataFrame.from_pandas(host)

    assert tuple(device.columns) == tuple(host.columns)
    for name in ("a", "b"):
        assert device[name].dtype == host[name].dtype
        assert len(device[name]) == len(host[name])
def test_dataframe_pairs_of_triples(pairs, max, rows, how):
    """merge across varying column-name overlaps: when the frames share no
    columns both libraries must raise; otherwise the merged result must
    match pandas column-for-column (compared sorted, since join row order
    differs between implementations)."""
    np.random.seed(0)

    pdf_left = pd.DataFrame()
    pdf_right = pd.DataFrame()
    for left_column in pairs[0]:
        pdf_left[left_column] = np.random.randint(0, max, rows)
    for right_column in pairs[1]:
        pdf_right[right_column] = np.random.randint(0, max, rows)
    gdf_left = DataFrame.from_pandas(pdf_left)
    gdf_right = DataFrame.from_pandas(pdf_right)
    if not set(pdf_left.columns).intersection(pdf_right.columns):
        # No shared column names at all: both must refuse to merge.
        with pytest.raises(pd.core.reshape.merge.MergeError) as raises:
            pdf_left.merge(pdf_right)
        raises.match("No common columns to perform merge on")
        with pytest.raises(ValueError) as raises:
            gdf_left.merge(gdf_right)
        raises.match("No common columns to perform merge on")
    elif not [value for value in pdf_left if value in pdf_right]:
        # Same check via ordered column iteration.
        with pytest.raises(pd.core.reshape.merge.MergeError) as raises:
            pdf_left.merge(pdf_right)
        raises.match("No common columns to perform merge on")
        with pytest.raises(ValueError) as raises:
            gdf_left.merge(gdf_right)
        raises.match("No common columns to perform merge on")
    else:
        pdf_result = pdf_left.merge(pdf_right, how=how)
        gdf_result = gdf_left.merge(gdf_right, how=how)
        assert np.array_equal(gdf_result.columns, pdf_result.columns)
        for column in gdf_result:
            # Fill nulls and sort so row order does not affect comparison.
            gdf_col_result_sorted = gdf_result[column].fillna(-1).sort_values()
            pd_col_result_sorted = pdf_result[column].fillna(-1).sort_values()
            assert np.array_equal(
                gdf_col_result_sorted.to_pandas().values,
                pd_col_result_sorted.values,
            )
def test_index_join(lhs, rhs, how, level):
    """Index.join must agree with pandas Index.join for the given key
    columns, join type, and level (results compared as sorted frames)."""
    l_pdf = pd.DataFrame({"a": [2, 3, 1, 4], "b": [3, 7, 8, 1]})
    r_pdf = pd.DataFrame({"a": [1, 5, 4, 0], "b": [3, 9, 8, 4]})
    l_df = DataFrame.from_pandas(l_pdf)
    r_df = DataFrame.from_pandas(r_pdf)
    p_lhs = l_pdf.set_index(lhs).index
    p_rhs = r_pdf.set_index(rhs).index
    g_lhs = l_df.set_index(lhs).index
    g_rhs = r_df.set_index(rhs).index

    # Join output order is not guaranteed to match, so materialize each
    # side as a sorted frame before comparing.
    expected = (
        p_lhs.join(p_rhs, level=level, how=how)
        .to_frame(index=False)
        .sort_values(by=lhs)
        .reset_index(drop=True)
    )
    got = (
        g_lhs.join(g_rhs, level=level, how=how)
        .to_frame(index=False)
        .sort_values(by=lhs)
        .reset_index(drop=True)
    )

    assert_eq(expected, got)
def test_rank_error_arguments(self):
    """Invalid ``method``/``na_option`` values for rank must raise KeyError."""
    pdf = pd.DataFrame(index=self.index)
    pdf["col1"] = self.col1
    pdf["col2"] = self.col2
    gdf = DataFrame.from_pandas(pdf)

    # Unknown ranking method.
    with pytest.raises(KeyError):
        gdf["col1"].rank(
            method="randomname",
            na_option="keep",
            ascending=True,
            pct=True,
        )

    # Unknown NA-handling option.
    with pytest.raises(KeyError):
        gdf["col1"].rank(
            method="first",
            na_option="randomname",
            ascending=True,
            pct=True,
        )
def test_groupby_std():
    """groupby().std() must match pandas on data containing nulls in both
    the key column and the value column."""
    raw_data = {
        "x": [1, 2, 3, 1, 2, 2, 1, None, 3, 2],
        "y": [None, 1, 2, 3, 4, None, 6, 7, 8, 9],
    }
    pdf = pd.DataFrame(raw_data)
    gdf = DataFrame.from_pandas(pdf)
    pdg = pdf.groupby("x")
    gdg = gdf.groupby("x")
    pdresult = pdg.std()
    gdresult = gdg.std()
    # There's a lot left to add to python bindings like index name
    # so this is a temporary workaround
    pdresult = pdresult["y"].reset_index(drop=True)
    gdresult = gdresult["y"].reset_index(drop=True)
    assert_eq(pdresult, gdresult)
def test_melt(nulls, num_id_vars, num_value_vars, num_rows, dtype):
    """cuDF melt (free function and DataFrame method) must match pd.melt.

    FIX: ``pytest.skip`` was called with the ``msg=`` keyword, which was
    deprecated in pytest 7 and removed in pytest 8; pass the reason
    positionally. Also deduplicates the twice-repeated random-column
    generator into a nested helper.
    """
    if dtype not in ["float32", "float64"] and nulls in ["some", "all"]:
        pytest.skip("nulls not supported in dtype: " + dtype)

    def _random_column():
        # One random column of the requested dtype with the requested
        # null pattern (none / half / all NaN).
        data = np.random.randint(0, 26, num_rows).astype(dtype)
        if nulls == "some":
            idx = np.random.choice(
                num_rows, size=int(num_rows / 2), replace=False
            )
            data[idx] = np.nan
        elif nulls == "all":
            data[:] = np.nan
        return data

    pdf = pd.DataFrame()
    id_vars = []
    for i in range(num_id_vars):
        colname = "id" + str(i)
        pdf[colname] = _random_column()
        id_vars.append(colname)

    value_vars = []
    for i in range(num_value_vars):
        colname = "val" + str(i)
        pdf[colname] = _random_column()
        value_vars.append(colname)

    gdf = DataFrame.from_pandas(pdf)

    got = cudf_melt(frame=gdf, id_vars=id_vars, value_vars=value_vars)
    got_from_melt_method = gdf.melt(id_vars=id_vars, value_vars=value_vars)

    expect = pd.melt(frame=pdf, id_vars=id_vars, value_vars=value_vars)
    # pandas' melt makes the 'variable' column of 'object' type (string)
    # cuDF's melt makes it Categorical because it doesn't support strings
    expect["variable"] = expect["variable"].astype("category")

    assert_eq(expect, got)
    assert_eq(expect, got_from_melt_method)
def test_string_index():
    """String indexes built several ways must round-trip against pandas.

    FIX: the original constructed ``StringColumn`` directly from a Python
    list; the companion version of this test builds the index through
    ``as_index(as_column(...))`` instead, which is the supported
    construction path — made consistent with that.
    """
    from cudf.core.column import as_column
    from cudf.core.index import as_index

    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)
    # Plain Python list of strings.
    stringIndex = ["a", "b", "c", "d", "e"]
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # NumPy array of strings.
    stringIndex = np.array(["a", "b", "c", "d", "e"])
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # Named StringIndex.
    stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # Named index built from a string column.
    stringIndex = as_index(as_column(["a", "b", "c", "d", "e"]), name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
def test_dataframe_multi_column(num_cols, num_rows, dtype, ascending, na_position):
    """Multi-column sort_values must order rows exactly as pandas does."""
    np.random.seed(0)
    sort_cols = list(string.ascii_lowercase[:num_cols])

    # Always build five columns; only the first num_cols are sort keys.
    host = pd.DataFrame()
    for colname in string.ascii_lowercase[:5]:
        host[colname] = np.random.randint(0, 26, num_rows).astype(dtype)
    device = DataFrame.from_pandas(host)

    got = device.sort_values(
        sort_cols, ascending=ascending, na_position=na_position
    )
    expect = host.sort_values(
        sort_cols, ascending=ascending, na_position=na_position
    )

    # Compare only the key columns, ignoring the (reordered) index.
    assert_eq(
        got[sort_cols].reset_index(drop=True),
        expect[sort_cols].reset_index(drop=True),
    )
def test_rank_all_arguments(
    pdf, dtype, ascending, method, na_option, pct, numeric_only
):
    """rank() must match pandas across combinations of method, na_option,
    ascending, pct, and numeric_only — with the documented exceptions."""
    if method == "first" and dtype == "O":
        # not supported by pandas
        return

    pdf = pdf.copy(deep=True)  # for parallel pytest
    if numeric_only:
        pdf["str"] = np.array(
            ["a", "b", "c", "d", "e", "1", "2", "3", "4", "5"]
        )
    gdf = DataFrame.from_pandas(pdf)

    kwargs = {
        "method": method,
        "na_option": na_option,
        "ascending": ascending,
        "pct": pct,
        "numeric_only": numeric_only,
    }

    # Series
    assert_eq(gdf["col1"].rank(**kwargs), pdf["col1"].rank(**kwargs))
    assert_eq(gdf["col2"].rank(**kwargs), pdf["col2"].rank(**kwargs))
    if numeric_only:
        # Ranking a string column with numeric_only: only emptiness is
        # compared, not contents.
        expect = pdf["str"].rank(**kwargs)
        got = gdf["str"].rank(**kwargs)
        assert expect.empty == got.empty

    # TODO: https://github.com/pandas-dev/pandas/issues/32593
    # Dataframe (bug in pandas)
    if (
        na_option == "top"
        and method == "first"
        and not dtype == "O"
        and ascending
    ):
        assert_eq(gdf.rank(**kwargs), pdf.rank(**kwargs))
    else:
        # Frame-level results are expected to DIFFER due to the pandas bug
        # referenced above, so the comparison is asserted to fail.
        with pytest.raises(AssertionError, match="values are different"):
            assert_eq(gdf.rank(**kwargs), pdf.rank(**kwargs))
def test_rank_all_arguments(self, dtype, ascending, method, na_option, pct):
    """Series.rank must match pandas for every method/na_option/ascending/
    pct combination, on both test columns."""
    if method == "first" and dtype == "O":
        # not supported by pandas
        return

    pdf = pd.DataFrame(index=self.index)
    pdf["col1"] = self.col1.astype(dtype)
    pdf["col2"] = self.col2.astype(dtype)
    gdf = DataFrame.from_pandas(pdf)

    def _check(gs, ps, method, na_option, ascending, pct):
        # Rank the cuDF and pandas series with identical arguments and
        # compare the results.
        ranked_gs = gs.rank(
            method=method,
            na_option=na_option,
            ascending=ascending,
            pct=pct,
        )
        ranked_ps = ps.rank(
            method=method,
            na_option=na_option,
            ascending=ascending,
            pct=pct,
        )
        assert_eq(ranked_ps, ranked_gs.to_pandas())

    # # Series
    _check(
        gdf["col1"],
        pdf["col1"],
        method=method,
        na_option=na_option,
        ascending=ascending,
        pct=pct,
    )
    _check(
        gdf["col2"],
        pdf["col2"],
        method=method,
        na_option=na_option,
        ascending=ascending,
        pct=pct,
    )
def test_issue_165():
    """Regression test: datetime query/comparison against a Python datetime,
    a pandas Timestamp, and a numpy datetime64 scalar.

    NOTE(review): the three near-identical sections are kept inline rather
    than extracted to a helper because ``query("...@var")`` resolves
    ``@var`` from the caller's local scope; a helper would break lookup.
    """
    df_pandas = pd.DataFrame()
    start_date = dt.datetime.strptime("2000-10-21", "%Y-%m-%d")
    data = [(start_date + dt.timedelta(days=x)) for x in range(6)]
    df_pandas["dates"] = data
    df_pandas["num"] = [1, 2, 3, 4, 5, 6]
    df_cudf = DataFrame.from_pandas(df_pandas)

    # Python datetime scalar.
    base = df_pandas.query("dates==@start_date")
    test = df_cudf.query("dates==@start_date")
    assert_frame_equal(base, test.to_pandas())
    assert len(test) > 0
    mask = df_cudf.dates == start_date
    base_mask = df_pandas.dates == start_date
    assert_series_equal(mask.to_pandas(), base_mask, check_names=False)
    assert mask.to_pandas().sum() > 0

    # pandas Timestamp scalar.
    start_date_ts = pd.Timestamp(start_date)
    test = df_cudf.query("dates==@start_date_ts")
    base = df_pandas.query("dates==@start_date_ts")
    assert_frame_equal(base, test.to_pandas())
    assert len(test) > 0
    mask = df_cudf.dates == start_date_ts
    base_mask = df_pandas.dates == start_date_ts
    assert_series_equal(mask.to_pandas(), base_mask, check_names=False)
    assert mask.to_pandas().sum() > 0

    # numpy datetime64 scalar (nanosecond resolution).
    start_date_np = np.datetime64(start_date_ts, "ns")
    test = df_cudf.query("dates==@start_date_np")
    base = df_pandas.query("dates==@start_date_np")
    assert_frame_equal(base, test.to_pandas())
    assert len(test) > 0
    mask = df_cudf.dates == start_date_np
    base_mask = df_pandas.dates == start_date_np
    assert_series_equal(mask.to_pandas(), base_mask, check_names=False)
    assert mask.to_pandas().sum() > 0
def test_query_splitted_combine():
    """Querying two halves of a frame then concatenating must equal
    querying the whole frame at once."""
    np.random.seed(0)
    source = pd.DataFrame(
        {
            "x": np.random.randint(0, 5, size=10),
            "y": np.random.normal(size=10),
        }
    )
    gdf = DataFrame.from_pandas(source)

    expr = "x > 2"
    # Query each half separately, then stitch the results back together.
    halves = [gdf[:5].query(expr), gdf[5:].query(expr)]
    got = cudf.concat(halves).to_pandas()

    # Should equal querying the original frame in one go.
    expect = gdf.query(expr).to_pandas()
    assert_eq(got, expect)
def test_string_index():
    """String indexes built four different ways (list, ndarray, StringIndex,
    as_index over a string column) must all round-trip against pandas."""
    from cudf.core.column import as_column
    from cudf.core.index import as_index

    pdf = pd.DataFrame(np.random.rand(5, 5))
    gdf = DataFrame.from_pandas(pdf)
    # Plain Python list of strings.
    stringIndex = ["a", "b", "c", "d", "e"]
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # NumPy array of strings.
    stringIndex = np.array(["a", "b", "c", "d", "e"])
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # Named StringIndex.
    stringIndex = StringIndex(["a", "b", "c", "d", "e"], name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)
    # Named index built from a string column.
    stringIndex = as_index(as_column(["a", "b", "c", "d", "e"]), name="name")
    pdf.index = stringIndex
    gdf.index = stringIndex
    assert_eq(pdf, gdf)