def test_multiindex_droplevel_simple(pdfIndex, level):
    """Check that ``droplevel(level)`` agrees between pandas and cudf.

    ``pdfIndex`` and ``level`` are supplied from outside this function
    (presumably pytest fixtures / parametrization -- confirm in the file).
    """
    device_index = cudf.from_pandas(pdfIndex)
    expected = pdfIndex.droplevel(level)
    actual = device_index.droplevel(level)
    assert_eq(expected, actual)
def test_serialize_named_series():
    """Round-trip a column selected from a DataFrame through serialize."""
    frame = cudf.DataFrame({"a": [1, 2, 3, 4], "b": [5, 1, 2, 5]})
    column = frame["b"]

    # Feed whatever ``serialize`` returns straight back to ``deserialize``.
    payload = column.serialize()
    roundtripped = cudf.Series.deserialize(*payload)

    assert_eq(roundtripped, column)
def test_serialize_list_columns(data):
    """Round-trip a DataFrame built from ``data`` through serialize.

    ``data`` is supplied from outside this function (presumably list-typed
    column data, going by the test name -- confirm against the parametrize).
    """
    original = cudf.DataFrame(data)
    frame_cls = original.__class__
    payload = original.serialize()
    restored = frame_cls.deserialize(*payload)
    assert_eq(restored, original)
def test_list_to_pandas_nullable_true():
    """``to_pandas(nullable=True)`` on a list column yields a frame of lists."""
    rows = [[1, 2, 3]]
    gdf = cudf.DataFrame({"a": cudf.Series(rows)})

    converted = gdf.to_pandas(nullable=True)
    reference = pd.DataFrame({"a": pd.Series(rows)})

    assert_eq(converted, reference)
def test_serialize_generic_index():
    """Round-trip a ``GenericIndex`` over ``arange(10)`` through serialize."""
    index_cls = cudf.core.index.GenericIndex
    source = index_cls(cudf.Series(np.arange(10)))
    payload = source.serialize()
    restored = index_cls.deserialize(*payload)
    assert_eq(source, restored)
def test_create_list_series(data):
    """A cudf Series built from ``data`` must equal the pandas one."""
    assert_eq(pd.Series(data), cudf.Series(data))
def test_contains_null_search_key(data, expect):
    """``list.contains`` with an NA scalar key yields the ``expect`` booleans.

    The search key is a ``cudf.NA`` scalar typed as the list element type
    of the series built from ``data``.
    """
    lists = cudf.Series(data)
    expected = cudf.Series(expect, dtype="bool")
    null_key = cudf.Scalar(cudf.NA, lists.dtype.element_type)
    result = lists.list.contains(null_key)
    assert_eq(expected, result)
def test_multiindex_getitem(pdf, gdf, pdfIndex):
    """The first index entry matches after installing the same index on both."""
    device_index = cudf.from_pandas(pdfIndex)
    pdf.index = pdfIndex
    gdf.index = device_index

    first_host = pdf.index[0]
    first_device = gdf.index[0]
    assert_eq(first_host, first_device)
def test_multiindex_loc(pdf, gdf, pdfIndex, key_tuple):
    """``.loc[key_tuple]`` returns equal results for pandas and cudf frames."""
    device_index = cudf.from_pandas(pdfIndex)
    # The converted index itself must match before it is attached.
    assert_eq(pdfIndex, device_index)

    pdf.index = pdfIndex
    gdf.index = device_index

    expected = pdf.loc[key_tuple]
    actual = gdf.loc[key_tuple]
    assert_eq(expected, actual)
def test_series_multiindex(pdfIndex):
    """A Series keeps parity with pandas once ``pdfIndex`` is attached.

    The data is seven unseeded random floats. The cudf series is converted
    from the pandas one before either receives the index, so both hold the
    same values.
    """
    host_series = pd.Series(np.random.rand(7))
    device_series = cudf.from_pandas(host_series)

    host_series.index = pdfIndex
    device_series.index = cudf.from_pandas(pdfIndex)

    assert_eq(host_series, device_series)
def test_multiindex_take(pdf, gdf, pdfIndex):
    """``index.take`` agrees for list, NumPy-array and Series indexers.

    Each of the position sets ``[0]`` and ``[0, 1]`` is passed as a plain
    list, as a NumPy array and as a Series (pandas Series on the pandas
    side, cudf Series on the cudf side).
    """
    pdf.index = pdfIndex
    gdf.index = cudf.from_pandas(pdfIndex)

    for positions in ([0], [0, 1]):
        assert_eq(pdf.index.take(positions), gdf.index.take(positions))
        assert_eq(
            pdf.index.take(np.array(positions)),
            gdf.index.take(np.array(positions)),
        )
        assert_eq(
            pdf.index.take(pd.Series(positions)),
            gdf.index.take(cudf.Series(positions)),
        )
def test_multiindex_transpose(pdf, pdfIndex):
    """Transposing a frame indexed by ``pdfIndex`` matches pandas."""
    pdf.index = pdfIndex
    device_frame = cudf.from_pandas(pdf)

    expected = pdf.transpose()
    actual = device_frame.transpose()
    assert_eq(expected, actual)
def test_from_pandas(pdf, pdfIndex):
    """``cudf.from_pandas`` preserves a frame that carries ``pdfIndex``."""
    pdf.index = pdfIndex
    converted = cudf.from_pandas(pdf)
    assert_eq(pdf, converted)
def test_multiindex_droplevel_index(pdfIndex, level):
    """``droplevel`` with a list of levels agrees between pandas and cudf.

    ``level`` is converted to a list before being passed to both libraries.
    """
    levels = list(level)
    device_index = cudf.from_pandas(pdfIndex)

    expected = pdfIndex.droplevel(levels)
    actual = device_index.droplevel(levels)
    assert_eq(expected, actual)
def test_cdt_basic():
    """A category-typed cudf Series has a ``CategoricalDtype`` dtype.

    Its categories are also compared with those of the equivalent pandas
    Series.
    """
    labels = ["a", "b", "a", "c"]
    host = pd.Series(labels, dtype="category")
    device = cudf.Series(labels, dtype="category")

    assert isinstance(device.dtype, CategoricalDtype)
    assert_eq(device.dtype.categories, host.dtype.categories)
def test_multiindex_from_tuples():
    """``MultiIndex.from_tuples`` builds equal indexes in pandas and cudf."""
    letters = ["a", "a", "b", "b"]
    places = ["house", "store", "house", "store"]
    # Pair the two level arrays element-wise into (letter, place) tuples.
    pairs = list(zip(letters, places))

    host_index = pd.MultiIndex.from_tuples(pairs)
    device_index = cudf.MultiIndex.from_tuples(pairs)
    assert_eq(host_index, device_index)
def test_get(data, index, expect):
    """``Series.list.get(index)`` on ``data`` equals a Series of ``expect``."""
    lists = cudf.Series(data)
    expected = cudf.Series(expect)
    result = lists.list.get(index)
    assert_eq(expected, result)
def test_multiindex_from_product(arrays):
    """``MultiIndex.from_product`` with level names matches pandas."""
    level_names = ["alpha", "location"]
    host_index = pd.MultiIndex.from_product(arrays, names=level_names)
    device_index = cudf.MultiIndex.from_product(arrays, names=level_names)
    assert_eq(host_index, device_index)
def test_contains_scalar(data, scalar, expect):
    """``list.contains`` with a typed scalar key yields ``expect``.

    The key is ``scalar`` wrapped in a ``cudf.Scalar`` of the list element
    type of the series built from ``data``.
    """
    lists = cudf.Series(data)
    expected = cudf.Series(expect)
    search_key = cudf.Scalar(scalar, lists.dtype.element_type)
    result = lists.list.contains(search_key)
    assert_eq(expected, result)
def test_multiindex_reset_index(pdf, gdf, pdfIndex):
    """``reset_index`` gives equal frames once both carry the same index."""
    device_index = cudf.from_pandas(pdfIndex)
    pdf.index = pdfIndex
    gdf.index = device_index

    expected = pdf.reset_index()
    actual = gdf.reset_index()
    assert_eq(expected, actual)
def test_df_list_dtypes(data):
    """A cudf DataFrame built from ``data`` must equal the pandas one."""
    assert_eq(pd.DataFrame(data), cudf.DataFrame(data))
def test_groupby_multiindex_columns_from_pandas(pdf, gdf, pdfIndex):
    """Frames sharing ``pdfIndex`` compare equal, and so do their transposes."""
    device_index = cudf.from_pandas(pdfIndex)
    pdf.index = pdfIndex
    gdf.index = device_index

    assert_eq(gdf, pdf)
    # After ``.T`` the index entries become the column labels.
    assert_eq(gdf.T, pdf.T)
def test_serialize_range_index():
    """Round-trip ``RangeIndex(10, 20)`` through serialize."""
    index_cls = cudf.core.index.RangeIndex
    source = index_cls(10, 20)
    payload = source.serialize()
    restored = index_cls.deserialize(*payload)
    assert_eq(source, restored)
def test_multiindex_rows_with_wildcard(pdf, gdf, pdfIndex):
    """Row selection via ``.loc[key, :]`` agrees for partial and wildcard keys.

    The keys cover growing key prefixes starting at ``"a"`` and keys whose
    leading levels are ``slice(None)`` wildcards.
    """
    pdf.index = pdfIndex
    gdf.index = cudf.from_pandas(pdfIndex)

    wildcard = slice(None)
    row_keys = [
        # Prefix keys of increasing depth.
        ("a",),
        ("a", "store"),
        ("a", "store", "storm"),
        ("a", "store", "storm", "smoke"),
        # Wildcards on the leading levels, a label on the last one given.
        (wildcard, "store"),
        (wildcard, wildcard, "storm"),
        (wildcard, wildcard, wildcard, "smoke"),
    ]
    for key in row_keys:
        assert_eq(pdf.loc[key, :], gdf.loc[key, :])
def test_serialize_empty(frames):
    """Round-trip the first element of ``frames`` through serialize.

    ``frames`` unpacks into two objects; only the first is exercised here.
    """
    device_obj, _host_obj = frames
    payload = device_obj.serialize()
    restored = type(device_obj).deserialize(*payload)
    assert_eq(restored, device_obj)
def test_polygon_bounding_boxes_small(dtype):
    """Bounding boxes for four small rings match the per-ring min/max.

    The coordinate series are created with ``dtype``, as are the expected
    ``x_min`` / ``y_min`` / ``x_max`` / ``y_max`` columns.
    """
    # Presumably polygon offsets and ring offsets, in that order -- confirm
    # against the cuspatial API.
    first_offsets = cudf.Series([0, 1, 2, 3])
    second_offsets = cudf.Series([0, 3, 8, 12])
    xs = cudf.Series(
        [
            # ring 1
            2.488450,
            1.333584,
            3.460720,
            # ring 2
            5.039823,
            5.561707,
            7.103516,
            7.190674,
            5.998939,
            # ring 3
            5.998939,
            5.573720,
            6.703534,
            5.998939,
            # ring 4
            2.088115,
            1.034892,
            2.415080,
            3.208660,
            2.088115,
        ],
        dtype=dtype,
    )
    ys = cudf.Series(
        [
            # ring 1
            5.856625,
            5.008840,
            4.586599,
            # ring 2
            4.229242,
            1.825073,
            1.503906,
            4.025879,
            5.653384,
            # ring 3
            1.235638,
            0.197808,
            0.086693,
            1.235638,
            # ring 4
            4.541529,
            3.530299,
            2.896937,
            3.745936,
            4.541529,
        ],
        dtype=dtype,
    )

    boxes = cuspatial.polygon_bounding_boxes(
        first_offsets, second_offsets, xs, ys
    )

    # One row per ring: the extremes of that ring's coordinates.
    expected = cudf.DataFrame({
        "x_min": cudf.Series(
            [
                1.3335840000000001,
                5.0398230000000002,
                5.5737199999999998,
                1.0348919999999999,
            ],
            dtype=dtype,
        ),
        "y_min": cudf.Series(
            [
                4.5865989999999996,
                1.503906,
                0.086693000000000006,
                2.8969369999999999,
            ],
            dtype=dtype,
        ),
        "x_max": cudf.Series(
            [
                3.4607199999999998,
                7.1906739999999996,
                6.7035340000000003,
                3.2086600000000001,
            ],
            dtype=dtype,
        ),
        "y_max": cudf.Series(
            [
                5.8566250000000002,
                5.653384,
                1.235638,
                4.5415289999999997,
            ],
            dtype=dtype,
        ),
    })
    assert_eq(boxes, expected)
def test_serialize_seriesgroupby():
    """A column-selected groupby gives the same ``sum`` after a round trip."""
    frame = cudf.DataFrame({"a": [1, 2, 3, 4], "b": [5, 1, 2, 5]})
    grouped = frame.groupby(["a"]).b

    payload = grouped.serialize()
    restored = grouped.__class__.deserialize(*payload)

    # The aggregated results are compared, not the groupby objects.
    assert_eq(restored.sum(), grouped.sum())
def test_replace_inplace():
    """Compare ``replace`` between cudf and pandas, including ``inplace=True``.

    Covers numeric and categorical Series, DataFrames, empty ``to_replace``
    arrays, dict-based column replacement, and one call whose raised
    exception is compared between the two libraries.
    """
    values = np.array([5, 1, 2, 3, 4])

    # --- Series: scalar replacement, in place -----------------------------
    g_ser = cudf.Series(values)
    p_ser = pd.Series(values)
    g_ser_copy = g_ser.copy()
    p_ser_copy = p_ser.copy()

    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)
    g_ser.replace(5, 0, inplace=True)
    p_ser.replace(5, 0, inplace=True)
    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)

    # --- Series: dict replacement without inplace (results discarded) -----
    g_ser = cudf.Series(values)
    p_ser = pd.Series(values)
    g_ser_copy = g_ser.copy()
    p_ser_copy = p_ser.copy()

    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)
    g_ser.replace({5: 0, 3: -5})
    p_ser.replace({5: 0, 3: -5})
    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)

    # ``replace`` called with no arguments at all.
    g_noarg = g_ser.replace()
    p_noarg = p_ser.replace()
    assert_eq(g_noarg, p_noarg)

    # --- Categorical Series, in place -------------------------------------
    p_ser = pd.Series(["one", "two", "three"], dtype="category")
    g_ser = cudf.from_pandas(p_ser)
    g_ser_copy = g_ser.copy()
    p_ser_copy = p_ser.copy()

    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)
    g_ser.replace("one", "two", inplace=True)
    p_ser.replace("one", "two", inplace=True)
    assert_eq(g_ser, p_ser)
    assert_eq(g_ser_copy, p_ser_copy)

    # --- DataFrame: scalar replacement, in place --------------------------
    p_frame = pd.DataFrame({"A": [0, 1, 2, 3, 4], "B": [5, 6, 7, 8, 9]})
    g_frame = cudf.from_pandas(p_frame)
    p_frame_copy = p_frame.copy()
    g_frame_copy = g_frame.copy()

    assert_eq(p_frame, g_frame)
    assert_eq(p_frame_copy, g_frame_copy)
    p_frame.replace(5, 0, inplace=True)
    g_frame.replace(5, 0, inplace=True)
    assert_eq(p_frame, g_frame)
    assert_eq(p_frame_copy, g_frame_copy)

    # --- Series: empty integer array as ``to_replace`` --------------------
    p_small = pd.Series([1, 2, 3, 45])
    g_small = cudf.from_pandas(p_small)
    nothing = np.array([]).astype(int)

    assert_eq(p_small.replace(nothing, -1), g_small.replace(nothing, -1))

    p_small.replace(nothing, 77, inplace=True)
    g_small.replace(nothing, 77, inplace=True)
    assert_eq(p_small, g_small)

    # --- DataFrame: dict-keyed column replacement -------------------------
    p_frame = pd.DataFrame({"a": [1, 2, 3, 4, 5, 666]})
    g_frame = cudf.from_pandas(p_frame)

    assert_eq(
        p_frame.replace({"a": 2}, {"a": -33}),
        g_frame.replace({"a": 2}, {"a": -33}),
    )
    assert_eq(
        p_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
        g_frame.replace({"a": [2, 5]}, {"a": [9, 10]}),
    )
    assert_eq(
        p_frame.replace([], []),
        g_frame.replace([], []),
    )

    # --- Scalar ``to_replace`` with an empty-list ``value`` ---------------
    # Only the raised exceptions are compared; messages are not.
    assert_exceptions_equal(
        lfunc=p_frame.replace,
        rfunc=g_frame.replace,
        lfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}),
        rfunc_args_and_kwargs=([], {"to_replace": -1, "value": []}),
        compare_error_message=False,
    )
def test_serialize_series():
    """Round-trip a Series of ``arange(100)`` through serialize."""
    source = cudf.Series(np.arange(100))
    payload = source.serialize()
    restored = cudf.Series.deserialize(*payload)
    assert_eq(source, restored)
def test_multiIndex_size(pdi):
    """``size`` of the cudf conversion of ``pdi`` equals ``pdi.size``."""
    converted = cudf.from_pandas(pdi)
    expected_size = pdi.size
    actual_size = converted.size
    assert_eq(expected_size, actual_size)