def test_basic(self, daskify, values):
    """Round-trip columns "A" and "D" of ``dummy`` through ``OrdinalEncoder``.

    Parameters
    ----------
    daskify : bool
        When truthy, the input frame is wrapped with ``dd.from_pandas(frame, 2)``
        before fitting, and both sides are computed before the final comparison.
    values : bool
        When truthy, ``inverse_transform`` is fed the ``.values`` of the
        transformed output instead of the transformed frame itself.
    """
    frame = dummy[["A", "D"]]
    if daskify:
        frame = dd.from_pandas(frame, 2)

    encoder = dpp.OrdinalEncoder().fit(frame)
    encoded = encoder.transform(frame)

    # Expected encoding: "A" becomes int8 codes, "D" is left as plain integers.
    codes_a = np.array([0, 1, 2, 0], dtype="int8")
    untouched_d = np.array([1, 2, 3, 4])
    expected = pd.DataFrame({"A": codes_a, "D": untouched_d}, columns=["A", "D"])
    assert_eq_df(encoded, expected)

    # Optionally strip the frame wrapper so the inverse works from raw values.
    inverse_input = encoded.values if values else encoded
    restored = encoder.inverse_transform(inverse_input)

    if daskify:
        frame = frame.compute()
        restored = restored.compute()
    tm.assert_frame_equal(restored, frame)
def test_inverse_transform(self):
    """``inverse_transform`` recovers the input from every transformed form.

    The encoder is fitted on a two-partition dask DataFrame, and the
    transformed output is handed back to ``inverse_transform`` in four forms:
    as returned, after ``.compute()``, as ``.values``, and as
    ``.values.compute()``.
    """
    # NOTE(review): the visible source defines another ``test_inverse_transform``
    # right after this one; if both live in the same class the later definition
    # shadows this test -- confirm.
    source = pd.DataFrame(
        {"A": np.arange(10), "B": pd.Categorical(['a'] * 4 + ['b'] * 6)}
    )
    ddf = dd.from_pandas(source, npartitions=2)

    encoder = dpp.OrdinalEncoder()
    encoder.fit(ddf)

    # Each adapter reshapes a fresh ``transform`` result before inversion.
    adapters = (
        lambda out: out,
        lambda out: out.compute(),
        lambda out: out.values,
        lambda out: out.values.compute(),
    )
    for adapt in adapters:
        transformed = adapt(encoder.transform(ddf))
        assert_eq_df(ddf, encoder.inverse_transform(transformed))
def test_inverse_transform(self):
    """``inverse_transform`` stays lazy and round-trips dask inputs.

    The encoder is fitted on a two-partition dask DataFrame. The transformed
    output is inverted both as returned by ``transform`` and as its
    ``.values``. In each case the result must be a dask collection and must
    compare equal to the original frame.
    """
    enc = dpp.OrdinalEncoder()
    df = dd.from_pandas(
        pd.DataFrame(
            {"A": np.arange(10), "B": pd.Categorical(["a"] * 4 + ["b"] * 6)}
        ),
        npartitions=2,
    )
    enc.fit(df)

    # Build each round-trip once; the original repeated identical assertions.
    from_array = enc.inverse_transform(enc.transform(df).values)
    from_frame = enc.inverse_transform(enc.transform(df))

    # Dask in, dask out: inversion must not eagerly materialise the data.
    assert dask.is_dask_collection(from_array)
    assert dask.is_dask_collection(from_frame)

    assert_eq_df(df, from_frame)
    assert_eq_df(df, from_array)
def test_transform_raises(self):
    """``transform`` raises ``ValueError`` when column "B" is missing.

    The encoder is fitted on the full ``dummy`` frame and then asked to
    transform the same frame with column "B" dropped. The error message
    must match the "do not match the training" pattern.
    """
    encoder = dpp.OrdinalEncoder()
    encoder.fit(dummy)

    # ``match`` applies a regex search to the string form of the exception.
    with pytest.raises(
        ValueError, match="Columns of 'X' do not match the training"
    ):
        encoder.transform(dummy.drop("B", axis="columns"))
def test_da(self):
    """``fit_transform`` on a dask DataFrame returns a ``dd.DataFrame``."""
    ddf = dd.from_pandas(dummy, npartitions=2)
    encoded = dpp.OrdinalEncoder().fit_transform(ddf)
    assert isinstance(encoded, dd.DataFrame)