Пример #1
0
    def test_basic(self, daskify, values):
        """Check ``OrdinalEncoder`` transform output and its round trip.

        The encoder is fitted on columns ``"A"`` and ``"D"`` of ``dummy``; the
        transformed frame is compared with a hand-written expectation, and
        ``inverse_transform`` must give back the input frame.

        Parameters
        ----------
        daskify
            When truthy, the input is wrapped with ``dd.from_pandas`` and both
            the input and the round-tripped result are computed before the
            final comparison.
        values
            When truthy, ``inverse_transform`` receives ``.values`` of the
            transformed frame instead of the frame itself.
        """
        frame = dummy[["A", "D"]]
        if daskify:
            frame = dd.from_pandas(frame, 2)

        encoder = dpp.OrdinalEncoder().fit(frame)
        encoded = encoder.transform(frame)

        # "A" is expected as int8 codes; "D" uses NumPy's default integer dtype.
        codes_a = np.array([0, 1, 2, 0], dtype="int8")
        column_d = np.array([1, 2, 3, 4])
        expected = pd.DataFrame({"A": codes_a, "D": column_d}, columns=["A", "D"])
        assert_eq_df(encoded, expected)

        # Optionally strip the frame down to its raw values before decoding.
        decoder_input = encoded.values if values else encoded
        restored = encoder.inverse_transform(decoder_input)

        if daskify:
            # assert_frame_equal is handed concrete frames, so compute both.
            frame, restored = frame.compute(), restored.compute()

        tm.assert_frame_equal(restored, frame)
Пример #2
0
 def test_inverse_transform(self):
     """Round-trip a dask DataFrame through ``OrdinalEncoder``.

     ``inverse_transform`` is fed the transformed data in four forms (lazy
     frame, computed frame, lazy ``.values`` and computed ``.values``) and
     every result must compare equal to the original frame.
     """
     source = pd.DataFrame({
         "A": np.arange(10),
         "B": pd.Categorical(['a'] * 4 + ['b'] * 6),
     })
     ddf = dd.from_pandas(source, npartitions=2)
     encoder = dpp.OrdinalEncoder()
     encoder.fit(ddf)

     # Each entry turns a fresh transform result into one decoder input form.
     input_forms = (
         lambda encoded: encoded,
         lambda encoded: encoded.compute(),
         lambda encoded: encoded.values,
         lambda encoded: encoded.values.compute(),
     )
     for as_input in input_forms:
         decoded = encoder.inverse_transform(as_input(encoder.transform(ddf)))
         assert_eq_df(ddf, decoded)
Пример #3
0
    def test_inverse_transform(self):
        """Check ``inverse_transform`` returns dask collections that round-trip.

        A two-partition dask DataFrame is encoded and then decoded from both
        the transformed frame and its ``.values``. Each decoded result must be
        a dask collection and must compare equal to the original frame.
        """
        enc = dpp.OrdinalEncoder()
        df = dd.from_pandas(
            pd.DataFrame(
                {"A": np.arange(10), "B": pd.Categorical(["a"] * 4 + ["b"] * 6)}
            ),
            npartitions=2,
        )
        enc.fit(df)

        from_frame = enc.inverse_transform(enc.transform(df))
        from_array = enc.inverse_transform(enc.transform(df).values)

        # Lazy inputs must yield lazy (dask) outputs.
        assert dask.is_dask_collection(from_array)
        assert dask.is_dask_collection(from_frame)

        # Each round trip is compared exactly once.
        # NOTE(review): eager (``.compute()``) inputs are not exercised here --
        # confirm whether they should be covered as well.
        assert_eq_df(df, from_frame)
        assert_eq_df(df, from_array)
Пример #4
0
 def test_transform_raises(self):
     """``transform`` rejects a frame that lacks a column seen during ``fit``."""
     encoder = dpp.OrdinalEncoder()
     encoder.fit(dummy)
     expected_message = "Columns of 'X' do not match the training"
     # ``match`` applies the same regex search the explicit check would.
     with pytest.raises(ValueError, match=expected_message):
         encoder.transform(dummy.drop("B", axis="columns"))
Пример #5
0
 def test_da(self):
     """``fit_transform`` on a dask DataFrame returns a dask DataFrame."""
     ddf = dd.from_pandas(dummy, npartitions=2)
     encoded = dpp.OrdinalEncoder().fit_transform(ddf)
     assert isinstance(encoded, dd.DataFrame)