def test_transform_meta_override():
    """Check that ``transform_meta`` makes ``ParallelPostFit.transform`` usable.

    A ``OneHotEncoder`` is fitted on a four-row, single-column pandas frame.
    Transforming the two-partition dask version of that frame through a
    ``ParallelPostFit`` wrapper must raise ``ValueError`` when no
    ``transform_meta`` is given, and must match the plain estimator's output
    once a ``(1, 4)`` float64 array is passed as ``transform_meta``.
    """
    frame = pd.DataFrame({"cat_s": ["a", "b", "c", "d"]})
    lazy_frame = dd.from_pandas(frame, npartitions=2)

    # NOTE(review): newer scikit-learn releases rename ``sparse`` to
    # ``sparse_output`` -- confirm against the version this suite targets.
    encoder = OneHotEncoder(sparse=False)
    encoder.fit(pd.DataFrame(frame))

    # Failure when transform_meta is not provided, because the model is
    # value dependent.
    without_meta = ParallelPostFit(encoder)
    with pytest.raises(ValueError):
        without_meta.transform(lazy_frame)

    # Explicit meta: one row with four float64 columns.
    meta = np.array([[0, 0, 0, 0]], dtype=np.float64)
    with_meta = ParallelPostFit(encoder, transform_meta=meta)

    expected = encoder.transform(frame)
    result = with_meta.transform(lazy_frame)
    assert_eq_ar(result, expected)
def test_transform(kind):
    """Check that ``ParallelPostFit.transform`` matches the wrapped estimator.

    Parameters
    ----------
    kind : str
        Selects the container handed to the wrapper's ``transform``:
        ``"numpy"`` computes ``X`` and ``y`` up front with ``dask.compute``,
        ``"dask.dataframe"`` converts them with ``dd.from_dask_array``, and
        any other value leaves the output of
        ``make_classification(chunks=100)`` untouched.
    """
    X, y = make_classification(chunks=100)

    if kind == "numpy":
        X, y = dask.compute(X, y)
    elif kind == "dask.dataframe":
        X = dd.from_dask_array(X)
        y = dd.from_dask_array(y)

    # Materialise the inputs once and reuse them for both fits and for the
    # reference transform, instead of calling ``dask.compute`` three times.
    X_local, y_local = dask.compute(X, y)

    base = PCA(random_state=0)
    wrap = ParallelPostFit(PCA(random_state=0))

    base.fit(X_local, y_local)
    wrap.fit(X_local, y_local)

    assert_estimator_equal(wrap.estimator, base)

    # The plain estimator on in-memory data is the reference ("expected");
    # the wrapper applied to the possibly-lazy ``X`` is under test ("result").
    expected = base.transform(X_local)
    result = wrap.transform(X)
    assert_eq_ar(result, expected)