def test_basic(self):
    """Fit the ``dpp`` and ``spp`` transformers and compare the results.

    The ``dpp`` estimator is fitted on ``X`` and the ``spp`` estimator on
    ``X.compute()``; the ``_transformer`` held by the former is then
    compared with the latter through ``assert_estimator_equal``.
    """
    wrapped = dpp.PolynomialFeatures()
    wrapped.fit(X)

    reference = spp.PolynomialFeatures()
    reference.fit(X.compute())

    assert_estimator_equal(wrapped._transformer, reference)
def test_transformer_params(self):
    """Check that constructor parameters reach the inner ``_transformer``.

    Non-default values are used for ``degree``, ``interaction_only`` and
    ``include_bias``; after fitting on ``X`` each one is compared between
    the estimator and its ``_transformer`` attribute.
    """
    poly = dpp.PolynomialFeatures(
        degree=3,
        interaction_only=True,
        include_bias=False,
    )
    poly.fit(X)

    inner = poly._transformer
    assert inner.degree == poly.degree
    assert inner.interaction_only is poly.interaction_only
    assert inner.include_bias is poly.include_bias
def test_df_transform_index(self, daskify):
    """Check that a shuffled frame round-trips through a degree-1 transform.

    ``daskify`` selects whether the copied ``df`` is used as is (truthy) or
    after ``.compute()`` (falsy). The frame is shuffled with
    ``sample(frac=1.0)`` and transformed with ``preserve_dataframe=True``.
    """
    shuffled = copy(df)
    if not daskify:
        shuffled = shuffled.compute()
    shuffled = shuffled.sample(frac=1.0)

    poly = dpp.PolynomialFeatures(preserve_dataframe=True, degree=1)
    transformed = poly.fit_transform(shuffled)

    # Column 0 is skipped before comparing with the input; presumably it
    # is the bias column, since include_bias is left at its default.
    assert_eq_df(transformed.iloc[:, 1:], shuffled, check_dtype=False)
def test_df_transform(self, daskify):
    """Compare dataframe, array and ``spp`` outputs of ``fit_transform``.

    ``daskify`` selects whether ``df`` is used as is (truthy) or after
    ``.compute()`` (falsy). Three transformers are run on the same frame:
    ``dpp`` with ``preserve_dataframe=True``, ``dpp`` with defaults, and
    ``spp`` with defaults.
    """
    frame = df if daskify else df.compute()

    keep_frame = dpp.PolynomialFeatures(preserve_dataframe=True)
    plain = dpp.PolynomialFeatures()
    reference = spp.PolynomialFeatures()

    out_frame = keep_frame.fit_transform(frame)
    out_array = plain.fit_transform(frame)
    out_reference = reference.fit_transform(frame)

    if daskify:
        # Only meaningful for dask input: both results must be dask
        # collections, and the computed frame must match what the same
        # estimator produces for the computed input.
        out_pandas = keep_frame.fit_transform(frame.compute())
        assert dask.is_dask_collection(out_frame)
        assert dask.is_dask_collection(out_array)
        assert_eq_df(out_frame.compute().reset_index(drop=True), out_pandas)

    assert_eq_ar(out_frame.values, out_reference)
    assert_eq_ar(out_frame.values, out_array)
def test_array_transform(self):
    """Check ``fit_transform`` on ``X`` against ``spp`` on ``X.compute()``.

    The fitted estimators must compare equal, the ``dpp`` result must be a
    dask collection, and both outputs must hold the same values.
    """
    wrapped = dpp.PolynomialFeatures()
    reference = spp.PolynomialFeatures()

    lazy_out = wrapped.fit_transform(X)
    eager_out = reference.fit_transform(X.compute())

    assert_estimator_equal(wrapped, reference)
    assert dask.is_dask_collection(lazy_out)
    assert_eq_ar(lazy_out, eager_out)
def test_input_types(self):
    """Check that fitting on different input types yields equal estimators.

    Every comparison fits a fresh estimator per input. Refitting one
    shared instance and passing both ``fit`` results to
    ``assert_estimator_equal`` hands it the same object twice whenever
    ``fit`` returns the estimator itself (the scikit-learn convention),
    so such a comparison could never fail.
    """

    def fit_dpp(data):
        # Fresh ``dpp`` transformer fitted on ``data``; no shared state.
        estimator = dpp.PolynomialFeatures()
        estimator.fit(data)
        return estimator

    def fit_spp(data):
        # Fresh ``spp`` transformer fitted on ``data``; no shared state.
        estimator = spp.PolynomialFeatures()
        estimator.fit(data)
        return estimator

    # ``df`` itself vs. its computed frame.
    assert_estimator_equal(fit_dpp(df), fit_dpp(df.compute()))
    # ``df`` itself vs. the values of its computed frame.
    assert_estimator_equal(fit_dpp(df), fit_dpp(df.compute().values))
    # ``df.values`` vs. the values of the computed frame.
    assert_estimator_equal(fit_dpp(df.values), fit_dpp(df.compute().values))
    # ``dpp`` fitted on ``df`` vs. ``spp`` fitted on the computed data.
    assert_estimator_equal(fit_dpp(df), fit_spp(df.compute()))
    assert_estimator_equal(fit_dpp(df), fit_spp(df.compute().values))
def test_transform_array(self):
    """Check ``fit_transform`` for both a computed and a lazy ``X``.

    The same ``dpp`` estimator is run on ``X.compute()`` and on ``X``;
    each result is compared with the ``spp`` output for ``X.compute()``.
    """
    wrapped = dpp.PolynomialFeatures()
    reference = spp.PolynomialFeatures()

    # numpy array handed to fit_transform
    from_numpy = wrapped.fit_transform(X.compute())
    # dask array handed to fit_transform, then materialised
    from_dask = wrapped.fit_transform(X).compute()

    expected = reference.fit_transform(X.compute())

    assert_eq_ar(from_numpy, expected)
    assert_eq_ar(from_dask, expected)
def test_transformed_shape(self):
    """Check that transformed objects have the correct number of columns.

    The expected count is the length of ``get_feature_names()`` after
    fitting on ``X``; it is compared with ``shape[1]`` of the transform
    output for each kind of input.
    """
    poly = dpp.PolynomialFeatures()
    poly.fit(X)
    expected_cols = len(poly.get_feature_names())

    def n_columns(data):
        # Second shape entry of the transformed ``data``.
        return poly.transform(data).shape[1]

    # dask array
    assert n_columns(X) == expected_cols
    # numpy array
    assert n_columns(X.compute()) == expected_cols
    # dask dataframe
    assert n_columns(df) == expected_cols
    # pandas dataframe
    assert n_columns(df.compute()) == expected_cols

    nan_row_array = df.values
    unknown_div_frame = nan_row_array.to_dask_dataframe(columns=df.columns)

    # dask array with nan rows
    assert n_columns(nan_row_array) == expected_cols
    # dask dataframe with nan rows
    assert n_columns(unknown_div_frame) == expected_cols