def test_inverse_transform(orig_label, ord_label,
                           expected_reverted, bad_ord_label,
                           use_fit_transform, client):
    """Verify ``LabelEncoder.inverse_transform`` on distributed input.

    Each cudf input is split into one partition per entry reported by
    ``client.has_what()``.  The encoder is fitted on ``orig_label`` (through
    ``fit_transform`` when ``use_fit_transform`` is truthy, through ``fit``
    otherwise), then:

    * ``ord_label`` must be reverted to ``expected_reverted``;
    * ``bad_ord_label`` must make ``inverse_transform`` raise ``ValueError``.
    """
    n_parts = len(client.has_what())

    def _to_dask(cudf_obj):
        # Spread a cudf object across ``n_parts`` partitions.
        return dask_cudf.from_cudf(cudf_obj, npartitions=n_parts)

    labels_ddf = _to_dask(orig_label)
    ordinals_ddf = _to_dask(ord_label)
    expected_ddf = _to_dask(expected_reverted)
    bad_ordinals_ddf = _to_dask(bad_ord_label)

    # Fit the encoder through whichever entry point the test case selects.
    encoder = LabelEncoder()
    fit_method = encoder.fit_transform if use_fit_transform else encoder.fit
    fit_method(labels_ddf)
    assert encoder._fitted is True

    # The round trip must give back the expected labels, element for element.
    # The result index is reset so the comparison uses a fresh 0..n-1 index.
    actual = encoder.inverse_transform(ordinals_ddf)
    actual = actual.compute().reset_index(drop=True)
    expected = expected_ddf.compute()
    assert len(actual) == len(expected)
    matching = actual[actual == expected]
    assert len(actual) == len(matching)

    # Ordinals the encoder never produced must be rejected with ValueError.
    with pytest.raises(ValueError,
                       match='y contains previously unseen label'):
        encoder.inverse_transform(bad_ordinals_ddf).compute()
def test_empty_input(empty, ord_label, client):
    """Verify ``LabelEncoder`` behaviour when fitted on the ``empty`` input.

    Both cudf inputs are split into one partition per entry reported by
    ``client.has_what()``.  The test asserts that:

    * ``fit`` on ``empty`` still marks the encoder as fitted;
    * ``inverse_transform`` of ``ord_label`` then raises ``ValueError``;
    * ``fit_transform`` on ``empty`` marks a fresh encoder as fitted and
      returns a result of length zero.
    """
    n_parts = len(client.has_what())
    empty_ddf = dask_cudf.from_cudf(empty, npartitions=n_parts)
    ordinals_ddf = dask_cudf.from_cudf(ord_label, npartitions=n_parts)

    # Fitting on the empty input must still flag the encoder as fitted.
    fitted_encoder = LabelEncoder()
    fitted_encoder.fit(empty_ddf)
    assert fitted_encoder._fitted is True

    # inverse_transform of ord_label on this encoder must raise ValueError.
    with pytest.raises(ValueError,
                       match='y contains previously unseen label'):
        fitted_encoder.inverse_transform(ordinals_ddf).compute()

    # fit_transform() on the empty input yields a zero-length result.
    fresh_encoder = LabelEncoder()
    encoded = fresh_encoder.fit_transform(empty_ddf).compute()
    assert fresh_encoder._fitted is True
    assert len(encoded) == 0