def test_given_categories(self):
    """Categories passed for column A as ``(cats, True)`` must be used as-is.

    The transformed column has to be categorical, expose exactly the
    supplied categories in the supplied order, and be flagged as ordered.
    """
    expected = ["a", "b", "c", "d"]
    encoder = dpp.Categorizer(categories={"A": (expected, True)})
    result = encoder.fit_transform(raw)

    column = result["A"]
    assert column.dtype == "category"

    found = column.cat.categories
    tm.assert_index_equal(found, pd.Index(expected))
    assert all(found == expected)
    assert column.cat.ordered
def test_given_categories(self):
    """Categories passed for column A as ``(cats, True)`` must be used as-is.

    The transformed column has to be categorical, expose exactly the
    supplied categories in the supplied order, and be flagged as ordered.
    """
    expected = ['a', 'b', 'c', 'd']
    encoder = dpp.Categorizer(categories={'A': (expected, True)})
    result = encoder.fit_transform(raw)

    column = result['A']
    assert column.dtype == 'category'

    found = column.cat.categories
    tm.assert_index_equal(found, pd.Index(expected))
    assert all(found == expected)
    assert column.cat.ordered
def test_dask(self):
    """Categorizer on a two-partition dask DataFrame built from ``raw``.

    Columns A, B and C must come back categorical, D must stay ``int``,
    and the fitted ``columns_`` attribute must list A, B and C.
    """
    a = dd.from_pandas(raw, npartitions=2)
    ce = dpp.Categorizer()
    trn = ce.fit_transform(a)

    # Compare against the 'category' dtype string (as test_given_categories
    # does) instead of calling pandas' ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1 and emits a warning on every call.
    assert trn['A'].dtype == 'category'
    assert trn['B'].dtype == 'category'
    assert trn['C'].dtype == 'category'
    assert trn['D'].dtype == int

    tm.assert_index_equal(ce.columns_, pd.Index(['A', 'B', 'C']))
def test_dask(self):
    """Categorizer on a two-partition dask DataFrame built from ``raw``.

    Columns A, B and C must come back categorical, D must stay ``int``,
    and the fitted ``columns_`` attribute must list A, B and C.
    """
    a = dd.from_pandas(raw, npartitions=2)
    ce = dpp.Categorizer()
    trn = ce.fit_transform(a)

    # Compare against the "category" dtype string (as test_given_categories
    # does) instead of calling pandas' ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1 and emits a warning on every call.
    assert trn["A"].dtype == "category"
    assert trn["B"].dtype == "category"
    assert trn["C"].dtype == "category"
    assert trn["D"].dtype == int

    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
def test_ce(self):
    """Categorizer with default arguments on the pandas frame ``raw``.

    Columns A, B and C must come back categorical, D must stay ``int``,
    ``columns_`` must list A, B and C, and ``raw`` itself must be left
    unmodified by ``fit_transform``.
    """
    ce = dpp.Categorizer()
    original = raw.copy()  # snapshot used to detect in-place mutation
    trn = ce.fit_transform(raw)

    # Compare against the 'category' dtype string (as test_given_categories
    # does) instead of calling pandas' ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1 and emits a warning on every call.
    assert trn['A'].dtype == 'category'
    assert trn['B'].dtype == 'category'
    assert trn['C'].dtype == 'category'
    assert trn['D'].dtype == int

    tm.assert_index_equal(ce.columns_, pd.Index(['A', 'B', 'C']))
    tm.assert_frame_equal(raw, original)
def test_ce(self):
    """Categorizer with default arguments on the pandas frame ``raw``.

    Columns A, B and C must come back categorical, D must stay ``int``,
    ``columns_`` must list A, B and C, and ``raw`` itself must be left
    unmodified by ``fit_transform``.
    """
    ce = dpp.Categorizer()
    original = raw.copy()  # snapshot used to detect in-place mutation
    trn = ce.fit_transform(raw)

    # Compare against the "category" dtype string (as test_given_categories
    # does) instead of calling pandas' ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1 and emits a warning on every call.
    assert trn["A"].dtype == "category"
    assert trn["B"].dtype == "category"
    assert trn["C"].dtype == "category"
    assert trn["D"].dtype == int

    tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
    tm.assert_frame_equal(raw, original)
def test_raises(self):
    """Check the error types raised for unsupported input and unfitted use.

    ``fit`` must raise ``TypeError`` for a NumPy array and for a dask array;
    ``transform`` must raise ``NotFittedError`` because neither ``fit`` call
    completed.
    """
    encoder = dpp.Categorizer()

    numpy_input = np.array([[0, 0], [1, 1]])
    with pytest.raises(TypeError):
        encoder.fit(numpy_input)

    dask_input = da.from_array(numpy_input, chunks=(2, 2))
    with pytest.raises(TypeError):
        encoder.fit(dask_input)

    with pytest.raises(NotFittedError):
        encoder.transform(raw)
def test_categorical_dtype(self):
    """The dtype learned for column A hashes like an unordered a/b/c dtype."""
    encoder = dpp.Categorizer()
    encoder.fit(raw)

    learned_hash = hash(encoder.categories_['A'])
    reference = pd.api.types.CategoricalDtype(['a', 'b', 'c'], False)
    assert learned_hash == hash(reference)
def test_non_categorical_dtype(self):
    """``categories_['A']`` unpacks to the a/b/c index and ``ordered=False``."""
    encoder = dpp.Categorizer()
    encoder.fit(raw)

    learned = encoder.categories_['A']
    index, is_ordered = learned

    tm.assert_index_equal(index, pd.Index(['a', 'b', 'c']))
    assert is_ordered is False
def test_columns(self):
    """Only the columns named in ``columns`` are converted.

    With ``columns=['A']`` column A must become categorical while column B
    must keep its object dtype.
    """
    ce = dpp.Categorizer(columns=['A'])
    trn = ce.fit_transform(raw)

    # Compare against the 'category' dtype string (as test_given_categories
    # does) instead of calling pandas' ``is_categorical_dtype``, which is
    # deprecated since pandas 2.1 and emits a warning on every call.
    assert trn['A'].dtype == 'category'
    assert is_object_dtype(trn['B'])
def test_categorical_dtype(self):
    """The dtype learned for column A hashes like an unordered a/b/c dtype."""
    encoder = dpp.Categorizer()
    encoder.fit(raw)

    learned_hash = hash(encoder.categories_["A"])
    reference = pd.api.types.CategoricalDtype(["a", "b", "c"], False)
    assert learned_hash == hash(reference)
def test_non_categorical_dtype(self):
    """``categories_["A"]`` unpacks to the a/b/c index and ``ordered=False``."""
    encoder = dpp.Categorizer()
    encoder.fit(raw)

    learned = encoder.categories_["A"]
    index, is_ordered = learned

    tm.assert_index_equal(index, pd.Index(["a", "b", "c"]))
    assert is_ordered is False