示例#1
0
 def test_given_categories(self):
     """Fit with explicitly supplied, ordered categories for column A.

     The transformed column must be categorical, expose exactly the
     supplied categories, and be flagged as ordered.
     """
     given = ["a", "b", "c", "d"]
     categorizer = dpp.Categorizer(categories={"A": (given, True)})
     column = categorizer.fit_transform(raw)["A"]

     assert column.dtype == "category"

     fitted = column.cat.categories
     tm.assert_index_equal(fitted, pd.Index(given))
     assert all(fitted == given)
     assert column.cat.ordered
示例#2
0
 def test_given_categories(self):
     """Fit with explicitly supplied, ordered categories for column A.

     The transformed column must be categorical, expose exactly the
     supplied categories, and be flagged as ordered.
     """
     given = ['a', 'b', 'c', 'd']
     categorizer = dpp.Categorizer(categories={'A': (given, True)})
     column = categorizer.fit_transform(raw)['A']

     assert column.dtype == 'category'

     fitted = column.cat.categories
     tm.assert_index_equal(fitted, pd.Index(given))
     assert all(fitted == given)
     assert column.cat.ordered
示例#3
0
 def test_dask(self):
     """Fit and transform a two-partition dask DataFrame built from ``raw``.

     Columns A, B and C must come back with a categorical dtype, column D
     must have an integer dtype, and ``columns_`` must list exactly the
     converted columns.
     """
     a = dd.from_pandas(raw, npartitions=2)
     ce = dpp.Categorizer()
     trn = ce.fit_transform(a)
     # Compare against the 'category' dtype alias instead of calling
     # is_categorical_dtype, which pandas deprecated in 2.1.
     for col in ['A', 'B', 'C']:
         assert trn[col].dtype == 'category', col
     assert trn['D'].dtype == int
     tm.assert_index_equal(ce.columns_, pd.Index(['A', 'B', 'C']))
示例#4
0
 def test_dask(self):
     """Fit and transform a two-partition dask DataFrame built from ``raw``.

     Columns A, B and C must come back with a categorical dtype, column D
     must have an integer dtype, and ``columns_`` must list exactly the
     converted columns.
     """
     a = dd.from_pandas(raw, npartitions=2)
     ce = dpp.Categorizer()
     trn = ce.fit_transform(a)
     # Compare against the "category" dtype alias instead of calling
     # is_categorical_dtype, which pandas deprecated in 2.1.
     for col in ["A", "B", "C"]:
         assert trn[col].dtype == "category", col
     assert trn["D"].dtype == int
     tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
示例#5
0
 def test_ce(self):
     """Fit and transform the ``raw`` frame with a default Categorizer.

     Columns A, B and C must come back with a categorical dtype, column D
     must have an integer dtype, ``columns_`` must list exactly the
     converted columns, and ``raw`` itself must be left unmodified.
     """
     ce = dpp.Categorizer()
     # Snapshot the input so in-place mutation can be detected afterwards.
     original = raw.copy()
     trn = ce.fit_transform(raw)
     # Compare against the 'category' dtype alias instead of calling
     # is_categorical_dtype, which pandas deprecated in 2.1.
     for col in ['A', 'B', 'C']:
         assert trn[col].dtype == 'category', col
     assert trn['D'].dtype == int
     tm.assert_index_equal(ce.columns_, pd.Index(['A', 'B', 'C']))
     tm.assert_frame_equal(raw, original)
示例#6
0
 def test_ce(self):
     """Fit and transform the ``raw`` frame with a default Categorizer.

     Columns A, B and C must come back with a categorical dtype, column D
     must have an integer dtype, ``columns_`` must list exactly the
     converted columns, and ``raw`` itself must be left unmodified.
     """
     ce = dpp.Categorizer()
     # Snapshot the input so in-place mutation can be detected afterwards.
     original = raw.copy()
     trn = ce.fit_transform(raw)
     # Compare against the "category" dtype alias instead of calling
     # is_categorical_dtype, which pandas deprecated in 2.1.
     for col in ["A", "B", "C"]:
         assert trn[col].dtype == "category", col
     assert trn["D"].dtype == int
     tm.assert_index_equal(ce.columns_, pd.Index(["A", "B", "C"]))
     tm.assert_frame_equal(raw, original)
示例#7
0
    def test_raises(self):
        """Exercise the error paths of ``Categorizer``.

        ``fit`` must raise ``TypeError`` for a NumPy array and for a dask
        array, and ``transform`` must raise ``NotFittedError`` on an
        estimator whose ``fit`` never succeeded.
        """
        categorizer = dpp.Categorizer()
        numpy_input = np.array([[0, 0], [1, 1]])
        dask_input = da.from_array(numpy_input, chunks=(2, 2))

        for unsupported in (numpy_input, dask_input):
            with pytest.raises(TypeError):
                categorizer.fit(unsupported)

        with pytest.raises(NotFittedError):
            categorizer.transform(raw)
示例#8
0
 def test_categorical_dtype(self):
     """Fitted ``categories_['A']`` must hash like an unordered a/b/c dtype."""
     expected_dtype = pd.api.types.CategoricalDtype(['a', 'b', 'c'], False)

     categorizer = dpp.Categorizer()
     categorizer.fit(raw)
     fitted_dtype = categorizer.categories_['A']

     assert hash(fitted_dtype) == hash(expected_dtype)
示例#9
0
 def test_non_categorical_dtype(self):
     """Fitted ``categories_['A']`` must unpack to ``(index, ordered)``.

     The index must equal a/b/c and the ordered flag must be ``False``.
     """
     categorizer = dpp.Categorizer()
     categorizer.fit(raw)

     fitted_index, is_ordered = categorizer.categories_['A']
     expected_index = pd.Index(['a', 'b', 'c'])

     tm.assert_index_equal(fitted_index, expected_index)
     assert is_ordered is False
示例#10
0
 def test_columns(self):
     """Restrict the Categorizer to column A through ``columns``.

     After ``fit_transform`` column A must be categorical while column B
     must still have an object dtype.
     """
     ce = dpp.Categorizer(columns=['A'])
     trn = ce.fit_transform(raw)
     # Compare against the 'category' dtype alias instead of calling
     # is_categorical_dtype, which pandas deprecated in 2.1.
     assert trn['A'].dtype == 'category'
     assert is_object_dtype(trn['B'])
示例#11
0
 def test_categorical_dtype(self):
     """Fitted ``categories_["A"]`` must hash like an unordered a/b/c dtype."""
     expected_dtype = pd.api.types.CategoricalDtype(["a", "b", "c"], False)

     categorizer = dpp.Categorizer()
     categorizer.fit(raw)
     fitted_dtype = categorizer.categories_["A"]

     assert hash(fitted_dtype) == hash(expected_dtype)
示例#12
0
 def test_non_categorical_dtype(self):
     """Fitted ``categories_["A"]`` must unpack to ``(index, ordered)``.

     The index must equal a/b/c and the ordered flag must be ``False``.
     """
     categorizer = dpp.Categorizer()
     categorizer.fit(raw)

     fitted_index, is_ordered = categorizer.categories_["A"]
     expected_index = pd.Index(["a", "b", "c"])

     tm.assert_index_equal(fitted_index, expected_index)
     assert is_ordered is False