def test_transform_numeric(self):
    """Check that ``transform`` one-hot encodes numeric categories.

    The transformer is fitted on a numeric series holding two distinct
    values and then applied to that same series. Every row is expected to
    light up exactly one column, and the ``dummy_encoded`` attribute is
    expected to be falsy afterwards.

    Input:
    - Series with numeric input
    Output:
    - one-hot encoding of the input
    """
    # Setup
    series = pd.Series([1, 2])
    transformer = OneHotEncodingTransformer()
    transformer.fit(series)

    # Run
    encoded = transformer.transform(series)

    # Assert
    assert not transformer.dummy_encoded
    np.testing.assert_array_equal(encoded, np.array([[1, 0], [0, 1]]))
def test_transform_unknown(self):
    """Check that ``transform`` rejects a category that was never fitted.

    The transformer is fitted on a series holding only ``'a'`` and is then
    asked to transform ``'b'``; the call is expected to raise a
    ``ValueError``.

    Input:
    - list holding a category absent from the fitted data
    Raises:
    - ValueError
    """
    # Setup
    transformer = OneHotEncodingTransformer()
    transformer.fit(pd.Series(['a']))

    # Run / Assert
    with np.testing.assert_raises(ValueError):
        transformer.transform(['b'])
def test_one_hot_numerical_nans():
    """Ensure OneHotEncodingTransformer round-trips a numerical + nan only column.

    The series is fitted, transformed and reverse transformed; the result
    must equal the original series.
    """
    series = pd.Series([1, 2, float('nan'), np.nan])
    ohet = OneHotEncodingTransformer()
    ohet.fit(series)

    # Encode and immediately decode to exercise the full round trip.
    recovered = ohet.reverse_transform(ohet.transform(series))

    pd.testing.assert_series_equal(recovered, series)
def test_transform_single(self):
    """Check ``transform`` when the data holds only one category.

    With a single distinct value in the fitted series, the encoding is
    expected to be one column in which every row equals one.

    Input:
    - Series with a single categorical value
    Output:
    - one-hot encoding of the input
    """
    # Setup
    series = pd.Series(['a', 'a', 'a'])
    transformer = OneHotEncodingTransformer()
    transformer.fit(series)

    # Run
    encoded = transformer.transform(series)

    # Assert: three rows, one column, all ones.
    np.testing.assert_array_equal(encoded, np.ones((3, 1), dtype=int))
def test_transform_nans(self):
    """Check ``transform`` on categorical data that includes a missing value.

    The fitted series holds ``'a'``, ``'b'`` and ``None``, each once. The
    encoding is expected to be a 3x3 identity matrix, so the missing value
    gets a column of its own alongside the two categories.

    Input:
    - Series with categorical values and nans
    Output:
    - one-hot encoding of the input
    """
    # Setup
    series = pd.Series(['a', 'b', None])
    transformer = OneHotEncodingTransformer()
    transformer.fit(series)

    # Run
    encoded = transformer.transform(series)

    # Assert
    np.testing.assert_array_equal(encoded, np.eye(3, dtype=int))