def test_threshold_encoder_with_all_columns_under_threshold():
    """Check the encoder output for a 2x2 input when ``threshold=0.01``.

    The expected matrix has three indicator columns: one for the single value
    seen in the first input column and one for each of the two values seen in
    the second input column.

    NOTE(review): the test name says "under threshold" while the expected
    matrix keeps every column -- confirm the intended wording against the
    encoder's threshold semantics.
    """
    encoder = ThresholdOneHotEncoder(threshold=0.01)
    # ``toarray`` returns a plain ndarray; ``todense`` would return np.matrix.
    observed = encoder.fit_transform(np.array([[1, 2], [1, 3]])).toarray()
    expected = np.array([
        [1, 1, 0],
        [1, 0, 1],
    ])
    # Observed first, expected second: the (actual, desired) order that
    # np.testing.assert_array_equal uses when labelling a failure.
    assert_array_equal(observed, expected)
def test_threshold_encoder_with_no_columns_over_threshold():
    """Check the encoder output for a 2x2 input when ``threshold=1000``.

    The expected result is a 2x2 matrix containing only zeros.
    """
    encoder = ThresholdOneHotEncoder(threshold=1000)
    # ``toarray`` returns a plain ndarray; ``todense`` would return np.matrix.
    observed = encoder.fit_transform(np.array([[1, 2], [1, 3]])).toarray()
    expected = np.array([
        [0, 0],
        [0, 0],
    ])
    # Observed first, expected second: the (actual, desired) order that
    # np.testing.assert_array_equal uses when labelling a failure.
    assert_array_equal(observed, expected)
def test_threshold_encoder(X, X_expected_categories, X_expected, max_categories, threshold):
    """Fit the encoder on ``X`` and compare its categories and output to the expectations.

    All arguments are supplied by the caller (presumably a pytest
    parametrization that is not visible in this part of the file -- verify).

    Checks, in order:
      * ``fit_transform`` returns an ``sp.csr_matrix`` instance;
      * ``categories_`` has as many entries as ``X_expected_categories`` and
        each entry equals its expected counterpart;
      * the densified output equals ``X_expected``.
    """
    encoder = ThresholdOneHotEncoder(
        threshold=threshold,
        max_categories=max_categories,
    )
    encoded = encoder.fit_transform(X)
    assert isinstance(encoded, sp.csr_matrix)

    # Compare lengths first so that zip() below cannot silently drop entries.
    assert len(encoder.categories_) == len(X_expected_categories)
    category_pairs = zip(encoder.categories_, X_expected_categories)
    for fitted, wanted in category_pairs:
        np.testing.assert_array_equal(fitted, wanted)

    np.testing.assert_array_equal(encoded.toarray(), X_expected)