def test_arbitrary_discretiser():
    """Check ArbitraryDiscretiser on the Boston ``LSTAT`` column against ``pd.cut``.

    Two configurations are exercised with the same user-defined bin edges:

    * ``return_boundaries=False``: the transformed frame must equal the input
      with ``LSTAT`` replaced by the integer bin codes from
      ``pd.cut(..., labels=False)``.
    * ``return_boundaries=True``: the transformed frame must equal the input
      with ``LSTAT`` replaced by the interval output of ``pd.cut``.

    NOTE(review): another function with this exact name is defined later in
    this file; the later definition rebinds the module-level name, so this
    version is presumably never collected by pytest. Confirm and rename or
    remove one of them.

    NOTE(review): if ``load_boston`` is ``sklearn.datasets.load_boston``, it
    was removed in scikit-learn 1.2, so this test cannot run on newer
    releases.
    """
    boston_dataset = load_boston()
    data = pd.DataFrame(boston_dataset.data, columns=boston_dataset.feature_names)

    # ``np.inf`` instead of ``np.Inf``: the capitalised alias was removed in
    # NumPy 2.0, while the lowercase spelling works on every NumPy version.
    bins = [0, 10, 20, 30, np.inf]
    user_dict = {"LSTAT": bins}

    # Expected output when interval boundaries are returned.
    data_t1 = data.copy()
    data_t1["LSTAT"] = pd.cut(data["LSTAT"], bins=bins)

    # Expected output when integer bin codes are returned.
    data_t2 = data.copy()
    data_t2["LSTAT"] = pd.cut(data["LSTAT"], bins=bins, labels=False)

    transformer = ArbitraryDiscretiser(
        binning_dict=user_dict, return_object=False, return_boundaries=False
    )
    X = transformer.fit_transform(data)

    # init params
    assert transformer.return_object is False
    assert transformer.return_boundaries is False
    # fit params
    assert transformer.variables_ == ["LSTAT"]
    assert transformer.binner_dict_ == user_dict
    # transform params
    pd.testing.assert_frame_equal(X, data_t2)

    transformer = ArbitraryDiscretiser(
        binning_dict=user_dict, return_object=False, return_boundaries=True
    )
    X = transformer.fit_transform(data)
    pd.testing.assert_frame_equal(X, data_t1)
def test_arbitrary_discretiser():
    """Check ArbitraryDiscretiser on California housing ``HouseAge`` against ``pd.cut``.

    Two configurations are exercised with the same user-defined bin edges:

    * ``return_boundaries=False``: the transformed frame must equal the input
      with ``HouseAge`` replaced by the integer bin codes from
      ``pd.cut(..., labels=False)``.
    * ``return_boundaries=True``: the transformed frame must equal the input
      with ``HouseAge`` replaced by the ``pd.cut`` intervals cast to ``str``.

    NOTE(review): an earlier function with this exact name exists in this
    file; this definition rebinds the name, so presumably only this version
    is collected by pytest. Confirm and rename or remove the other one.
    """
    california_dataset = fetch_california_housing()
    data = pd.DataFrame(
        california_dataset.data, columns=california_dataset.feature_names
    )

    # ``np.inf`` instead of ``np.Inf``: the capitalised alias was removed in
    # NumPy 2.0, while the lowercase spelling works on every NumPy version.
    bins = [0, 20, 40, 60, np.inf]
    user_dict = {"HouseAge": bins}

    # HouseAge is the median house age in the block group.
    # Expected output when interval boundaries are returned: the intervals
    # produced by pd.cut, compared as their string representation.
    data_t1 = data.copy()
    data_t1["HouseAge"] = pd.cut(data["HouseAge"], bins=bins).astype(str)

    # Expected output when integer bin codes are returned.
    data_t2 = data.copy()
    data_t2["HouseAge"] = pd.cut(data["HouseAge"], bins=bins, labels=False)

    transformer = ArbitraryDiscretiser(
        binning_dict=user_dict, return_object=False, return_boundaries=False
    )
    X = transformer.fit_transform(data)

    # init params
    assert transformer.return_object is False
    assert transformer.return_boundaries is False
    # fit params
    assert transformer.variables_ == ["HouseAge"]
    assert transformer.binner_dict_ == user_dict
    # transform params
    pd.testing.assert_frame_equal(X, data_t2)

    transformer = ArbitraryDiscretiser(
        binning_dict=user_dict, return_object=False, return_boundaries=True
    )
    X = transformer.fit_transform(data)
    pd.testing.assert_frame_equal(X, data_t1)