def test_non_transformer(dataset_with_single_grouping):
    """Fitting must raise ``ValueError`` when the wrapped estimator is not a transformer."""
    features, target, _, _, grouper = dataset_with_single_grouping

    # LinearRegression is deliberately used here because it is not a transformer
    grouped = GroupedTransformer(LinearRegression(), groups=grouper)

    with pytest.raises(ValueError):
        grouped.fit(features, target)
def test_exception_in_group(multiple_obs_fitter):
    """A failure while fitting one group must surface as ``ValueError`` naming that group.

    The grouping column (index 0) holds the labels 1, 1 and 2, so group 2
    consists of a single row.
    """
    X = np.array([
        [1, 2],
        [1, 0],
        [2, 1],
    ])

    # Per the fixture's contract the estimator only works on groups with more
    # than one row, so fitting is expected to fail for group 2
    transformer = GroupedTransformer(multiple_obs_fitter, groups=0, use_global_model=False)

    with pytest.raises(ValueError) as exc_info:
        transformer.fit(X)

    # Must run after the ``with`` block: statements following the raising call
    # inside the block are never executed.  Inspect ``exc_info.value`` (the
    # raised exception) rather than the ExceptionInfo wrapper, whose string
    # form is not the exception message.
    assert "group 2" in str(exc_info.value)
def test_missing_groups_transform_noglobal(dataset_with_single_grouping, scaling_range):
    """Transforming rows of a new group must raise ``ValueError`` when no global model is used."""
    X, y, _, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(
        MinMaxScaler(scaling_range), groups=grouper, use_global_model=False
    )
    transformer.fit(X_with_groups, y)

    # Two rows whose first column carries group label 3 (intended as a new group)
    new_group_column = np.array([[3], [3]])
    # Feature values just below the fitted minimum and just above the fitted maximum
    out_of_range_rows = np.stack([X.min(axis=0) - 1, X.max(axis=0) + 1], axis=0)
    X_test = np.concatenate([new_group_column, out_of_range_rows], axis=1)

    with pytest.raises(ValueError):
        transformer.transform(X_test)
def test_missing_groups_transform_global(dataset_with_single_grouping, scaling_range):
    """Rows of a new group must be scaled to the full range when the default settings are used."""
    X, y, _, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range), groups=grouper)
    transformer.fit(X_with_groups, y)

    # Two rows whose first column carries group label 3 (intended as a new group);
    # the remaining columns hold the column-wise minimum and maximum of X
    new_group_column = np.array([[3], [3]])
    extreme_rows = np.stack([X.min(axis=0), X.max(axis=0)], axis=0)
    X_test = np.concatenate([new_group_column, extreme_rows], axis=1)

    transformed = transformer.transform(X_test)

    # The row of minima should map to the lower bound, the row of maxima to the upper bound
    lower_row, upper_row = transformed[0, :], transformed[1, :]
    assert np.allclose(lower_row, scaling_range[0])
    assert np.allclose(upper_row, scaling_range[1])
def test_all_groups_scaled(dataset_with_single_grouping, scaling_range):
    """Every group's transformed values must span exactly the requested scaling range."""
    _, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range), groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    # Put the group label next to the transformed features so we can aggregate per group
    group_labels = pd.Series(groups.flatten(), name="G")
    df_with_groups = pd.concat([group_labels, pd.DataFrame(transformed)], axis=1)
    per_group = df_with_groups.groupby("G")

    assert np.allclose(per_group.min(), scaling_range[0])
    assert np.allclose(per_group.max(), scaling_range[1])
def test_group_correlation_minmaxscaler(dataset_with_single_grouping, scaling_range):
    """Within each group, original and transformed columns must be perfectly correlated."""
    X, y, groups, X_with_groups, grouper = dataset_with_single_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range), groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    group_labels = pd.Series(groups.flatten(), name="group")

    # MinMaxScaler scales linearly, so every per-group correlation is expected to be 1
    for col in range(X.shape[1]):
        paired = pd.concat(
            [
                group_labels,
                pd.Series(X[:, col], name="original"),
                pd.Series(transformed[:, col], name="transformed"),
            ],
            axis=1,
        )
        correlations = paired.groupby("group").corr()
        assert np.allclose(correlations, 1)
def test_multiple_grouping_columns(dataset_with_multiple_grouping, scaling_range):
    """Scaling must be applied per combination of the two grouping columns."""
    _, y, groups, X_with_groups, grouper = dataset_with_multiple_grouping

    transformer = GroupedTransformer(MinMaxScaler(scaling_range), groups=grouper)
    fitted = transformer.fit(X_with_groups, y)
    transformed = fitted.transform(X_with_groups)

    group_frame = pd.DataFrame(groups, columns=["A", "B"])
    df_with_groups = pd.concat([group_frame, pd.DataFrame(transformed)], axis=1)
    per_group = df_with_groups.groupby(["A", "B"])

    assert np.allclose(per_group.min(), scaling_range[0])

    # A group with a single element is scaled to the lower bound, so each group
    # maximum has to equal one of the two bounds of the range
    maxes = per_group.max()
    at_upper_bound = np.isclose(maxes, scaling_range[1])
    at_lower_bound = np.isclose(maxes, scaling_range[0])
    assert np.all(at_upper_bound | at_lower_bound)

    # At least some groups hold more than one element, so the upper bound must occur
    assert np.any(at_upper_bound)