def test_drop_null_transformer_transform_default_pct_null_threshold():
    """Check transform output for a default-constructed DropNullColumns.

    The input has one column where four of five values are null and one
    column with no nulls; the transformed frame must equal the input.
    """
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })

    transformer = DropNullColumns()
    transformer.fit(frame)
    transformed = transformer.transform(frame)

    assert transformed.equals(frame)
def test_drop_null_transformer_transform_default_pct_null_threshold():
    """Check transform output for a default-constructed DropNullColumns.

    The expected frame is the input with 'lots_of_null' cast to float64 and
    'no_null' cast to the nullable Int64 dtype; it is compared against the
    transform result after conversion with ``to_dataframe()``.
    """
    frame = pd.DataFrame({
        'lots_of_null': [None, None, None, None, 5],
        'no_null': [1, 2, 3, 4, 5],
    })
    expected = frame.astype({'lots_of_null': 'float64', 'no_null': 'Int64'})

    transformer = DropNullColumns()
    transformer.fit(frame)
    result = transformer.transform(frame).to_dataframe()

    assert_frame_equal(expected, result)
def test_drop_null_transformer_transform_boundary_pct_null_threshold():
    """Check transform at the two boundary values of pct_null_threshold.

    With a threshold of 0.0 the transformed frame must be empty; with a
    threshold of 1.0 only the 'all_null' column must be missing. The input
    frame must compare equal to a freshly built copy afterwards.
    """
    def build_frame():
        # Fresh, independent copy of the fixture data on every call.
        return pd.DataFrame({
            'all_null': [None, None, None, None, None],
            'lots_of_null': [None, None, None, None, 5],
            'some_null': [None, 0, 3, 4, 5],
        })

    X = build_frame()

    zero_threshold = DropNullColumns(pct_null_threshold=0.0)
    zero_threshold.fit(X)
    assert zero_threshold.transform(X).empty

    full_threshold = DropNullColumns(pct_null_threshold=1.0)
    full_threshold.fit(X)
    transformed = full_threshold.transform(X)
    without_all_null = X.drop(columns=["all_null"])
    assert transformed.equals(without_all_null)

    # check that X is untouched
    assert X.equals(build_frame())
def test_drop_null_transformer_woodwork_custom_overrides_returned_by_components(
        X_df, has_nan):
    """Check that a logical-type override on column 0 survives transform.

    For each candidate logical type, a woodwork DataTable is built from
    ``X_df`` with column 0 overridden; the transform result must be a
    DataTable whose logical types are exactly ``{0: logical_type}``.

    Args:
        X_df: Input frame (presumably supplied by a fixture -- defined
            outside this block).
        has_nan: When truthy, an 'all null' column of three NaNs is added
            to ``X_df`` before the checks.
    """
    y = pd.Series([1, 2, 1])
    if has_nan:
        X_df['all null'] = [np.nan, np.nan, np.nan]

    for logical_type in (Integer, Double, Categorical, NaturalLanguage,
                         Boolean):
        try:
            X = ww.DataTable(X_df, logical_types={0: logical_type})
        except TypeError:
            # Overrides that DataTable rejects with TypeError are skipped.
            continue
        else:
            transformer = DropNullColumns()
            transformer.fit(X)
            output = transformer.transform(X, y)

            assert isinstance(output, ww.DataTable)
            assert output.logical_types == {0: logical_type}
def test_drop_null_transformer_transform_custom_pct_null_threshold():
    """Check transform with pct_null_threshold=0.5.

    The transformed frame must equal the input with the 'lots_of_null' and
    'all_null' columns removed, and the input frame must compare equal to a
    freshly built copy afterwards.
    """
    def build_frame():
        # Fresh, independent copy of the fixture data on every call.
        return pd.DataFrame({
            'lots_of_null': [None, None, None, None, 5],
            'all_null': [None, None, None, None, None],
            'no_null': [1, 2, 3, 4, 5],
        })

    X = build_frame()

    transformer = DropNullColumns(pct_null_threshold=0.5)
    transformer.fit(X)
    transformed = transformer.transform(X)
    remaining = X.drop(columns=["lots_of_null", "all_null"])
    assert transformed.equals(remaining)

    # check that X is untouched
    assert X.equals(build_frame())
def test_drop_null_transformer_transform_custom_pct_null_threshold():
    """Check transform with pct_null_threshold=0.5.

    The expected frame is the input without 'lots_of_null' and 'all_null',
    with 'no_null' cast to the nullable Int64 dtype; it is compared against
    the transform result after conversion with ``to_dataframe()``. The input
    frame must compare equal to a freshly built copy afterwards.
    """
    def build_frame():
        # Fresh, independent copy of the fixture data on every call.
        return pd.DataFrame({
            'lots_of_null': [None, None, None, None, 5],
            'all_null': [None, None, None, None, None],
            'no_null': [1, 2, 3, 4, 5],
        })

    X = build_frame()
    expected = (
        X.drop(columns=["lots_of_null", "all_null"])
        .astype({"no_null": "Int64"})
    )

    transformer = DropNullColumns(pct_null_threshold=0.5)
    transformer.fit(X)
    result = transformer.transform(X).to_dataframe()
    assert_frame_equal(expected, result)

    # check that X is untouched
    assert X.equals(build_frame())