Пример #1
0
def test_subset_transformer_mutate(sample_data):
    """Check that SubsetTransformer alters only the column it is given.

    The transformer is built on the ``'size'`` column with a function that
    adds one. After ``fit_transform`` on the first frame and ``transform``
    on the second, the ``'size'`` column must differ from the original,
    while every other column must be passed through unchanged.

    Args:
        sample_data: fixture unpacked into ``(X_tr, X_te)``; presumably the
            train and test frames -- confirm against the fixture.
    """
    X_tr, X_te = sample_data

    # Named ``input_col`` rather than ``input`` so the builtin is not
    # shadowed inside the test.
    input_col = 'size'
    t = ballet.eng.SubsetTransformer(input_col, lambda x: x + 1)
    result_tr = t.fit_transform(X_tr)
    result_te = t.transform(X_te)

    # the input col is modified
    assert_series_not_equal(result_tr[input_col], X_tr[input_col])
    assert_series_not_equal(result_te[input_col], X_te[input_col])

    # the complement is passed through unchanged
    complement = [col for col in X_tr.columns if col != input_col]
    assert_frame_equal(result_tr[complement], X_tr[complement])
    assert_frame_equal(result_te[complement], X_te[complement])
Пример #2
0
def test_conditional_transformer_both_satisfied(sample_data):
    """Both columns meet the condition, so both must be transformed.

    The condition is ``sum() > 0`` and the transformation adds one. The
    transformer is fitted on the first frame only and then applied to each
    frame in turn; ``'value'`` and ``'size'`` are expected to differ from
    their originals in both.

    Args:
        sample_data: fixture unpacked into ``(X_tr, X_te)``; presumably the
            train and test frames -- confirm against the fixture.
    """
    X_tr, X_te = sample_data
    checked_columns = ('value', 'size')

    t = ballet.eng.ConditionalTransformer(
        lambda ser: ser.sum() > 0,
        lambda ser: ser + 1,
    )
    t.fit(X_tr)

    # every checked column is expected to be selected by sum > 0, hence
    # changed in both the fitted frame and the held-out frame
    for frame in (X_tr, X_te):
        transformed = t.transform(frame)
        for col in checked_columns:
            assert_series_not_equal(transformed[col], frame[col])
Пример #3
0
def test_conditional_transformer_one_satisfied(sample_data):
    """Only the column meeting the condition is transformed.

    The condition requires every non-null entry to be at least 3; the
    transformation fills nulls with 0 and adds one. ``'size'`` is expected
    to satisfy the condition and change, while ``'value'`` is expected to
    fail it and come back untouched.

    Args:
        sample_data: fixture unpacked into ``(X_tr, X_te)``; presumably the
            train and test frames -- confirm against the fixture.
    """
    X_tr, X_te = sample_data

    t = ballet.eng.ConditionalTransformer(
        lambda ser: (ser.dropna() >= 3).all(),
        lambda ser: ser.fillna(0) + 1,
    )
    t.fit(X_tr)

    # (transformed, original) pairs for the fitted and held-out frames
    pairs = [
        (t.transform(X_tr), X_tr),
        (t.transform(X_te), X_te),
    ]

    # only 'size' is selected by the condition
    for transformed, original in pairs:
        assert_series_not_equal(transformed['size'], original['size'])

    # 'value' is not selected by the condition, has items less than 3
    for transformed, original in pairs:
        assert_series_equal(transformed['value'], original['value'])
Пример #4
0
def test_conditional_transformer_unsatisfy_transform(sample_data):
    """Columns failing the condition receive the third (fallback) callable.

    The transformer gets three callables: the condition (all non-null
    entries at least 3), an identity function, and a function that fills
    nulls with 0 and subtracts one. ``'size'`` is expected to meet the
    condition and so come back equal, while ``'value'`` is expected to fail
    it and be changed by the third callable.

    Args:
        sample_data: fixture unpacked into ``(X_tr, X_te)``; presumably the
            train and test frames -- confirm against the fixture.
    """
    X_tr, X_te = sample_data

    t = ballet.eng.ConditionalTransformer(
        lambda ser: (ser.dropna() >= 3).all(),
        lambda ser: ser,
        lambda ser: ser.fillna(0) - 1,
    )
    t.fit(X_tr)

    # (transformed, original) pairs for the fitted and held-out frames
    pairs = [
        (t.transform(X_tr), X_tr),
        (t.transform(X_te), X_te),
    ]

    # size goes through the satisfy transform, which is a pass-through
    for transformed, original in pairs:
        assert_series_equal(transformed['size'], original['size'])

    # value goes through the unsatisfy transform and is not equal
    for transformed, original in pairs:
        assert_series_not_equal(transformed['value'], original['value'])
Пример #5
0
def test_assert_series_not_equal():
    """Exercise the ``assert_series_not_equal`` helper directly.

    The helper must raise ``AssertionError`` when given a series and an
    exact copy of it, and must return quietly when the second argument has
    different values, has a different length, or is a DataFrame.
    """
    base = pd.Series(np.arange(21))
    duplicate = base.copy()

    # identical contents: the "not equal" assertion has to fail
    with pytest.raises(AssertionError):
        assert_series_not_equal(base, duplicate)

    # same length, every value shifted by one
    assert_series_not_equal(base, base + 1)

    # different length
    assert_series_not_equal(base, pd.Series(np.arange(17)))

    # a DataFrame instead of a Series
    assert_series_not_equal(base, pdt.makeDataFrame())