示例#1
0
def test_date_trans():
    """Exercise ``DateTransformer`` on columns "b", "c" and "d" of ``df``.

    Column "b" is given the explicit format ``%m/%d/%Y``; "c" and "d" are
    passed ``None`` as their format. ``df`` and the ``_6_*`` reference
    datetimes are not defined in this function and must be supplied by the
    surrounding scope.
    """
    transformer = DateTransformer(
        cols=["b", "c", "d"],
        date_format=["%m/%d/%Y", None, None],
    )
    converted = transformer.fit_transform(df)
    b, c, d = (converted[name].tolist() for name in ("b", "c", "d"))

    # positions that are expected to be null after the conversion
    for values, idx in ((b, 3), (c, 4), (d, 3)):
        assert values[idx] is pd.NaT

    # the first three rows must agree across all three columns
    for row, expected in enumerate((_6_1, _6_2, _6_3)):
        assert b[row] == d[row] == c[row] == expected
    assert c[3] == _6_4
    assert b[4] == d[4] == _6_5

    # fitting on column "a" with the default settings must raise
    assert_raises(ValueError, DateTransformer(cols=["a"]).fit, df)

    # ... whereas it goes through once int64 is listed as allowed.
    # NOTE(review): ("int64") is a plain str, not a 1-tuple -- confirm this
    # is the form DateTransformer expects.
    int_friendly = DateTransformer(cols=["a"], allowed_types=("int64"))
    int_friendly.fit(df)
示例#2
0
def test_chi2_validator():
    """Check ``DistHypothesisValidator`` against three-level integer data.

    Seeded uniform draws are bucketed into the levels 0, 1 and 2, then
    split 9000/1000 into a fitting set and a held-out set. The validator
    must accept the held-out set, reject a copy whose first column is all
    2s, and accept that same copy once ``categorical_strategy`` is ``None``.
    """
    rng = np.random.RandomState(42)
    uniform = rng.rand(10000, 4)

    # bucket the draws: <= 0.4 -> 0, (0.4, 0.75] -> 1, > 0.75 -> 2
    levels = (uniform > 0.4).astype(int)
    levels[uniform > 0.75] = 2

    n_train = 9000
    train, test = levels[:n_train, :], levels[n_train:, :]

    # both halves come from the same draws, so this must not raise
    validator = DistHypothesisValidator(action="raise")
    validator.fit(train).transform(test)

    # force a mismatch: the first held-out column holds nothing but 2s
    skewed = test.copy()
    skewed[:, 0] = 2
    assert_raises(ValueError, validator.transform, skewed)

    # with the categorical strategy switched off the same data passes
    validator.categorical_strategy = None
    validator.transform(skewed)
示例#3
0
def test_docstr_remove_section():
    """Check ``_DocstrMap.remove_section`` for present and absent sections.

    "See Also" is removed from ``docstr_c`` once; a second removal is a
    no-op with ``raise_if_missing=False`` and must raise ``ValueError``
    with ``raise_if_missing=True``.
    """
    section = "See Also"
    doc_map = _DocstrMap(docstr_c)

    # first removal: the section is present
    doc_map.remove_section(section)
    expected = """
    This is a docstr that has many sections.

    Parameters
    ----------
    x : object
        some object
    
    Notes
    -----
    This should never, under ANY circumstance, be used.
    """

    assert_docstrs_equal(doc_map.make(), expected)

    # second removal, tolerated: the rendered docstring must not change
    doc_map.remove_section(section, raise_if_missing=False)
    assert_docstrs_equal(doc_map.make(), expected)

    # second removal, strict: the missing section must be reported
    assert_raises(ValueError,
                  doc_map.remove_section,
                  section,
                  raise_if_missing=True)
示例#4
0
文件: test_qr.py 项目: waszczak/skoot
def test_qr():
    """Regression-check ``QRDecomposition`` on the module-level ``X``.

    Covers the stored ``qraux`` vector, both rank accessors, the
    coefficients obtained by passing ``X`` itself to ``get_coef`` (expected
    to be the 4x4 identity up to rounding), and the ``ValueError`` raised
    for a row-truncated input.
    """
    decomp = QRDecomposition(X)

    # auxiliary vector produced by the decomposition
    expected_aux = np.array([1.07056264, 1.0559255, 1.03857984, 1.04672249])
    assert_array_almost_equal(decomp.qraux, expected_aux)

    # both rank accessors must report 4
    assert decomp.get_rank() == 4
    assert decomp.get_R_rank() == 4

    # coefficients for X against itself
    expected_coef = np.array([
        [1.00000000e+00, 1.96618714e-16, -0.00000000e+00, -2.00339858e-16],
        [3.00642915e-16, 1.00000000e+00, -0.00000000e+00, 1.75787325e-16],
        [-4.04768123e-16, 4.83060041e-17, 1.00000000e+00, 4.23545747e-16],
        [-1.19866575e-16, -1.74365433e-17, 1.10216442e-17, 1.00000000e+00]
    ])
    assert_array_almost_equal(decomp.get_coef(X), expected_coef)

    # only the first 140 rows: must be rejected as a dimension mismatch
    truncated = X[:140, :]
    assert_raises(ValueError, decomp.get_coef, truncated)
示例#5
0
def test_check_dataframe_infinite():
    """Check ``check_dataframe`` with and without ``assert_all_finite``.

    Entries of the module-level ``X`` below 0.3 are masked out. By default
    the frame must come back unchanged; with ``assert_all_finite=True`` a
    ``ValueError`` is expected.
    """
    with_gaps = X.mask(X < 0.3)

    # default call: the first returned item must equal the input frame
    checked, _ = check_dataframe(with_gaps)
    assert checked.equals(with_gaps)

    # strict call: the masked entries must be rejected
    strict = {"assert_all_finite": True}
    assert_raises(ValueError, check_dataframe, with_gaps, **strict)
示例#6
0
def test_alternative_exception():
    """Check how ``assert_raises`` reacts to an unexpected exception type.

    The final assertion pins that, when ``assert_raises`` is told to expect
    ``ValueError`` but the callable raises ``TypeError``, a ``TypeError`` is
    what comes out of the ``assert_raises`` call.
    """
    def _raise_type_error():
        raise TypeError("This is a type error!")

    def _expect_wrong_exception():
        assert_raises(ValueError, _raise_type_error)

    # direct case: the expected type matches what is raised
    assert_raises(TypeError, _raise_type_error)
    # indirect case: the mismatch inside the helper surfaces as TypeError
    assert_raises(TypeError, _expect_wrong_exception)
示例#7
0
def test_hypothesis_validator():
    """Check ``DistHypothesisValidator`` on similar and on different data.

    ``X`` and ``X2`` are module-level fixtures. Fitting and transforming
    ``X``, or ``X`` plus a small jitter, must not raise; a validator built
    with ``action="raise"`` and fitted on ``X`` must raise ``ValueError``
    when transforming ``X2``.
    """
    # show that our validator passes for similar data
    DistHypothesisValidator().fit_transform(X)

    # Draw the jitter from a seeded generator so that every run sees the
    # same perturbation; the global np.random state would make a failure
    # here impossible to reproduce.
    rng = np.random.RandomState(42)
    jitter = rng.rand(*X.shape) * 0.001
    DistHypothesisValidator().fit_transform(X + jitter)

    # and show that we fail for different data
    assert_raises(ValueError,
                  DistHypothesisValidator(action="raise").fit(X).transform, X2)
示例#8
0
def test_fails_on_existing_location():
    """Check that ``assert_persistable`` raises ``OSError`` for a taken path.

    A placeholder file is written to ``exists.pkl`` in the working
    directory and is removed again whether or not the assertion holds.
    """
    existing = "exists.pkl"
    try:
        # occupy the target location
        with open(existing, "w") as handle:
            handle.write("Just creating a file so it raises")

        call_kwargs = dict(location=existing, X=None, y=None)
        assert_raises(OSError, assert_persistable, None, **call_kwargs)
    finally:
        # always clean up the placeholder
        os.unlink(existing)
示例#9
0
def test_custom_validator():
    """Check ``CustomValidator`` with no function and with a max-value check.

    ``X`` and ``X2`` are module-level fixtures; the custom check requires
    the maximum of what it is handed to be below 2.
    """
    def sub_2(v):
        return np.max(v) < 2.

    # no function supplied at all
    CustomValidator().fit_transform(X)

    # the custom check is expected to hold on X
    CustomValidator(func=sub_2).fit_transform(X)

    # ... but is expected to be violated by X2 when action="raise"
    strict = CustomValidator(action="raise", func=sub_2)
    assert_raises(ValueError, strict.fit(X).transform, X2)
示例#10
0
def test_nzv_bad_freq_cut():
    """Check that ``NearZeroVarianceFilter`` rejects bad ``freq_cut`` values.

    Fitting must raise ``ValueError`` for the float ``1.`` and for the
    string ``'1.'``.
    """
    records = np.array([[1, 2, 3], [4, 5, 3], [6, 7, 5]])
    X = pd.DataFrame.from_records(data=records, columns=['a', 'b', 'c'])

    # first an out-of-range float, then a value that is not numeric at all
    for bad_cut in (1., '1.'):
        nzv = NearZeroVarianceFilter(freq_cut=bad_cut)
        assert_raises(ValueError, nzv.fit, X)
示例#11
0
def test_validate_test_cols():
    """Check ``validate_test_set_columns`` with equal, wider and narrower sets.

    The call must pass while every fit column is present in the test
    columns and raise ``ValueError`` once one of them is missing.
    """
    fit_cols = ['a', 'b', 'c']

    # identical column lists
    validate_test_set_columns(fit_cols, list(fit_cols))

    # an extra test column is tolerated
    widened = fit_cols + ['d']
    validate_test_set_columns(fit_cols, widened)

    # dropping 'a' from the test columns is not
    missing_first = widened[1:]
    assert_raises(ValueError, validate_test_set_columns, fit_cols,
                  missing_first)
示例#12
0
def test_docstr_create_section():
    """Check ``_DocstrMap.create_section`` with and without overwriting.

    A "New Section" is added to ``docstr_b``. Creating it a second time
    with ``False`` as the third positional argument must raise
    ``ValueError``; with ``overwrite=True`` its content is replaced.
    """
    title = "New Section"
    doc_map = _DocstrMap(docstr_b)

    # add a brand-new section after the existing ones
    doc_map.create_section(title, ['    SomeContent', ''])

    expected = """
    This is another docstr. It does NOT contain a 'params'
    section, so it will not be hit be the append_params test.
    However, it does contain a 'see_also' section.

    See Also
    --------
    SomeType
    SomeOtherType
    
    New Section
    -----------
    SomeContent
    """

    assert_docstrs_equal(doc_map.make(), expected)

    # the section exists now, so creating it again without overwrite fails
    replacement = ['    SomeOtherContent', '']
    assert_raises(ValueError, doc_map.create_section, title,
                  replacement, False)

    # with overwrite=True the content is swapped out
    doc_map.create_section(title, ['    SomeOtherContent', ''],
                           overwrite=True)

    expected2 = """
    This is another docstr. It does NOT contain a 'params'
    section, so it will not be hit be the append_params test.
    However, it does contain a 'see_also' section.

    See Also
    --------
    SomeType
    SomeOtherType

    New Section
    -----------
    SomeOtherContent
    """

    assert_docstrs_equal(doc_map.make(), expected2)
示例#13
0
def test_linear_combos():
    """Check ``LinearCombinationFilter`` on the module-level frame ``Z``.

    With default columns, "C" is expected to be dropped, leaving "A" and
    "B". Restricted to "A" and "B" nothing is dropped, and a single column
    is rejected with ``ValueError``.
    """
    full_filter = LinearCombinationFilter().fit(Z)
    assert full_filter.drop_ == ['C'], full_filter.drop_

    reduced = full_filter.transform(Z)
    assert_array_equal(reduced.columns.values, ['A', 'B'])
    assert (reduced.B == 1).all()

    # no linear combinations among just A and B
    pair_filter = LinearCombinationFilter(cols=['A', 'B']).fit(Z)
    assert not pair_filter.drop_
    assert Z.equals(pair_filter.transform(Z))

    # a single feature is too few to fit on
    lone_filter = LinearCombinationFilter(cols=['A'])
    assert_raises(ValueError, lone_filter.fit, Z)
示例#14
0
def test_chunking():
    """Check ``chunk`` on a range, a generator, and two corner cases.

    Eleven items split with an argument of 3 are expected as pieces of
    sizes 4, 4 and 3. A one-item input with an argument of 1 gives a single
    piece, and with an argument of 2 consuming the result must raise
    ``ValueError``.
    """
    def listify(chunks):
        return list(map(list, chunks))

    from_range = listify(chunk(range(11), 3))
    assert from_range == [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10]]

    # a generator input must give the same pieces
    from_generator = listify(chunk((i for i in range(11)), 3))
    assert from_range == from_generator

    # corner case: a single item in a single piece
    assert listify(chunk([1], 1)) == [[1]]

    # the result is consumed inside the callable handed to assert_raises
    def consume_bad_chunking():
        return list(chunk([1], 2))

    assert_raises(ValueError, consume_bad_chunking)
示例#15
0
def test_date_trans():
    """Exercise ``DateTransformer`` on string, datetime and integer columns.

    The frame holds an integer column "a", a string column "b" that is
    parsed with an explicit format, a column "c" that already contains
    datetimes, and a string column "d" that is given no format. Each of
    "b", "c" and "d" has one missing entry.
    """
    parse_fmt = "%m-%d-%Y"
    _6_1, _6_2, _6_3, _6_4, _6_5 = (
        dt.strptime(text, parse_fmt)
        for text in ("06-01-2018", "06-02-2018", "06-03-2018",
                     "06-04-2018", "06-05-2018")
    )

    # columns: a (integers), b (format given), c (datetimes), d (no format)
    rows = [
        [1, "06/01/2018", _6_1, "06/01/2018"],
        [2, "06/02/2018", _6_2, "06/02/2018"],
        [3, "06/03/2018", _6_3, "06/03/2018"],
        [4, None, _6_4, None],
        [4, "06/05/2018", None, "06/05/2018"]
    ]
    df = pd.DataFrame.from_records(rows, columns=["a", "b", "c", "d"])

    transformer = DateTransformer(
        cols=["b", "c", "d"],
        date_format=["%m/%d/%Y", None, None],
    )
    converted = transformer.fit_transform(df)
    b, c, d = (converted[name].tolist() for name in ("b", "c", "d"))

    # the missing inputs must come back as NaT
    for values, idx in ((b, 3), (c, 4), (d, 3)):
        assert values[idx] is pd.NaT

    # the first three rows must agree across all three columns
    for row, expected in enumerate((_6_1, _6_2, _6_3)):
        assert b[row] == d[row] == c[row] == expected
    assert c[3] == _6_4
    assert b[4] == d[4] == _6_5

    # fitting on the integer column with default settings must raise
    assert_raises(ValueError, DateTransformer(cols=["a"]).fit, df)

    # ... whereas it goes through once int64 is listed as allowed.
    # NOTE(review): ("int64") is a plain str, not a 1-tuple -- confirm this
    # is the form DateTransformer expects.
    int_friendly = DateTransformer(cols=["a"], allowed_types=("int64"))
    int_friendly.fit(df)
示例#16
0
def test_interaction_corners():
    """Check corner cases of ``InteractionTermTransformer``.

    Covers three things on the module-level frame ``X_pd``: a non-callable
    ``interaction_function`` raises ``TypeError`` at fit time; restricting
    the interaction to columns "a" and "b" appends a single ``a_b_I``
    column with the expected values; and transforming a frame without
    column "a" raises ``ValueError``.
    """
    # assert fails with a non-function arg
    assert_raises(TypeError,
                  InteractionTermTransformer(interaction_function='a').fit,
                  X_pd)

    # test with just two cols
    trans = InteractionTermTransformer(cols=['a', 'b'])
    X_trans = trans.fit_transform(X_pd)
    expected_names = ['a', 'b', 'c', 'd', 'a_b_I']

    # Compare the complete lists: zipping the two would stop at the shorter
    # one and let a missing or extra column go unnoticed.
    assert X_trans.columns.tolist() == expected_names

    # ``.values`` replaces ``DataFrame.as_matrix()``, which was deprecated
    # and later removed from pandas.
    assert_array_equal(
        X_trans.values,
        np.array([[0, 1, 0, 1, 0], [0, 0, 1, 1, 0], [0, 0, 0, 1, 0],
                  [1, 1, 1, 0, 1]]))

    # test diff columns on test set to force value error
    X_test = X_pd.drop(['a'], axis=1)
    assert_raises(ValueError, trans.transform, X_test)
示例#17
0
def test_le_encode_ignore():
    """Check ``_le_transform`` with known levels and with unseen levels.

    The encoder is fitted on "a".."d". Unseen levels "e" and "f" are both
    expected to map to code 4 when ``handle="ignore"`` and to raise
    ``ValueError`` when ``handle="error"``.
    """
    encoder = LabelEncoder()
    known = np.array(["a", "b", "c", "d"])
    encoder.fit(known)

    # arguments shared by every call below
    shared = dict(le=encoder, sep="_")

    # every level was seen during fit
    name, codes, labels = _le_transform(col="C", vec=known,
                                        handle="error", **shared)
    assert name == "C", name
    assert_array_equal(codes, [0, 1, 2, 3])
    assert labels == ["C_a", "C_b", "C_c", "C_d"], labels

    # two unseen levels, ignored
    with_new = np.array(["a", "b", "c", "d", "e", "f"])
    name2, codes2, labels2 = _le_transform(col="C2", vec=with_new,
                                           handle="ignore", **shared)
    assert name2 == "C2", name2
    assert_array_equal(codes2, [0, 1, 2, 3, 4, 4])
    assert labels2 == ["C2_a", "C2_b", "C2_c", "C2_d"], labels2

    # the same unseen levels, not ignored
    assert_raises(ValueError, _le_transform, col="C", vec=with_new,
                  handle="error", **shared)
示例#18
0
def test_selective_imputer_bad_strategies():
    """Check that ``SelectiveImputer.fit`` rejects malformed strategies.

    Each case pairs the constructor arguments with the exception that
    fitting on the module-level ``X`` is expected to raise.
    """
    cases = [
        # unknown strategy name
        (ValueError, dict(strategy="bad strategy")),
        # more strategies than columns
        (ValueError, dict(cols=['a'], strategy=['mean', 'mean'])),
        # strategy of an unsupported type
        (TypeError, dict(strategy=1)),
        # dict strategy that does not line up with cols
        (ValueError, dict(cols=['a'],
                          strategy={'a': 'mean', 'b': 'median'})),
        # dict strategy keyed by the column name 'D'
        (ValueError, dict(strategy={'a': 'mean', 'D': 'median'})),
    ]

    for expected_exc, params in cases:
        imputer = SelectiveImputer(**params)
        assert_raises(expected_exc, imputer.fit, X)
示例#19
0
def test_get_callable_type_error():
    """Check that ``_get_callable`` raises ``TypeError`` for an int strategy."""
    registry = {"some strategy": (lambda: None)}
    not_a_strategy = 123  # neither a string nor a callable
    assert_raises(TypeError, _get_callable, not_a_strategy, registry)
示例#20
0
 def func_that_asserts_incorrectly():
     """Ask ``assert_raises`` for a ``ValueError`` from the wrapped callable.

     ``func_that_raises_type_error`` is not defined here; it is looked up
     in the enclosing scope. Going by its name it raises ``TypeError``,
     which would make the expectation stated here deliberately wrong.
     """
     assert_raises(ValueError, func_that_raises_type_error)
示例#21
0
def test_bagged_classifier_continuous():
    """Check that ``BaggedClassifierImputer.fit`` raises ``ValueError`` on ``X``.

    ``X`` is a module-level fixture, presumably continuous-valued going by
    the test name.
    """
    # fails on continuous data!
    assert_raises(ValueError, BaggedClassifierImputer().fit, X)
示例#22
0
 def func_that_fails_assertion():
     """Ask ``assert_raises`` for a ``ValueError`` from a no-op callable."""
     def _returns_quietly():
         return None

     assert_raises(ValueError, func=_returns_quietly)
示例#23
0
def test_failing_assert_raises():
    """Check that ``assert_raises`` itself fails when nothing is raised.

    The inner helper expects a ``ValueError`` from a callable that simply
    returns; that unmet expectation must surface as ``AssertionError``.
    """
    def _returns_quietly():
        return None

    def _expects_value_error():
        assert_raises(ValueError, func=_returns_quietly)

    assert_raises(AssertionError, _expects_value_error)
示例#24
0
def test_assert_raises():
    """Check that ``assert_raises`` accepts a ``ValueError`` from ``func_that_raises``.

    ``func_that_raises`` is not defined in this function; it is looked up
    at module level.
    """
    assert_raises(ValueError, func_that_raises)
示例#25
0
def test_haversine_bad_cols():
    """Check that ``HaversineFeatures(cols=None).fit`` raises ``TypeError``."""
    assert_raises(TypeError, HaversineFeatures(cols=None).fit, X)
示例#26
0
def test_bad_units():
    """Check that ``haversine_distance`` raises ``ValueError`` for "radians".

    The first four positional arguments are all ``None``; only the fifth
    argument carries a value.
    """
    call_args = (None, None, None, None, "radians")
    assert_raises(ValueError, haversine_distance, *call_args)
示例#27
0
def test_mcf_non_finite():
    """Check that ``MultiCorrFilter.fit`` raises ``ValueError`` on ``sparse``.

    ``sparse`` is a module-level fixture, presumably containing non-finite
    entries going by the test name.
    """
    assert_raises(ValueError, MultiCorrFilter(threshold=0.75).fit, sparse)
示例#28
0
def test_get_callable_key_error():
    """Check that ``_get_callable`` raises ``ValueError`` for an unknown key."""
    registry = {"some other strategy": (lambda: None)}
    unknown_name = "some strategy"  # a string, but not a key of the registry
    assert_raises(ValueError, _get_callable, unknown_name, registry)
示例#29
0
 def f(binner, exc):
     """Hand ``binner.fit`` and ``iris`` to ``assert_raises``, expecting ``exc``.

     ``iris`` is not a parameter; it is looked up in the enclosing scope.
     """
     assert_raises(exc, binner.fit, iris)
示例#30
0
def test_none_present():
    """Check that ``_get_present_values`` raises ``ValueError`` on all-NaN data.

    The series holds five NaN values, so its null mask is ``True``
    everywhere.
    """
    all_missing = pd.Series(np.full(5, np.nan))
    null_mask = all_missing.isnull()
    assert_raises(ValueError, _get_present_values, all_missing, null_mask)