示例#1
0
def test_selective_pca():
    """Fit ``SelectivePCA`` on one column and verify the rest pass through.

    Checks, in order: the columns that were not selected are unchanged by
    ``transform``; a ``PC1`` column appears and the output has four columns;
    ``get_decomposition`` returns a ``PCA`` once fitted and ``None`` on an
    unfitted instance; the ``cols`` attribute is kept as the list that was
    passed in; and a weighted fit over all columns produces output that
    differs from the unweighted single-column result.
    """
    # work on a copy so the shared fixture is never touched
    original = X.copy()

    # set the columns we'll fit to just be the first
    cols = [names[0]]  # 'a'

    # the "other" names, and their corresponding matrix.
    # NOTE: ``.values`` is used instead of ``DataFrame.as_matrix()``, which
    # was deprecated in pandas 0.23 and removed in pandas 1.0.
    comp_column_names = names[1:]
    compare_cols = original[comp_column_names].values

    # now fit PCA on the first column only
    transformer = SelectivePCA(cols=cols, n_components=0.85).fit(original)
    transformed = transformer.transform(original)

    # get the untouched columns to compare. These should be equal!!
    untouched_cols = transformed[comp_column_names].values
    assert_array_almost_equal(compare_cols, untouched_cols)

    # make sure the component is present in the columns
    assert 'PC1' in transformed.columns
    assert transformed.shape[1] == 4
    assert isinstance(transformer.get_decomposition(), PCA)
    assert SelectivePCA().get_decomposition() is None

    # test that cols was provided
    assert isinstance(transformer.cols, list)
    assert transformer.cols[0] == cols[0]

    # what if we want to weight it? The weighted output must NOT equal the
    # earlier result, so assert_array_equal is expected to fail here.
    pca_weighted = SelectivePCA(do_weight=True, n_components=0.99, as_df=True)\
        .fit_transform(original)
    assert_raises(AssertionError, assert_array_equal, pca_weighted,
                  transformed)
示例#2
0
文件: test_qr.py 项目: chrinide/skoot
def test_qr():
    """Check ``QRDecomposition`` of the ``X`` fixture against known values.

    Covers the ``qraux`` vector, both rank accessors, the coefficients
    obtained from ``get_coef(X)``, and the ``ValueError`` raised when
    ``get_coef`` is handed a row-sliced ``X``.
    """
    expected_qraux = np.array(
        [1.07056264, 1.0559255, 1.03857984, 1.04672249])
    expected_coef = np.array([
        [1.00000000e+00, 1.96618714e-16, -0.00000000e+00, -2.00339858e-16],
        [3.00642915e-16, 1.00000000e+00, -0.00000000e+00, 1.75787325e-16],
        [-4.04768123e-16, 4.83060041e-17, 1.00000000e+00, 4.23545747e-16],
        [-1.19866575e-16, -1.74365433e-17, 1.10216442e-17, 1.00000000e+00]
    ])

    decomp = QRDecomposition(X)

    # the decomposition itself comes first
    assert_array_almost_equal(decomp.qraux, expected_qraux)

    # rank of the decomposition, then rank of R: both should be 4
    assert decomp.get_rank() == 4
    assert decomp.get_R_rank() == 4

    # coefficients computed against X itself
    assert_array_almost_equal(decomp.get_coef(X), expected_coef)

    # a slice with a different number of rows triggers a dimension error
    assert_raises(ValueError, decomp.get_coef, X[:140, :])
示例#3
0
def test_alternative_exception():
    """Verify ``assert_raises`` lets an unexpected exception type through.

    The inner helper expects a ``ValueError`` from a callable that raises
    ``TypeError``; the outer assertion confirms that a ``TypeError`` is what
    comes out of that helper.
    """
    def raise_type_error():
        raise TypeError("This is a type error!")

    def expect_wrong_exception():
        # deliberately asks for the wrong exception class
        assert_raises(ValueError, raise_type_error)

    # sanity check: the callable really does raise TypeError
    assert_raises(TypeError, raise_type_error)

    # the mismatched expectation surfaces the original TypeError
    assert_raises(TypeError, expect_wrong_exception)
示例#4
0
def test_check_dataframe_infinite():
    """Check ``check_dataframe`` with and without ``assert_all_finite``.

    Entries of ``X`` below 0.3 are masked out first. With default arguments
    the frame is returned equal to the input; with ``assert_all_finite=True``
    a ``ValueError`` is expected.
    """
    with_missing = X.mask(X < 0.3)

    # default call: no error, and the returned frame equals the input
    checked, _ = check_dataframe(with_missing)
    assert checked.equals(with_missing)

    # strict call: the masked entries must be rejected
    assert_raises(ValueError, check_dataframe, with_missing,
                  assert_all_finite=True)
示例#5
0
def test_nzv_bad_freq_cut():
    """``NearZeroVarianceFilter.fit`` must reject invalid ``freq_cut`` values.

    Two values are tried in turn: the float ``1.`` and the string ``'1.'``.
    Each is expected to raise ``ValueError`` at fit time.
    """
    frame = pd.DataFrame.from_records(
        data=np.array([[1, 2, 3],
                       [4, 5, 3],
                       [6, 7, 5]]),
        columns=['a', 'b', 'c'])

    # first a bad float value, then a value that is neither float nor int
    for bad_freq_cut in (1., '1.'):
        nzv = NearZeroVarianceFilter(freq_cut=bad_freq_cut)
        assert_raises(ValueError, nzv.fit, frame)
示例#6
0
def test_validate_test_cols():
    """Exercise ``validate_test_set_columns`` with matching and missing names.

    The call is expected to succeed while every fit column is present in the
    test columns (extra test columns are fine) and to raise ``ValueError``
    once a fit column is missing.
    """
    fit_cols = ['a', 'b', 'c']

    # identical column sets: passes
    test_cols = list(fit_cols)
    validate_test_set_columns(fit_cols, test_cols)

    # an extra test column is still fine: all fit columns are present
    test_cols = test_cols + ['d']
    validate_test_set_columns(fit_cols, test_cols)

    # dropping the first test column leaves 'a' missing: must raise
    assert_raises(ValueError, validate_test_set_columns,
                  fit_cols, test_cols[1:])
示例#7
0
def test_linear_combos():
    """Check ``LinearCombinationFilter`` on the ``Z`` fixture.

    Fitting on all columns should mark ``'C'`` for removal; restricting the
    filter to ``['A', 'B']`` should drop nothing; and a single column is too
    few features, which must raise ``ValueError`` at fit time.
    """
    full_filter = LinearCombinationFilter().fit(Z)
    assert full_filter.drop_ == ['C'], full_filter.drop_

    # after the transform only A and B remain, and B is all ones
    reduced = full_filter.transform(Z)
    assert_array_equal(reduced.columns.values, ['A', 'B'])
    assert (reduced.B == 1).all()

    # no linear combos among A and B: nothing dropped, frame unchanged
    pair_filter = LinearCombinationFilter(cols=['A', 'B']).fit(Z)
    assert not pair_filter.drop_
    assert Z.equals(pair_filter.transform(Z))

    # too few features
    single_filter = LinearCombinationFilter(cols=['A'])
    assert_raises(ValueError, single_filter.fit, Z)
示例#8
0
def test_interaction_corners():
    """Exercise corner cases of ``InteractionTermTransformer``.

    Covers: a non-function ``interaction_function`` raising ``TypeError`` at
    fit time; the column names and values produced when only ``'a'`` and
    ``'b'`` are interacted; and the ``ValueError`` raised when the frame
    passed to ``transform`` lacks a column that was present at fit time.
    """
    # assert fails with a non-function arg
    assert_raises(TypeError,
                  InteractionTermTransformer(interaction_function='a').fit,
                  X_pd)

    # test with just two cols explicitly selected
    trans = InteractionTermTransformer(cols=['a', 'b'])
    X_trans = trans.fit_transform(X_pd)
    expected_names = ['a', 'b', 'c', 'd', 'a_b_I']

    # Compare the full lists: a zip()-based check stops at the shorter
    # sequence and would silently accept missing or extra columns.
    assert X_trans.columns.tolist() == expected_names

    # NOTE: ``.values`` is used instead of ``DataFrame.as_matrix()``, which
    # was deprecated in pandas 0.23 and removed in pandas 1.0.
    assert_array_equal(
        X_trans.values,
        np.array([[0, 1, 0, 1, 0], [0, 0, 1, 1, 0], [0, 0, 0, 1, 0],
                  [1, 1, 1, 0, 1]]))

    # test diff columns on test set to force value error
    X_test = X_pd.drop(['a'], axis=1)
    assert_raises(ValueError, trans.transform, X_test)
示例#9
0
def test_selective_imputer_bad_strategies():
    """``SelectiveImputer.fit`` must reject malformed ``strategy`` arguments.

    Each entry below pairs the exception expected from ``fit`` with the
    constructor keyword arguments that should provoke it. Imputers are built
    and fitted one at a time, in the listed order.
    """
    bad_configs = (
        # a bad strategy string
        (ValueError, dict(strategy="bad strategy")),
        # a dim mismatch between cols and strategy
        (ValueError, dict(cols=['a'], strategy=['mean', 'mean'])),
        # a strategy of a bad type
        (TypeError, dict(strategy=1)),
        # dict input that does not match cols dim-wise
        (ValueError, dict(cols=['a'],
                          strategy={'a': 'mean', 'b': 'median'})),
        # dict input with bad columns
        (ValueError, dict(strategy={'a': 'mean', 'D': 'median'})),
    )

    for expected_error, kwargs in bad_configs:
        imputer = SelectiveImputer(**kwargs)
        assert_raises(expected_error, imputer.fit, X)
示例#10
0
def test_failing_assert_raises():
    """``assert_raises`` itself must fail when nothing is raised.

    The inner helper expects a ``ValueError`` from a callable that simply
    returns ``None``; the outer check confirms this produces an
    ``AssertionError``.
    """
    def expect_error_from_noop():
        # the lambda never raises, so the expectation cannot be met
        assert_raises(ValueError, func=(lambda: None))

    assert_raises(AssertionError, expect_error_from_noop)
示例#11
0
def test_assert_raises():
    """Happy path: ``func_that_raises`` is expected to raise ``ValueError``."""
    expected_error = ValueError
    assert_raises(expected_error, func_that_raises)
示例#12
0
def test_mcf_non_finite():
    """``MultiCorrFilter.fit`` must raise ``ValueError`` on ``sparse``.

    NOTE(review): judging by the test name, ``sparse`` is presumably a
    fixture containing non-finite values; confirm against its definition.
    """
    fit_method = MultiCorrFilter(threshold=0.75).fit
    assert_raises(ValueError, fit_method, sparse)
示例#13
0
def test_check_dataframe_bad_X():
    """``check_dataframe`` must raise ``TypeError`` when given a plain str."""
    not_a_frame = 'string'
    assert_raises(TypeError, check_dataframe, not_a_frame)
示例#14
0
def test_check_dataframe_array_cols():
    """``check_dataframe`` must reject these ``cols`` for ``array``.

    NOTE(review): presumably the five requested names do not match the
    width of ``array``; confirm against the fixture's shape.
    """
    requested_cols = [1, 2, 3, 4, 5]
    assert_raises(ValueError, check_dataframe, array, cols=requested_cols)
示例#15
0
 def func_that_asserts_incorrectly():
     # Asks assert_raises for a ValueError from a callable that, going by
     # its name, raises TypeError instead, so the expectation is
     # deliberately wrong (the callable is defined outside this fragment).
     assert_raises(ValueError, func_that_raises_type_error)
示例#16
0
def test_get_callable_key_error():
    """``_get_callable`` must raise ``ValueError`` for an unknown key.

    The requested strategy name is a string that is absent from the mapping
    of valid strategies.
    """
    valid_strategies = {"some other strategy": (lambda: None)}
    assert_raises(ValueError, _get_callable,
                  "some strategy", valid_strategies)
示例#17
0
def test_bagged_classifier_continuous():
    """``BaggedClassifierImputer.fit`` must raise ``ValueError`` on ``X``.

    NOTE(review): per the test name, ``X`` is presumably continuous data,
    which a classifier-based imputer cannot handle; confirm.
    """
    fit_method = BaggedClassifierImputer().fit

    # fitting on continuous data is expected to fail
    assert_raises(ValueError, fit_method, X)
示例#18
0
def test_bagged_regressor_single_predictor_corner():
    """``BaggedRegressorImputer`` must fail with ``'a'`` as sole predictor."""
    # only one predictor is supplied, and it is also one of the columns
    # being imputed, so fitting is expected to raise
    single_predictor = ['a']
    regressor_imputer = BaggedRegressorImputer(predictors=single_predictor)
    assert_raises(ValueError, regressor_imputer.fit, X)
示例#19
0
def test_validate_multiple_rows():
    """``validate_multiple_rows`` rejects one row and accepts two."""
    two_rows = np.random.rand(2, 2)
    one_row = two_rows[:1, :]

    # a single-row slice must raise
    assert_raises(ValueError, validate_multiple_rows, "cls", one_row)

    # the full two-row array is accepted without error
    validate_multiple_rows("cls", two_rows)
示例#20
0
 def func_that_fails_assertion():
     # The lambda returns None without raising, so the ValueError that
     # assert_raises is asked to expect can never occur here.
     assert_raises(ValueError, func=(lambda: None))
示例#21
0
def test_get_callable_type_error():
    """``_get_callable`` must raise ``TypeError`` for a non-str, non-callable.

    The integer ``123`` is passed where a strategy name or callable is
    expected.
    """
    valid_strategies = {"some strategy": (lambda: None)}
    bad_strategy = 123  # neither a string nor a callable
    assert_raises(TypeError, _get_callable, bad_strategy, valid_strategies)
示例#22
0
def test_check_dataframe_bad_cols():
    """``check_dataframe`` must raise ``ValueError`` for these ``cols``.

    NOTE(review): presumably neither ``'bad'`` nor ``'cols'`` is a column of
    ``X``; confirm against the fixture.
    """
    requested_cols = ['bad', 'cols']
    assert_raises(ValueError, check_dataframe, X, cols=requested_cols)
示例#23
0
def test_raise_build_error():
    """``raise_build_error`` must raise ``ImportError`` from a caught error.

    A dummy ``ValueError`` is raised and caught so that a genuinely raised
    exception instance is handed to ``raise_build_error``.
    """
    try:
        raise ValueError("this is a dummy err msg")
    except ValueError as caught:
        # called inside the handler, exactly where the error was caught
        assert_raises(ImportError, raise_build_error, caught)
示例#24
0
def test_none_present():
    """``_get_present_values`` must raise when every value is missing.

    A five-element all-NaN series is paired with its null mask, and the
    call is expected to raise ``ValueError``.
    """
    all_missing = pd.Series(np.ones(5) * np.nan)
    null_mask = pd.isnull(all_missing)
    assert_raises(ValueError, _get_present_values, all_missing, null_mask)