def test_date_trans():
    """Check ``DateTransformer`` parsing, null handling and dtype guarding.

    Relies on ``df`` and the reference datetimes ``_6_1`` .. ``_6_5`` being
    provided by the enclosing scope.
    """
    converter = DateTransformer(cols=["b", "c", "d"],
                                date_format=["%m/%d/%Y", None, None])
    trans = converter.fit_transform(df)

    b = trans["b"].tolist()
    c = trans["c"].tolist()
    d = trans["d"].tolist()

    # missing inputs must come back as NaT
    assert b[3] is pd.NaT
    assert c[4] is pd.NaT
    assert d[3] is pd.NaT

    # every populated cell must equal the matching reference datetime
    assert b[0] == d[0] == c[0] == _6_1
    assert b[1] == d[1] == c[1] == _6_2
    assert b[2] == d[2] == c[2] == _6_3
    assert c[3] == _6_4
    assert b[4] == d[4] == _6_5

    # column "a" is rejected at fit time with the default allowed types
    assert_raises(ValueError, DateTransformer(cols=["a"]).fit, df)

    # ... but fitting succeeds once int64 is explicitly allowed.
    # NOTE: ``("int64")`` is just the string "int64"; the trailing comma is
    # what makes this the one-element tuple that was presumably intended.
    DateTransformer(cols=["a"], allowed_types=("int64",)).fit(df)
def test_chi2_validator():
    """Exercise ``DistHypothesisValidator`` on three-level (0/1/2) features."""
    raw = np.random.RandomState(42).rand(10000, 4)

    # bucket the draws into the levels 0, 1 and 2 so that every column
    # becomes categorical
    x = (raw > 0.4).astype(int)
    x[raw > 0.75] = 2

    # first 9000 rows for fitting, the remainder for testing
    train, test = x[:9000, :], x[9000:, :]

    # both splits have roughly the same number of occurrences per level,
    # so the validator should work at this point
    val = DistHypothesisValidator(action="raise")
    val.fit(train).transform(test)

    # force a failure: column 0 of the (copied) test set becomes all 2s
    t2 = test.copy()
    t2[:, 0] = 2
    assert_raises(ValueError, val.transform, t2)

    # with no categorical strategy set, the same data passes
    val.categorical_strategy = None
    val.transform(t2)
def test_docstr_remove_section():
    """Check ``_DocstrMap.remove_section`` for present and absent sections."""
    doc_map = _DocstrMap(docstr_c)

    # dropping a section that exists leaves the remaining ones intact
    doc_map.remove_section("See Also")

    # NOTE(review): the line layout of this literal was reconstructed from a
    # whitespace-collapsed source -- confirm it matches ``docstr_c``.
    expected = """
    This is a docstr that has many sections.

    Parameters
    ----------
    x : object
        some object

    Notes
    -----
    This should never, under ANY circumstance, be used.
    """
    assert_docstrs_equal(doc_map.make(), expected)

    # removing it a second time is a no-op when raising is switched off
    doc_map.remove_section("See Also", raise_if_missing=False)
    assert_docstrs_equal(doc_map.make(), expected)

    # ... and an error when raising is requested
    assert_raises(ValueError, doc_map.remove_section,
                  "See Also", raise_if_missing=True)
def test_qr():
    """Verify ``QRDecomposition`` aux values, ranks and coefficients on X."""
    decomp = QRDecomposition(X)

    # the decomposition itself: check the qraux vector
    expected_aux = np.array([1.07056264, 1.0559255, 1.03857984, 1.04672249])
    assert_array_almost_equal(decomp.qraux, expected_aux)

    # both the decomposition and its R matrix should report rank 4
    assert decomp.get_rank() == 4
    assert decomp.get_R_rank() == 4

    # coefficients obtained for X itself
    expected_coef = np.array([
        [1.00000000e+00, 1.96618714e-16, -0.00000000e+00, -2.00339858e-16],
        [3.00642915e-16, 1.00000000e+00, -0.00000000e+00, 1.75787325e-16],
        [-4.04768123e-16, 4.83060041e-17, 1.00000000e+00, 4.23545747e-16],
        [-1.19866575e-16, -1.74365433e-17, 1.10216442e-17, 1.00000000e+00]
    ])
    assert_array_almost_equal(decomp.get_coef(X), expected_coef)

    # a row-sliced X triggers the dimension error
    assert_raises(ValueError, decomp.get_coef, X[:140, :])
def test_check_dataframe_infinite():
    """``check_dataframe`` only rejects masked values when asked to."""
    # mask out every value below 0.3
    X_nan = X.mask(X < 0.3)

    # without assert_all_finite the frame is returned unchanged
    checked, _ = check_dataframe(X_nan)
    assert checked.equals(X_nan)

    # with assert_all_finite=True the same frame must raise
    assert_raises(ValueError, check_dataframe, X_nan,
                  assert_all_finite=True)
def test_alternative_exception():
    """A TypeError must surface as such, even through a wrong expectation."""

    def func_that_raises_type_error():
        raise TypeError("This is a type error!")

    def func_that_asserts_incorrectly():
        # deliberately expects the wrong exception type
        assert_raises(ValueError, func_that_raises_type_error)

    # the direct call raises TypeError ...
    assert_raises(TypeError, func_that_raises_type_error)

    # ... and so does the wrapper that expected a ValueError instead
    assert_raises(TypeError, func_that_asserts_incorrectly)
def test_hypothesis_validator():
    """``DistHypothesisValidator`` accepts similar data and rejects X2."""
    # identical data passes
    DistHypothesisValidator().fit_transform(X)

    # so does data with tiny additive noise; the generator is seeded so the
    # test is reproducible from run to run
    noise = np.random.RandomState(42).rand(*X.shape) * 0.001
    DistHypothesisValidator().fit_transform(X + noise)

    # a validator fit on X must raise when transforming X2
    validator = DistHypothesisValidator(action="raise").fit(X)
    assert_raises(ValueError, validator.transform, X2)
def test_fails_on_existing_location():
    """``assert_persistable`` raises OSError if the location already exists."""
    existing_path = "exists.pkl"
    try:
        # create the file up front so the target location is occupied
        with open(existing_path, 'w') as handle:
            handle.write("Just creating a file so it raises")

        assert_raises(OSError, assert_persistable, None,
                      location=existing_path, X=None, y=None)
    finally:
        # always clean up the temporary file
        os.unlink(existing_path)
def test_custom_validator():
    """``CustomValidator`` works without a func and enforces a given one."""
    # func=None is accepted
    CustomValidator().fit_transform(X)

    # custom check: the maximum must stay below 2
    sub_2 = (lambda v: np.max(v) < 2.)
    CustomValidator(func=sub_2).fit_transform(X)

    # the same check, fit on X, is expected to reject X2
    strict = CustomValidator(action="raise", func=sub_2).fit(X)
    assert_raises(ValueError, strict.transform, X2)
def test_nzv_bad_freq_cut():
    """``NearZeroVarianceFilter.fit`` rejects invalid ``freq_cut`` values."""
    records = np.array([[1, 2, 3],
                        [4, 5, 3],
                        [6, 7, 5]])
    X = pd.DataFrame.from_records(data=records, columns=['a', 'b', 'c'])

    # first a bad float value, then something that is neither float nor int
    for bad_freq_cut in (1., '1.'):
        nzv = NearZeroVarianceFilter(freq_cut=bad_freq_cut)
        assert_raises(ValueError, nzv.fit, X)
def test_validate_test_cols():
    """``validate_test_set_columns`` needs every fit column in the test set."""
    fit = ['a', 'b', 'c']
    test = list(fit)

    # identical columns: passes
    validate_test_set_columns(fit, test)

    # an extra test column is fine, all fit columns are still present
    test.append('d')
    validate_test_set_columns(fit, test)

    # dropping the first column leaves 'a' missing from test: must raise
    test = test[1:]
    assert_raises(ValueError, validate_test_set_columns, fit, test)
def test_docstr_create_section():
    """Check ``_DocstrMap.create_section`` with and without overwriting."""
    doc_map = _DocstrMap(docstr_b)

    # add a brand new section after the existing ones
    doc_map.create_section("New Section", [' SomeContent', ''])

    # NOTE(review): the line layout of the literals below was reconstructed
    # from a whitespace-collapsed source -- confirm it matches ``docstr_b``.
    expected = """
    This is another docstr. It does NOT contain a 'params' section, so it
    will not be hit be the append_params test. However, it does contain
    a 'see_also' section.

    See Also
    --------
    SomeType
    SomeOtherType

    New Section
    -----------
    SomeContent
    """
    assert_docstrs_equal(doc_map.make(), expected)

    # creating the same section again without overwrite must fail
    assert_raises(ValueError, doc_map.create_section,
                  "New Section", [' SomeOtherContent', ''], False)

    # with overwrite=True the section content is replaced
    doc_map.create_section("New Section", [' SomeOtherContent', ''],
                           overwrite=True)
    expected2 = """
    This is another docstr. It does NOT contain a 'params' section, so it
    will not be hit be the append_params test. However, it does contain
    a 'see_also' section.

    See Also
    --------
    SomeType
    SomeOtherType

    New Section
    -----------
    SomeOtherContent
    """
    assert_docstrs_equal(doc_map.make(), expected2)
def test_linear_combos():
    """``LinearCombinationFilter`` drops 'C' from Z and validates input."""
    combo_filter = LinearCombinationFilter().fit(Z)
    assert combo_filter.drop_ == ['C'], combo_filter.drop_

    # after the transform only A and B remain, and B is all ones
    reduced = combo_filter.transform(Z)
    assert_array_equal(reduced.columns.values, ['A', 'B'])
    assert (reduced.B == 1).all()

    # restricted to A and B there is nothing to drop; Z is unchanged
    combo_filter = LinearCombinationFilter(cols=['A', 'B']).fit(Z)
    assert not combo_filter.drop_
    assert Z.equals(combo_filter.transform(Z))

    # a single feature is too few
    assert_raises(ValueError, LinearCombinationFilter(cols=['A']).fit, Z)
def test_chunking():
    """``chunk`` splits sequences and generators alike; bad sizes raise."""

    def listify(chunks):
        # materialise every chunk so results can be compared with ==
        return [list(piece) for piece in chunks]

    expected = [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10]]
    from_range = listify(chunk(range(11), 3))
    assert from_range == expected

    # a generator input must give the same chunks
    from_generator = listify(chunk((i for i in range(11)), 3))
    assert from_range == from_generator

    # corner case: a single-element input
    assert listify(chunk([1], 1)) == [[1]]

    def failing_func():
        # asking for 2 chunks of a 1-element input is expected to fail
        return list(chunk([1], 2))

    assert_raises(ValueError, failing_func)
def test_date_trans():
    """Check ``DateTransformer`` parsing, null handling and dtype guarding.

    Column "b" is parsed with an explicit format; "c" and "d" are given no
    format. Column "a" holds integers and is only accepted when int64 is
    explicitly allowed.
    """
    # reference datetimes the transformed cells are compared against
    _6_1 = dt.strptime("06-01-2018", "%m-%d-%Y")
    _6_2 = dt.strptime("06-02-2018", "%m-%d-%Y")
    _6_3 = dt.strptime("06-03-2018", "%m-%d-%Y")
    _6_4 = dt.strptime("06-04-2018", "%m-%d-%Y")
    _6_5 = dt.strptime("06-05-2018", "%m-%d-%Y")

    data = [
        # N/A, Specified, Pre-datetype, Infer
        [1, "06/01/2018", _6_1, "06/01/2018"],
        [2, "06/02/2018", _6_2, "06/02/2018"],
        [3, "06/03/2018", _6_3, "06/03/2018"],
        [4, None, _6_4, None],
        [4, "06/05/2018", None, "06/05/2018"]
    ]
    df = pd.DataFrame.from_records(data, columns=["a", "b", "c", "d"])

    converter = DateTransformer(cols=["b", "c", "d"],
                                date_format=["%m/%d/%Y", None, None])
    trans = converter.fit_transform(df)

    b = trans["b"].tolist()
    c = trans["c"].tolist()
    d = trans["d"].tolist()

    # missing inputs must come back as NaT
    assert b[3] is pd.NaT
    assert c[4] is pd.NaT
    assert d[3] is pd.NaT

    # every populated cell must equal the matching reference datetime
    assert b[0] == d[0] == c[0] == _6_1
    assert b[1] == d[1] == c[1] == _6_2
    assert b[2] == d[2] == c[2] == _6_3
    assert c[3] == _6_4
    assert b[4] == d[4] == _6_5

    # column "a" is rejected at fit time with the default allowed types
    assert_raises(ValueError, DateTransformer(cols=["a"]).fit, df)

    # ... but fitting succeeds once int64 is explicitly allowed.
    # NOTE: ``("int64")`` is just the string "int64"; the trailing comma is
    # what makes this the one-element tuple that was presumably intended.
    DateTransformer(cols=["a"], allowed_types=("int64",)).fit(df)
def test_interaction_corners():
    """Cover corner cases of ``InteractionTermTransformer`` on ``X_pd``."""
    # a non-function interaction_function must fail at fit time
    assert_raises(TypeError,
                  InteractionTermTransformer(interaction_function='a').fit,
                  X_pd)

    # interact just two of the columns
    trans = InteractionTermTransformer(cols=['a', 'b'])
    X_trans = trans.fit_transform(X_pd)

    # Column names must match exactly. The full lists are compared because
    # a zip()-based check would silently stop at the shorter sequence.
    expected_names = ['a', 'b', 'c', 'd', 'a_b_I']
    actual_names = X_trans.columns.tolist()
    assert actual_names == expected_names, actual_names

    # ``DataFrame.as_matrix`` was removed in pandas 1.0; ``.values`` yields
    # the same ndarray and is available in old and new pandas alike.
    assert_array_equal(
        X_trans.values,
        np.array([[0, 1, 0, 1, 0],
                  [0, 0, 1, 1, 0],
                  [0, 0, 0, 1, 0],
                  [1, 1, 1, 0, 1]]))

    # a test set lacking a fitted column must raise on transform
    X_test = X_pd.drop(['a'], axis=1)
    assert_raises(ValueError, trans.transform, X_test)
def test_le_encode_ignore():
    """``_le_transform`` honours the ``handle`` mode for unseen levels."""
    known_levels = np.array(["a", "b", "c", "d"])
    le = LabelEncoder()
    le.fit(known_levels)

    # every level was seen during fit
    col, trans, clz = _le_transform(col="C", vec=known_levels, le=le,
                                    handle="error", sep="_")
    assert col == "C", col
    assert_array_equal(trans, [0, 1, 2, 3])
    assert clz == ["C_a", "C_b", "C_c", "C_d"], clz

    # two unseen levels with handle="ignore": both are encoded as 4 and no
    # extra class names are produced
    with_new_levels = np.array(["a", "b", "c", "d", "e", "f"])
    col2, trans2, cls2 = _le_transform(col="C2", vec=with_new_levels, le=le,
                                       handle="ignore", sep="_")
    assert col2 == "C2", col2
    assert_array_equal(trans2, [0, 1, 2, 3, 4, 4])
    assert cls2 == ["C2_a", "C2_b", "C2_c", "C2_d"], cls2

    # the same unseen levels with handle="error" must raise
    assert_raises(ValueError, _le_transform, col="C", vec=with_new_levels,
                  le=le, handle="error", sep="_")
def test_selective_imputer_bad_strategies():
    """``SelectiveImputer.fit`` rejects malformed ``strategy`` arguments."""
    # (constructor kwargs, exception expected from fit), checked in order
    cases = [
        # a bad strategy string
        ({'strategy': "bad strategy"}, ValueError),
        # dim mismatch between cols and strategy
        ({'cols': ['a'], 'strategy': ['mean', 'mean']}, ValueError),
        # a strategy of an unsupported type
        ({'strategy': 1}, TypeError),
        # dict strategy that does not match cols dim-wise
        ({'cols': ['a'],
          'strategy': {'a': 'mean', 'b': 'median'}}, ValueError),
        # dict strategy naming bad columns
        ({'strategy': {'a': 'mean', 'D': 'median'}}, ValueError),
    ]

    for kwargs, expected_exc in cases:
        imputer = SelectiveImputer(**kwargs)
        assert_raises(expected_exc, imputer.fit, X)
def test_get_callable_type_error():
    """``_get_callable`` raises TypeError for a non-str, non-callable."""
    valid_strategies = {"some strategy": (lambda: None)}
    not_a_strategy = 123  # neither a string nor a callable
    assert_raises(TypeError, _get_callable, not_a_strategy, valid_strategies)
def func_that_asserts_incorrectly():
    """Expect a ValueError from ``func_that_raises_type_error``."""
    wrong_expectation = ValueError
    assert_raises(wrong_expectation, func_that_raises_type_error)
def test_bagged_classifier_continuous():
    """``BaggedClassifierImputer.fit`` must raise on continuous data."""
    classifier_imputer = BaggedClassifierImputer()

    # X is continuous, so fitting the classifier-based imputer has to fail
    assert_raises(ValueError, classifier_imputer.fit, X)
def func_that_fails_assertion():
    """Expect a ValueError from a callable that raises nothing."""
    does_nothing = (lambda: None)
    assert_raises(ValueError, func=does_nothing)
def test_failing_assert_raises():
    """``assert_raises`` must fail when the callable raises nothing."""

    def func_that_fails_assertion():
        # the lambda never raises, so the ValueError expectation is unmet
        does_nothing = (lambda: None)
        assert_raises(ValueError, func=does_nothing)

    assert_raises(AssertionError, func_that_fails_assertion)
def test_assert_raises():
    """``assert_raises`` accepts the ValueError from ``func_that_raises``."""
    expected_exc = ValueError
    assert_raises(expected_exc, func_that_raises)
def test_haversine_bad_cols():
    """``HaversineFeatures.fit`` raises TypeError when ``cols`` is None."""
    without_cols = HaversineFeatures(cols=None)
    assert_raises(TypeError, without_cols.fit, X)
def test_bad_units():
    """``haversine_distance`` raises ValueError for the unit "radians"."""
    # the four leading arguments are placeholders; the test expects the
    # final (units) argument to trigger the error
    placeholders = (None, None, None, None)
    assert_raises(ValueError, haversine_distance, *placeholders + ("radians",))
def test_mcf_non_finite():
    """``MultiCorrFilter.fit`` must raise ValueError on ``sparse``.

    NOTE(review): going by the test name, ``sparse`` presumably holds
    non-finite values -- confirm against its definition.
    """
    corr_filter = MultiCorrFilter(threshold=0.75)
    assert_raises(ValueError, corr_filter.fit, sparse)
def test_get_callable_key_error():
    """``_get_callable`` raises ValueError for an unknown strategy name."""
    valid_strategies = {"some other strategy": (lambda: None)}
    unknown_name = "some strategy"  # not a key of valid_strategies
    assert_raises(ValueError, _get_callable, unknown_name, valid_strategies)
def f(binner, exc):
    """Assert that fitting ``binner`` on ``iris`` raises ``exc``."""
    fit_method = binner.fit
    assert_raises(exc, fit_method, iris)
def test_none_present():
    """``_get_present_values`` raises ValueError when every value is null."""
    # five NaNs, so the null mask is True everywhere
    all_missing = pd.Series(np.ones(5) * np.nan)
    null_mask = pd.isnull(all_missing)
    assert_raises(ValueError, _get_present_values, all_missing, null_mask)