def test_integration_nonperfect_linear():
    """Run split -> cross-validation -> summary on ``Non_perfect=True`` data.

    Data comes from ``data_gen(nrows=100, Non_perfect=True)``.  The training
    partition is cross-validated with ``lm()`` (``shuffle=False``) and the
    scores are summarised with ``summary_cv``.  The summary must report
    ``'mean'`` and ``'median'`` below 1 and ``'sd'`` above 0.
    """
    features, target = data_gen(nrows=100, Non_perfect=True)
    # Only the training partition is used; the test partition is discarded.
    train_features, _, train_target, _ = train_test_split(features, target)

    cv_scores = cross_validation(
        lm(), X=train_features, y=train_target, shuffle=False
    )
    stats = summary_cv(cv_scores)

    summary_as_expected = (
        stats['mean'] < 1 and stats['median'] < 1 and stats['sd'] > 0
    )
    assert summary_as_expected, (
        'Non-perfect linear relation test does not give correct summary'
    )
def test_index_match():
    """Check that the train and test partitions together cover all of X and y.

    The ``X0`` and ``y`` columns of the two partitions are concatenated,
    sorted, and compared with the originals.  The originals are compared
    unsorted, so this assumes ``data_gen`` returns ``X0`` and ``y`` already in
    ascending order -- TODO confirm.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.sort(np.concatenate([X_train.X0.values, X_test.X0.values]))
    v2 = X.X0.values
    v3 = np.sort(np.concatenate([y_train.y.values, y_test.y.values]))
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "X/y_train + X/y_test is not a complete set of X/y"
def test_shuffle_False():
    """With ``shuffle=False`` the train rows followed by the test rows keep the
    original row order of ``X0`` and ``y``.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.concatenate([X_train.X0.values, X_test.X0.values])
    v2 = X.X0.values
    v3 = np.concatenate([y_train.y.values, y_test.y.values])
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "X/y_train + X/y_test is shuffled while `shuffle=False`"
def test_shuffle_False_random_state_effect():
    """With ``shuffle=False`` a ``random_state`` must not change the row order.

    The train rows followed by the test rows must equal the original ``X0``
    and ``y`` values in their original order.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, shuffle=False, random_state=12345)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.concatenate([X_train.X0.values, X_test.X0.values])
    v2 = X.X0.values
    v3 = np.concatenate([y_train.y.values, y_test.y.values])
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "`shuffle=False`: random state should have no effect"
def test_shuffle_False():
    """With ``shuffle=False`` the train rows followed by the test rows keep the
    original row order of ``X0`` and ``y``.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(X, y, shuffle=False)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.concatenate([X_train.X0.values, X_test.X0.values])
    v2 = X.X0.values
    v3 = np.concatenate([y_train.y.values, y_test.y.values])
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "X/y_train + X/y_test is shuffled while `shuffle=False`"
def test_index_match():
    """Check that the train and test partitions together cover all of X and y.

    The ``X0`` and ``y`` columns of the two partitions are concatenated,
    sorted, and compared with the originals.  The originals are compared
    unsorted, so this assumes ``data_gen`` returns ``X0`` and ``y`` already in
    ascending order -- TODO confirm.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(X, y)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.sort(np.concatenate([X_train.X0.values, X_test.X0.values]))
    v2 = X.X0.values
    v3 = np.sort(np.concatenate([y_train.y.values, y_test.y.values]))
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "X/y_train + X/y_test is not a complete set of X/y"
def test_shuffle_as_boolean():
    """A non-boolean ``shuffle`` (string, int or float) must raise TypeError.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The expected message is checked through ``match``,
    # which is searched as a regex in the string form of the exception.
    expected_message = '`shuffle` must be True or False'
    for bad_shuffle in ('1', 1, 1.0):
        with pytest.raises(TypeError, match=expected_message):
            train_test_split(X=X, y=y, shuffle=bad_shuffle)
def test_shuffle_False_random_state_effect():
    """With ``shuffle=False`` a ``random_state`` must not change the row order.

    The train rows followed by the test rows must equal the original ``X0``
    and ``y`` values in their original order.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    X, y = data_gen(nrows=100)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, shuffle=False, random_state=12345)

    # ``.append`` and ``.as_matrix()`` no longer exist in current pandas
    # (removed in 2.0 and 1.0); join the underlying arrays with NumPy instead.
    v1 = np.concatenate([X_train.X0.values, X_test.X0.values])
    v2 = X.X0.values
    v3 = np.concatenate([y_train.y.values, y_test.y.values])
    v4 = y.values.flatten()

    assert (np.array_equal(v1, v2) and np.array_equal(v3, v4)), \
        "`shuffle=False`: random state should have no effect"
def test_test_size_range_small_than_0():
    """A negative ``test_size`` must make ``train_test_split`` raise ValueError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen()
    negative_size = -0.1
    with pytest.raises(ValueError):
        train_test_split(X=features, y=target, test_size=negative_size)
def test_random_state_range():
    """A negative ``random_state`` must make ``train_test_split`` raise ValueError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    TypeError -- confirm which exception type is intended.
    """
    features, target = data_gen()
    negative_seed = -10
    with pytest.raises(ValueError):
        train_test_split(X=features, y=target, random_state=negative_seed)
def test_test_size_range_small_than_0():
    """A negative ``test_size`` must make ``train_test_split`` raise ValueError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen()
    negative_size = -0.1
    with pytest.raises(ValueError):
        train_test_split(X=features, y=target, test_size=negative_size)
def test_test_size_as_number():
    """A string ``test_size`` must raise TypeError with the expected message.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(TypeError, match='`test_size` must be a number'):
        train_test_split(X=X, y=y, test_size='0.25')
def test_shuffle_as_boolean_not_string():
    """Passing the string ``'1'`` as ``shuffle`` must raise TypeError."""
    features, target = data_gen()
    string_flag = '1'
    with pytest.raises(TypeError):
        train_test_split(X=features, y=target, shuffle=string_flag)
def test_random_state_as_number():
    """Passing the string ``'10'`` as ``random_state`` must raise TypeError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen()
    string_seed = '10'
    with pytest.raises(TypeError):
        train_test_split(X=features, y=target, random_state=string_seed)
def test_X_y_match():
    """``train_test_split`` must raise ValueError when ``y`` is cut to ``y[0:90]``
    while ``X`` is passed unchanged.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    TypeError -- confirm which exception type is intended.
    """
    features, target = data_gen()
    shortened_target = target[0:90]
    with pytest.raises(ValueError):
        train_test_split(X=features, y=shortened_target)
def test_X_y_Nrows():
    """Data generated with ``nrows=2`` must make ``train_test_split`` raise
    ValueError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    TypeError -- confirm which exception type is intended.
    """
    tiny_features, tiny_target = data_gen(nrows=2)
    with pytest.raises(ValueError):
        train_test_split(X=tiny_features, y=tiny_target)
def test_random_state_range():
    """A negative ``random_state`` must raise TypeError with the expected message.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    ValueError -- confirm which exception type ``train_test_split`` raises.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(TypeError, match='`random_state` must be nonnegative'):
        train_test_split(X=X, y=y, random_state=-10)
def test_random_state_as_number():
    """A string ``random_state`` must raise TypeError with the expected message.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(TypeError,
                       match='`random_state` must be a number or None'):
        train_test_split(X=X, y=y, random_state='10')
def test_test_size_range():
    """``test_size`` values of 2 and -1 must raise TypeError with the expected
    message.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.

    NOTE(review): other range tests in this file expect ValueError for an
    out-of-range ``test_size`` -- confirm which exception type is intended.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    expected_message = '`test_size` must be between 0 and 1'
    for out_of_range in (2, -1):
        with pytest.raises(TypeError, match=expected_message):
            train_test_split(X=X, y=y, test_size=out_of_range)
def test_dimension_match():
    """Row counts of the train and test partitions must add up to the row
    counts of the original ``X`` and ``y``.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen(nrows=10)
    feat_train, feat_test, targ_train, targ_test = train_test_split(
        features, target
    )

    failure_note = (
        "total rows of X/y_train and Xy_test doesn't match nrows of X/y"
    )
    feature_rows_ok = (
        feat_train.shape[0] + feat_test.shape[0] == features.shape[0]
    )
    # The target check stays inside the ``and`` so it is only evaluated when
    # the feature check has already passed.
    assert feature_rows_ok and (
        targ_train.shape[0] + targ_test.shape[0] == target.shape[0]
    ), failure_note
def test_y_one_column():
    """Passing ``y_2`` as ``y`` must raise TypeError with the expected message.

    Uses ``X`` and ``y_2`` from the enclosing module scope; they are not
    defined inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    ValueError -- confirm which exception type ``train_test_split`` raises.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(TypeError, match='`y` is more than one feature'):
        train_test_split(X=X, y=y_2)
def test_y_as_dataframe():
    """Passing the plain string ``"y"`` as ``y`` must raise TypeError."""
    features, _ = data_gen()
    not_a_frame = "y"
    with pytest.raises(TypeError):
        train_test_split(X=features, y=not_a_frame)
def test_y_one_column():
    """Passing the ``X`` object itself as ``y`` must raise ValueError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    TypeError -- confirm which exception type is intended.
    """
    features, _ = data_gen()
    # The same object is deliberately supplied for both arguments.
    with pytest.raises(ValueError):
        train_test_split(X=features, y=features)
def test_X_y_match():
    """Passing ``X_longer`` with ``y`` must raise TypeError with the expected
    message.

    Uses ``X_longer`` and ``y`` from the enclosing module scope; they are not
    defined inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    ValueError -- confirm which exception type ``train_test_split`` raises.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(TypeError, match="dim of `X` doesn't equal dim of `y`"):
        train_test_split(X=X_longer, y=y)
def test_dimension_match():
    """Row counts of the train and test partitions must add up to the row
    counts of the original ``X`` and ``y``.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen(nrows=10)
    feat_train, feat_test, targ_train, targ_test = train_test_split(
        features, target
    )

    failure_note = (
        "total rows of X/y_train and Xy_test doesn't match nrows of X/y"
    )
    feature_rows_ok = (
        feat_train.shape[0] + feat_test.shape[0] == features.shape[0]
    )
    # The target check stays inside the ``and`` so it is only evaluated when
    # the feature check has already passed.
    assert feature_rows_ok and (
        targ_train.shape[0] + targ_test.shape[0] == target.shape[0]
    ), failure_note
def test_X_as_dataframe():
    """Passing the plain string ``"X"`` as ``X`` must raise TypeError."""
    _, target = data_gen()
    not_a_frame = "X"
    with pytest.raises(TypeError):
        train_test_split(X=not_a_frame, y=target)
def test_test_size_as_number():
    """Passing the string ``'0.5'`` as ``test_size`` must raise TypeError.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition.
    """
    features, target = data_gen()
    string_size = '0.5'
    with pytest.raises(TypeError):
        train_test_split(X=features, y=target, test_size=string_size)
def test_shuffle_as_boolean_not_numeric():
    """Passing the float ``1.0`` as ``shuffle`` must raise TypeError."""
    features, target = data_gen()
    numeric_flag = 1.0
    with pytest.raises(TypeError):
        train_test_split(X=features, y=target, shuffle=numeric_flag)
def test_test_size_range_large_than_1():
    """A ``test_size`` of 1.1 must make ``train_test_split`` raise ValueError."""
    features, target = data_gen()
    oversized = 1.1
    with pytest.raises(ValueError):
        train_test_split(X=features, y=target, test_size=oversized)
def test_X_y_Nrows():
    """Splitting only the first two rows of ``X`` and ``y`` must raise TypeError
    with the expected message.

    Uses ``X`` and ``y`` from the enclosing module scope; they are not defined
    inside this function.

    NOTE(review): this test name is defined more than once in the file, so
    pytest only collects the last definition; the other definition expects a
    ValueError -- confirm which exception type ``train_test_split`` raises.
    """
    # pytest.raises expects the exception *class*; passing an instance makes
    # pytest itself fail.  The message is verified through ``match`` instead.
    with pytest.raises(
            TypeError,
            match='sample size is less than 3, too small for splitting'):
        train_test_split(X=X.iloc[0:2, :], y=y.iloc[0:2, :])