Example #1
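These snippets are collected from the kxy package's test suite, so each one assumes the surrounding test-module imports. A minimal sketch of the assumed imports (names inferred from the snippets; verify against your kxy version):

import numpy as np
import kxy  # registers the .kxy accessor on pandas DataFrames
from kxy.learning import (get_sklearn_learner, get_xgboost_learner,
                          get_lightgbm_learner_learning_api,
                          get_pytorch_dense_learner,
                          get_tensorflow_dense_learner)
from kxy_datasets.regressions import Abalone

The feature-selection helpers used below (Boruta, RFE, PFS, PCA and the corresponding *Predictor classes) come from their respective kxy modules.

This first example fits sklearn.linear_model.LassoCV on the Abalone dataset with LeanML additive learning, and asserts both the held-out R-squared and the exact list of selected variables.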
def test_sklearn_lasso_cv():
    # Regression
    sklearn_regressor_cls = get_sklearn_learner('sklearn.linear_model.LassoCV')
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, sklearn_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=1)
    assert results['Testing R-Squared'] == '0.547'
    print(results['Selected Variables'])
    assert results['Selected Variables'] == ['Shell weight', 'Shucked weight', 'Whole weight', \
     'Shell weight.ABS(* - Q25(*))', 'Viscera weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - MEAN(*))', \
     'Height', 'Length', 'Diameter', 'Sex_I', 'Shucked weight.ABS(* - MEDIAN(*))', 'Diameter.ABS(* - MEDIAN(*))', \
     'Viscera weight.ABS(* - Q75(*))', 'Viscera weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q25(*))', 'Sex_M', \
     'Sex_F', 'Shucked weight.ABS(* - Q75(*))', 'Shucked weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q75(*))', \
     'Length.ABS(* - Q75(*))']
Example #2
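The same LeanML flow with a dense PyTorch network wrapped by skorch (skorch.NeuralNetRegressor); feature generation uses fill_na=True and the stopping rule is more patient (n_down_perf_before_stop=3).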
def test_lean_boosted_pytorch_regressor():
    import torch
    torch.manual_seed(0)
    # Regression
    layers = [(10, 'relu'), (5, 'relu'), (1, None)]
    pt_regressor_cls = get_pytorch_dense_learner('skorch.NeuralNetRegressor',
                                                 layers,
                                                 max_epochs=10,
                                                 batch_size=100)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column],
                                           fill_na=True)

    # Model building
    results = features_df.kxy.fit(target_column, pt_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=3)
    assert results['Testing R-Squared'] == '0.424'
    print(results['Selected Variables'])
    assert results['Selected Variables'] == ['Shell weight', 'Shucked weight', 'Whole weight', \
     'Shell weight.ABS(* - Q25(*))', 'Viscera weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - MEAN(*))', \
     'Height', 'Length', 'Diameter', 'Sex_I', 'Shucked weight.ABS(* - MEDIAN(*))', 'Diameter.ABS(* - MEDIAN(*))', \
     'Viscera weight.ABS(* - Q75(*))', 'Viscera weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q25(*))', 'Sex_M', \
     'Sex_F', 'Shucked weight.ABS(* - Q75(*))', 'Shucked weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q75(*))']
Example #3
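Recursive feature elimination (RFE) around a plain LinearRegression learner: all but 5 candidate features are kept, and the selection is asserted in order.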
def test_linear_regression():
    regressor_cls = get_sklearn_learner(
        'sklearn.linear_model.LinearRegression')
    from warnings import simplefilter
    from sklearn.exceptions import ConvergenceWarning
    simplefilter("ignore", category=ConvergenceWarning)

    fs = RFE(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Height.ABS(* - MEDIAN(*))', 'Height.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - MEDIAN(*))', \
     'Shell weight', 'Diameter.ABS(* - MEAN(*))', 'Shell weight.ABS(* - MEDIAN(*))', 'Shucked weight', 'Viscera weight.ABS(* - MEAN(*))', \
     'Shell weight.ABS(* - MEAN(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Diameter', 'Height', 'Length.ABS(* - Q25(*))', \
     'Shucked weight.ABS(* - Q75(*))', 'Height.ABS(* - Q75(*))', 'Viscera weight', 'Shell weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q75(*))', \
     'Shucked weight.ABS(* - MEDIAN(*))', 'Whole weight.ABS(* - Q25(*))', 'Length', 'Shucked weight.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - Q75(*))', \
     'Shucked weight.ABS(* - Q25(*))', 'Whole weight', 'Whole weight.ABS(* - MEAN(*))', 'Viscera weight.ABS(* - Q25(*))', 'Whole weight.ABS(* - Q75(*))', \
     'Whole weight.ABS(* - MEDIAN(*))', 'Length.ABS(* - MEDIAN(*))', 'Length.ABS(* - Q75(*))', 'Length.ABS(* - MEAN(*))', 'Shell weight.ABS(* - Q75(*))']
Example #4
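Loops over several sklearn learners; for each, fits with LeanML, saves the first boosted model to disk, rebuilds it through the learner factory's path argument, and checks that predictions survive the round trip.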
def test_lean_boosted_sklearn_regressor():
    for clz in [
            'sklearn.neighbors.KNeighborsRegressor',
            'sklearn.linear_model.LassoCV',
            'sklearn.linear_model.LinearRegression'
    ]:
        # Regression
        sklearn_regressor_cls = get_sklearn_learner(clz)
        dataset = Abalone()
        target_column = dataset.y_column
        df = dataset.df

        # Features generation
        features_df = df.kxy.generate_features(entity=None,
                                               max_lag=None,
                                               entity_name='*',
                                               exclude=[target_column])

        # Model building
        results = features_df.kxy.fit(target_column, sklearn_regressor_cls, \
         problem_type='regression', additive_learning=True, return_scores=True, \
         n_down_perf_before_stop=1)
        model = results['predictor'].models[0]
        feature_columns = results['Selected Variables']
        x = features_df[feature_columns].values
        predictions = model.predict(x)
        path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name, clz)
        model.save(path)

        loaded_model = sklearn_regressor_cls(path=path)
        loaded_predictions = loaded_model.predict(x)

        assert np.allclose(predictions, loaded_predictions)
Example #5
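Fits with feature_selection_method='rfe' around XGBoost, saves the resulting RFEPredictor, and checks that the reloaded predictor reproduces the predictions.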
def test_rfe_predictor_xgboost():
    for clz in ['xgboost.XGBRegressor']:
        # Regression
        xgboost_regressor_cls = get_xgboost_learner(clz)
        dataset = Abalone()
        target_column = dataset.y_column
        df = dataset.df

        # Features generation
        features_df = df.kxy.generate_features(entity=None,
                                               max_lag=None,
                                               entity_name='*',
                                               exclude=[target_column])

        # Model building
        results = features_df.kxy.fit(target_column, xgboost_regressor_cls, \
         problem_type='regression', feature_selection_method='rfe', rfe_n_features=5)
        predictor = results['predictor']
        predictions = predictor.predict(features_df)
        path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name, clz)
        predictor.save(path)

        loaded_predictor = RFEPredictor.load(path, xgboost_regressor_cls)
        loaded_predictions = loaded_predictor.predict(features_df)

        assert np.allclose(predictions, loaded_predictions)
Example #6
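A LeanML fit with additive_learning=False, using a LightGBM learner built through the learning API.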
def test_non_additive_lean_boosted_regressor():
    # Regression
    params = {
        'objective': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': 100,
        'n_jobs': -1,
        'learning_rate': 0.1,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'verbose': -1,
    }
    lightgbm_regressor_cls = get_lightgbm_learner_learning_api(params, num_boost_round=10000, \
     split_random_seed=42)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, lightgbm_regressor_cls, \
     problem_type='regression', additive_learning=False, return_scores=True, \
     n_down_perf_before_stop=1)
    assert results['Testing R-Squared'] == '0.554'
    assert results['Selected Variables'] == ['Shell weight', 'Shucked weight', 'Whole weight', 'Shell weight.ABS(* - Q25(*))',\
     'Viscera weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - MEAN(*))', 'Height', 'Length', 'Diameter', 'Sex_I',\
     'Shucked weight.ABS(* - MEDIAN(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - Q75(*))',\
     'Viscera weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q25(*))', 'Sex_M', 'Sex_F']
Example #7
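Fits with feature_selection_method='pfs' while saving to path, reloads the PFSPredictor, and sanity-checks the width of the feature-direction matrix and the prediction index.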
def test_save_pfs():
    # Regression
    xgboost_regressor_cls = get_xgboost_learner('xgboost.XGBRegressor')
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    path = 'Abalone'
    results = features_df.kxy.fit(target_column, xgboost_regressor_cls, \
     problem_type='regression', feature_selection_method='pfs', \
     path=path)
    loaded_predictor = PFSPredictor().load(path, xgboost_regressor_cls)
    feature_directions = loaded_predictor.feature_directions
    assert feature_directions.shape[1] == features_df.shape[1] - 1
    predictions = loaded_predictor.predict(features_df)
    assert len(predictions.columns) == 1
    assert target_column in predictions.columns
    assert set(features_df.index).difference(set(predictions.index)) == set()
    assert set(predictions.index).difference(set(features_df.index)) == set()
Example #8
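The same LeanML flow with a dense Keras network (KerasRegressor) trained with the mean absolute error loss.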
def test_lean_boosted_tensorflow_regressor():
    import tensorflow as tf
    tf.random.set_seed(0)
    # Regression
    layers = [(10, 'relu'), (5, 'relu'), (1, 'linear')]
    loss = 'mean_absolute_error'
    optimizer = 'adam'
    tf_regressor_cls = get_tensorflow_dense_learner('KerasRegressor', layers, loss, optimizer=optimizer, \
     epochs=10, batch_size=100)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column],
                                           fill_na=True)

    # Model building
    results = features_df.kxy.fit(target_column, tf_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=3)
    assert results['Testing R-Squared'] == '0.097'
    assert results['Selected Variables'] == ['Shell weight', 'Shucked weight', 'Whole weight', 'Shell weight.ABS(* - Q25(*))', \
     'Viscera weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - MEAN(*))', 'Height', 'Length', 'Diameter', 'Sex_I', \
     'Shucked weight.ABS(* - MEDIAN(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - Q75(*))', \
     'Viscera weight.ABS(* - Q25(*))', 'Diameter.ABS(* - Q25(*))', 'Sex_M', 'Sex_F']
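Example #9

Checks that data_valuation reports mutual information when called with include_mutual_information=True.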
def test_include_mi():
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df
    results = df.kxy.data_valuation(target_column, problem_type='regression', \
     include_mutual_information=True)
    assert 'Mutual Information' in results
Example #10
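Boruta feature selection around LinearRegression; only the three one-hot Sex columns survive.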
def test_linear_regression():
    # sklearn's LinearRegression is deterministic and takes no random_state argument
    regressor_cls = get_sklearn_learner(
        'sklearn.linear_model.LinearRegression')
    from warnings import simplefilter
    from sklearn.exceptions import ConvergenceWarning
    simplefilter("ignore", category=ConvergenceWarning)

    fs = Boruta(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    m = fs.fit(x_df, y_df)

    # Assertions
    assert len(fs.selected_variables) == 3
    assert fs.selected_variables == ['Sex_M', 'Sex_I', 'Sex_F']
Example #11
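RFE around XGBoost, again keeping all but 5 candidate features.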
def test_xgboost_regression():
    regressor_cls = get_xgboost_learner('xgboost.XGBRegressor', random_state=0)

    fs = RFE(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    n_vars = max(x_df.shape[1] - 5, 1)
    m = fs.fit(x_df, y_df, n_vars)

    # Assertions
    assert len(fs.selected_variables) == n_vars
    assert fs.selected_variables == ['Shell weight', 'Sex_I', 'Shucked weight.ABS(* - Q25(*))', 'Shucked weight', 'Shucked weight.ABS(* - MEDIAN(*))', \
     'Shucked weight.ABS(* - MEAN(*))', 'Diameter.ABS(* - Q75(*))', 'Height.ABS(* - Q75(*))', 'Diameter.ABS(* - MEAN(*))', 'Diameter.ABS(* - Q25(*))', \
     'Whole weight.ABS(* - MEDIAN(*))', 'Viscera weight.ABS(* - Q75(*))', 'Sex_M', 'Height.ABS(* - MEAN(*))', 'Shucked weight.ABS(* - Q75(*))', \
     'Viscera weight.ABS(* - MEAN(*))', 'Height.ABS(* - Q25(*))', 'Whole weight.ABS(* - MEAN(*))', 'Shell weight.ABS(* - Q25(*))', 'Whole weight.ABS(* - Q25(*))', \
     'Length.ABS(* - MEAN(*))', 'Length.ABS(* - Q75(*))', 'Whole weight.ABS(* - Q75(*))', 'Diameter.ABS(* - MEDIAN(*))', 'Shell weight.ABS(* - Q75(*))', \
     'Shell weight.ABS(* - MEAN(*))', 'Shell weight.ABS(* - MEDIAN(*))', 'Length.ABS(* - MEDIAN(*))', 'Sex_F', 'Viscera weight', 'Whole weight', \
     'Length.ABS(* - Q25(*))', 'Viscera weight.ABS(* - MEDIAN(*))']
Example #12
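Boruta around XGBoost; five variables survive.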
def test_xgboost_regression():
    regressor_cls = get_xgboost_learner('xgboost.XGBRegressor', random_state=0)

    fs = Boruta(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    m = fs.fit(x_df, y_df)

    # Assertions
    assert len(fs.selected_variables) == 5
    assert fs.selected_variables == [
        'Shucked weight', 'Shell weight', 'Sex_I',
        'Shucked weight.ABS(* - Q25(*))', 'Whole weight'
    ]
Example #13
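Boruta around sklearn.ensemble.RandomForestRegressor; eleven variables survive.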
def test_random_forest_regression():
    regressor_cls = get_sklearn_learner(
        'sklearn.ensemble.RandomForestRegressor', random_state=0)
    from warnings import simplefilter
    from sklearn.exceptions import ConvergenceWarning
    simplefilter("ignore", category=ConvergenceWarning)

    fs = Boruta(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    m = fs.fit(x_df, y_df)

    # Assertions
    assert len(fs.selected_variables) == 11
    assert fs.selected_variables == ['Shucked weight.ABS(* - Q75(*))', 'Shucked weight.ABS(* - Q25(*))', 'Shucked weight.ABS(* - MEDIAN(*))', \
     'Shucked weight.ABS(* - MEAN(*))', 'Shucked weight', 'Shell weight.ABS(* - Q75(*))', 'Shell weight.ABS(* - Q25(*))', \
     'Shell weight.ABS(* - MEDIAN(*))', 'Shell weight.ABS(* - MEAN(*))', 'Shell weight', 'Sex_I']
Example #14
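Boruta around a LightGBM learner with early stopping; thirteen variables survive.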
def test_lightgbm_regression():
    lgbm_params = {
        'objective': 'rmse',
        'boosting_type': 'gbdt',
        'n_jobs': -1,
        'learning_rate': 0.1,
        'verbose': -1,
    }
    regressor_cls = get_lightgbm_learner_learning_api(lgbm_params, num_boost_round=2000, \
     early_stopping_rounds=5, split_random_seed=0)

    fs = Boruta(regressor_cls)

    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Feature selection
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x_df = features_df[x_columns]
    y_df = features_df[[target_column]]
    m = fs.fit(x_df, y_df)

    assert len(fs.selected_variables) == 13
    assert fs.selected_variables == ['Shucked weight.ABS(* - Q75(*))', 'Shucked weight.ABS(* - MEDIAN(*))', \
     'Shucked weight.ABS(* - MEAN(*))', 'Shucked weight', 'Shell weight.ABS(* - MEAN(*))', 'Shell weight', \
     'Shucked weight.ABS(* - Q25(*))', 'Sex_I', 'Diameter', 'Whole weight', 'Shell weight.ABS(* - Q75(*))', \
     'Whole weight.ABS(* - MEAN(*))', 'Height']
Example #15
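Checks that every principal feature direction returned by PFS().fit, and by PFSPredictor.fit, has unit norm.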
def test_norm():
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])
    y = features_df[target_column].values
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x = features_df[x_columns].values

    # Principal features construction
    feature_directions = PFS().fit(x, y)
    n_directions = feature_directions.shape[0]
    for i in range(n_directions):
        assert np.allclose(
            np.dot(feature_directions[i, :], feature_directions[i, :]), 1.)

    predictor = PFSPredictor()
    learner_func = get_sklearn_learner(
        'sklearn.ensemble.RandomForestRegressor', random_state=0)
    results = predictor.fit(features_df, target_column, learner_func)
    feature_directions = results['Feature Directions']
    n_directions = feature_directions.shape[0]
    for i in range(n_directions):
        assert np.allclose(
            np.dot(feature_directions[i, :], feature_directions[i, :]), 1.)
Example #16
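An end-to-end LeanML fit with feature_selection_method='boruta' and 100 Boruta evaluations.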
def test_boruta():
    # Regression
    xgboost_regressor_cls = get_xgboost_learner('xgboost.XGBRegressor')
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, xgboost_regressor_cls, \
     problem_type='regression', feature_selection_method='boruta', boruta_n_evaluations=100)
    assert results['Selected Variables'] == ['Shucked weight', 'Shell weight', 'Sex_I', \
     'Shucked weight.ABS(* - Q25(*))', 'Whole weight']
Example #17
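Pins start_n_features, min_n_features and max_n_features to 2 so that LeanML trains a single model on exactly two features.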
def test_single_learner():
    # Regression
    xgboost_regressor_cls = get_xgboost_learner('xgboost.XGBRegressor')
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, xgboost_regressor_cls, \
     problem_type='regression', start_n_features=2, min_n_features=2, max_n_features=2, \
     additive_learning=True, return_scores=True, n_down_perf_before_stop=1)
    assert results['Testing R-Squared'] == '0.493'
    assert results['Selected Variables'] == ['Shell weight', 'Shucked weight']
    assert len(features_df.kxy.predictor.models) == 1
Example #18
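A save/reload round trip for a boosted LightGBM (learning API) model; the loaded model is rebuilt by calling the learner factory with path.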
def test_lean_boosted_lightgbm_learning_regressor():
    # Regression
    params = {
        'objective': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': 100,
        'n_jobs': -1,
        'learning_rate': 0.1,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'verbose': -1,
    }
    lightgbm_regressor_cls = get_lightgbm_learner_learning_api(params, num_boost_round=10000, \
     early_stopping_rounds=50, verbose_eval=50, split_random_seed=42)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, lightgbm_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=1)
    model = results['predictor'].models[0]
    feature_columns = results['Selected Variables']
    x = features_df[feature_columns].values
    predictions = model.predict(x)
    path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name,
                                            'lightgbm-learning-api-regressor')
    model.save(path)

    loaded_model = lightgbm_regressor_cls(path=path)
    loaded_predictions = loaded_model.predict(x)

    assert np.allclose(predictions, loaded_predictions)
Example #19
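A save/reload round trip for the full LeanMLPredictor rather than an individual model.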
def test_leanml_predictor_lightgbm():
    # Regression
    params = {
        'objective': 'rmse',
        'boosting_type': 'gbdt',
        'num_leaves': 100,
        'n_jobs': -1,
        'learning_rate': 0.1,
        'feature_fraction': 0.8,
        'bagging_fraction': 0.8,
        'verbose': -1,
    }
    lightgbm_regressor_cls = get_lightgbm_learner_learning_api(params, num_boost_round=10000, \
     early_stopping_rounds=50, verbose_eval=50, split_random_seed=42)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, lightgbm_regressor_cls, \
     problem_type='regression')
    feature_columns = results['Selected Variables']
    predictor = results['predictor']
    predictions = predictor.predict(features_df[feature_columns])
    path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name,
                                            'lightgbm-learning-api-regressor')
    predictor.save(path)

    loaded_predictor = LeanMLPredictor.load(path, lightgbm_regressor_cls)
    loaded_predictions = loaded_predictor.predict(features_df[feature_columns])

    assert np.allclose(predictions, loaded_predictions)
Example #20
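PFS feature selection end to end: checks the feature-direction matrix width and that predictions are indexed like the input frame.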
def test_pfs_feature_selection():
    # Regression
    xgboost_regressor_cls = get_xgboost_learner('xgboost.XGBRegressor')
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, xgboost_regressor_cls, \
     problem_type='regression', feature_selection_method='pfs')
    assert results['Feature Directions'].shape[1] == features_df.shape[1] - 1
    predictor = results['predictor']
    predictions = predictor.predict(features_df)
    assert len(predictions.columns) == 1
    assert target_column in predictions.columns
    assert set(features_df.index).difference(set(predictions.index)) == set()
    assert set(predictions.index).difference(set(features_df.index)) == set()
Example #21
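The PCA counterpart of Example #15: checks the width of the feature-direction matrices returned by PCA().fit and PCAPredictor.fit.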
def test_shape():
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])
    y = features_df[target_column].values
    x_columns = [_ for _ in features_df.columns if _ != target_column]
    x = features_df[x_columns].values

    # Principal features construction
    feature_directions = PCA().fit(x, y)
    assert feature_directions.shape[1] == x.shape[1]

    predictor = PCAPredictor()
    learner_func = get_sklearn_learner(
        'sklearn.ensemble.RandomForestRegressor', random_state=0)
    results = predictor.fit(features_df, target_column, learner_func)
    feature_directions = results['Feature Directions']
    assert feature_directions.shape[1] == x.shape[1]
Example #22
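The PyTorch variant of the save/reload round trip; rebuilding the skorch model from disk requires passing n_vars alongside path.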
def test_lean_boosted_pytorch_regressor():
    import torch
    torch.manual_seed(0)
    # Regression
    layers = [(10, 'relu'), (5, 'relu'), (1, None)]
    clz = 'skorch.NeuralNetRegressor'
    pt_regressor_cls = get_pytorch_dense_learner(clz,
                                                 layers,
                                                 max_epochs=10,
                                                 batch_size=100)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, pt_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=1)
    model = results['predictor'].models[0]
    feature_columns = results['Selected Variables']
    x = features_df[feature_columns].values
    predictions = model.predict(x)
    path = '../kxy/misc/cache/%s-%s.sav' % (dataset.name, clz)
    model.save(path)

    n_vars = x.shape[1]
    loaded_model = pt_regressor_cls(n_vars=n_vars, path=path)
    loaded_predictions = loaded_model.predict(x)

    assert np.allclose(predictions, loaded_predictions)
Example #23
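The TensorFlow variant of the save/reload round trip; the Keras model is saved to an .h5 file and rebuilt with n_vars and path.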
def test_lean_boosted_tensorflow_regressor():
    import tensorflow as tf
    tf.random.set_seed(0)
    # Regression
    layers = [(10, 'relu'), (5, 'relu'), (1, 'linear')]
    loss = 'mean_absolute_error'
    optimizer = 'adam'
    clz = 'KerasRegressor'
    tf_regressor_cls = get_tensorflow_dense_learner(clz, layers, loss, optimizer=optimizer, \
     epochs=10, batch_size=100)
    dataset = Abalone()
    target_column = dataset.y_column
    df = dataset.df

    # Features generation
    features_df = df.kxy.generate_features(entity=None,
                                           max_lag=None,
                                           entity_name='*',
                                           exclude=[target_column])

    # Model building
    results = features_df.kxy.fit(target_column, tf_regressor_cls, \
     problem_type='regression', additive_learning=True, return_scores=True, \
     n_down_perf_before_stop=1)
    model = results['predictor'].models[0]
    feature_columns = results['Selected Variables']
    x = features_df[feature_columns].values
    predictions = model.predict(x)
    path = '../kxy/misc/cache/%s-%s.h5' % (dataset.name, clz)
    model.save(path)

    n_vars = x.shape[1]
    loaded_model = tf_regressor_cls(n_vars=n_vars, path=path)
    loaded_predictions = loaded_model.predict(x)

    assert np.allclose(predictions, loaded_predictions)