示例#1
0
    def test_compare_all_models_classification(model_name=None):
        np.random.seed(0)

        df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
        )

        column_descriptions = {
            'survived': 'output',
            'embarked': 'categorical',
            'pclass': 'categorical'
        }

        ml_predictor = Predictor(type_of_estimator='classifier',
                                 column_descriptions=column_descriptions)

        ml_predictor.train(df_titanic_train,
                           compare_all_models=True,
                           model_names=model_name)

        test_score = ml_predictor.score(df_titanic_test,
                                        df_titanic_test.survived)

        print('test_score')
        print(test_score)

        assert -0.215 < test_score < -0.17
示例#2
0
def test_include_bad_y_vals_train_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    df_titanic_train.ix[1, 'survived'] = None
    df_titanic_train.ix[8, 'survived'] = None
    df_titanic_train.ix[26, 'survived'] = None

    ml_predictor.train(df_titanic_train, model_names=model_name)

    test_score = ml_predictor.score(df_titanic_test.to_dict('records'),
                                    df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
示例#3
0
def test_list_of_single_model_name_classification():
    np.random.seed(0)
    model_name = 'GradientBoostingClassifier'

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, model_names=[model_name])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.16 < test_score < -0.135
示例#4
0
def runAutoML(df_train, df_test):
    column_descriptions = {'STAGE': 'output'}
    models = [
        'AdaBoostClassifier',
        'ExtraTreesClassifier',
        'GradientBoostingClassifier',
        'MiniBatchKMeans',
        'PassiveAggressiveClassifier',
        #   'Perceptron',
        'RandomForestClassifier',
        'RidgeClassifier',
        'SGDClassifier',
        #   'DeepLearningClassifier',
        'LGBMClassifier',
        'XGBClassifier',
    ]
    # models = None
    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_train,
                       model_names=models,
                       optimize_final_model=False)
    score = ml_predictor.score(df_test,
                               df_test.STAGE,
                               advanced_scoring=True,
                               verbose=3)
    return score
示例#5
0
def ensemble_regressor_basic_test():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ensemble_config = [
        {
            'model_name': 'LGBMRegressor'
        }
        , {
            'model_name': 'RandomForestRegressor'
        }

    ]


    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, ensemble_config=None)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.0 < test_score < -2.8
示例#6
0
def test_pass_in_list_of_dictionaries_predict_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    list_titanic_train = df_titanic_train.to_dict('records')

    ml_predictor.train(df_titanic_train)

    test_score = ml_predictor.score(df_titanic_test.to_dict('records'),
                                    df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.16 < test_score < -0.135
示例#7
0
def ensemble_classifier_basic_test(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ensemble_config = [
        {
            'model_name': 'LGBMClassifier'
        }
        , {
            'model_name': 'RandomForestClassifier'
        }

    ]

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)


    ml_predictor.train(df_titanic_train, ensemble_config=ensemble_config)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.15 < test_score < -0.131
示例#8
0
def test_all_algos_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(
        df_titanic_train,
        model_names=[
            'LogisticRegression', 'RandomForestClassifier', 'RidgeClassifier',
            'GradientBoostingClassifier', 'ExtraTreesClassifier',
            'AdaBoostClassifier', 'SGDClassifier', 'Perceptron',
            'PassiveAggressiveClassifier', 'DeepLearningClassifier',
            'XGBClassifier', 'LGBMClassifier'
        ])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
示例#9
0
def test_all_algos_regression():
    # a random seed of 42 has ExtraTreesRegressor getting the best CV score, and that model doesn't generalize as well as GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge',
            'GradientBoostingRegressor', 'AdaBoostRegressor', 'SGDRegressor',
            'PassiveAggressiveRegressor', 'Lasso', 'LassoLars', 'ElasticNet',
            'OrthogonalMatchingPursuit', 'BayesianRidge', 'ARDRegression',
            'MiniBatchKMeans', 'DeepLearningRegressor', 'LGBMRegressor',
            'XGBClassifier', 'LinearSVR', 'CatBoostRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.4 < test_score < -2.8
示例#10
0
def test_perform_feature_scaling_true_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       perform_feature_scaling=True,
                       model_names=model_name)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    lower_bound = -0.215
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.235

    assert lower_bound < test_score < -0.17
示例#11
0
def categorical_ensembling_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train_categorical_ensemble(df_titanic_train, model_names=model_name, categorical_column='embarked')

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    lower_bound = -0.215

    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.237
    if model_name == 'XGBClassifier':
        lower_bound = -0.235
    if model_name == 'LGBMClassifier':
        lower_bound = -0.22
    if model_name == 'GradientBoostingClassifier':
        lower_bound = -0.23
    if model_name == 'CatBoostClassifier':
        lower_bound = -0.25


    assert lower_bound < test_score < -0.17
示例#12
0
def test_nlp_multilabel_classification():
    np.random.seed(0)

    df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset(
    )

    column_descriptions = {
        'airline_sentiment': 'output',
        'airline': 'categorical',
        'text': 'nlp',
        'tweet_location': 'categorical',
        'user_timezone': 'categorical',
        'tweet_created': 'date'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)
    ml_predictor.train(df_twitter_train)

    test_score = ml_predictor.score(df_twitter_test,
                                    df_twitter_test.airline_sentiment,
                                    verbose=0)
    # Make sure our score is good, but not unreasonably good
    print('test_score')
    print(test_score)
    assert 0.67 < test_score < 0.79
def test_select_from_multiple_classification_models_using_X_test_and_y_test():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       model_names=[
                           'LogisticRegression', 'RandomForestClassifier',
                           'RidgeClassifier', 'GradientBoostingClassifier',
                           'ExtraTreesClassifier', 'AdaBoostClassifier',
                           'SGDClassifier', 'Perceptron',
                           'PassiveAggressiveClassifier'
                       ],
                       X_test=df_titanic_test,
                       y_test=df_titanic_test.survived)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
示例#14
0
def test_perform_feature_scaling_true_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       perform_feature_scaling=True,
                       model_names=['LGBMClassifier'])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
示例#15
0
def categorical_ensembling_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train_categorical_ensemble(df_boston_train,
                                            perform_feature_selection=True,
                                            model_names=model_name,
                                            categorical_column='CHAS')

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    lower_bound = -4.0
    if model_name == 'DeepLearningRegressor':
        # NOTE: the model fails to learn for one of the classes. might be worth looking into more
        lower_bound = -16
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95

    assert lower_bound < test_score < -2.8
def test_X_test_and_y_test_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       X_test=df_titanic_test,
                       y_test=df_titanic_test.survived)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_include_bad_y_vals_predict_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    df_titanic_test.iloc[1]['survived'] = float('nan')
    df_titanic_test.iloc[8]['survived'] = float('inf')
    df_titanic_test.iloc[26]['survived'] = None

    ml_predictor.train(df_titanic_train)

    test_score = ml_predictor.score(df_titanic_test.to_dict('records'), df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.16 < test_score < -0.135
示例#18
0
def optimize_final_model_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train,
                       optimize_final_model=True,
                       model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # the random seed gets a score of -3.21 on python 3.5
    # There's a ton of noise here, due to small sample sizes
    lower_bound = -3.4
    if model_name == 'DeepLearningRegressor':
        lower_bound = -13
    if model_name == 'LGBMRegressor':
        lower_bound = -5.5
    if model_name == 'GradientBoostingRegressor':
        lower_bound = -3.5

    assert lower_bound < test_score < -2.8
def test_all_algos_regression():
    # a random seed of 42 has ExtraTreesRegressor getting the best CV score, and that model doesn't generalize as well as GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train,
                       model_names=[
                           'LinearRegression', 'RandomForestRegressor',
                           'Ridge', 'GradientBoostingRegressor',
                           'ExtraTreesRegressor', 'AdaBoostRegressor',
                           'SGDRegressor', 'PassiveAggressiveRegressor'
                       ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.25 < test_score < -2.8
def test_select_from_multiple_regression_models_using_X_test_and_y_test():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train,
                       model_names=[
                           'LinearRegression', 'RandomForestRegressor',
                           'Ridge', 'GradientBoostingRegressor',
                           'ExtraTreesRegressor', 'AdaBoostRegressor',
                           'SGDRegressor', 'PassiveAggressiveRegressor'
                       ],
                       X_test=df_boston_test,
                       y_test=df_boston_test.MEDV)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Due to the small sample size of this test set, GSCV may sometimes pick ExtraTreesRegressor as the best model, just very slightly beating out GradientBoostingRegressor.
    # ExtraTrees doesn't generalize as well, however, scoring a mere -3.20x something or other, and narrowly missing our cutoff from above.
    # Given that is is only an issue when running on tiny toy datasets, I'm not concerned for the use cases I intend to support, and thus, am bumping up the upper bound on our error metric ever so slightly
    assert -3.25 < test_score < -2.8
示例#21
0
def test_perform_feature_selection_true_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, perform_feature_selection=True, model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    lower_bound = -4.0
    if model_name == 'DeepLearningRegressor':
        lower_bound = -14.5
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95


    assert lower_bound < test_score < -2.8
def test_categorical_ensemble_basic_classifier():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'pclass': 'categorical',
        'embarked': 'categorical',
        'sex': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train_categorical_ensemble(df_titanic_train,
                                            categorical_column='pclass',
                                            optimize_final_model=False)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    # Small sample sizes mean there's a fair bit of noise here
    assert -0.155 < test_score < -0.135
示例#23
0
def test_linear_model_analytics_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, model_names='RidgeClassifier')

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    # Linear models aren't super great on this dataset...
    assert -0.21 < test_score < -0.17
示例#24
0
def optimize_final_model_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, optimize_final_model=True, model_names=model_name)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    # Small sample sizes mean there's a fair bit of noise here
    lower_bound = -0.215

    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.235
    if model_name == 'LGBMClassifier':
        lower_bound = -0.221

    assert lower_bound < test_score < -0.17
示例#25
0
def test_calibrate_final_model_missing_X_test_y_test_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    # Take a third of our test data (a tenth of our overall data) for calibration
    df_titanic_test, df_titanic_calibration = train_test_split(df_titanic_test,
                                                               test_size=0.33,
                                                               random_state=42)

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    # This should still work, just with warning printed
    ml_predictor.train(df_titanic_train, calibrate_final_model=True)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.14 < test_score < -0.12
示例#26
0
def test_optimize_entire_pipeline_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       optimize_entire_pipeline=True,
                       model_names=['DeepLearningClassifier'])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.25 < test_score < -0.17
示例#27
0
class automl_tutorial():
    def __init__(self):
        self.df_train, self.df_test = None, None
        self.column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}
        self.ml_predictor = None

    def run(self):
        self.load_data()
        self.fit_predict()

    def load_data(self):
        self.df_train, self.df_test = get_boston_dataset()

    def fit_predict(self):
        self.ml_predictor = Predictor(
            type_of_estimator='regressor',
            column_descriptions=self.column_descriptions)
        self.ml_predictor.train(self.df_train)
        self.ml_predictor.score(self.df_test, self.df_test.MEDV)
示例#28
0
文件: extautoML.py 项目: tgadf/pymva
def createAutoMLClassifier(params=None):

    df_train, df_test = get_boston_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='classifer',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_train)

    #ml_predictor.train(data, model_names=['DeepLearningClassifier'])
    # Available options are
    # DeepLearningClassifier and DeepLearningRegressor
    # XGBClassifier and XGBRegressor
    # LGBMClassifer and LGBMRegressor
    # CatBoostClassifier and CatBoostRegressor

    ml_predictor.score(df_test, df_test.MEDV)
示例#29
0
class prediction(model):
    @property
    def estimator(self):
        raise NotImplementedError()

    def build(self, meta_data):
        self.__model = Predictor(type_of_estimator=self.estimator,
                                 column_descriptions=meta_data)
        self.__label = self.meta_data_key(meta_data, "output")

    def train_and_score(self, data, labels, verbose):
        # create training and test data
        training_data, test_data = train_test_split(data, test_size=0.2)

        # train the model
        if verbose == False:
            warnings.filterwarnings('ignore')
            text_trap = io.StringIO()
            with redirect_stdout(text_trap):
                self.__model.train(training_data,
                                   verbose=False,
                                   ml_for_analytics=False)
        else:
            warnings.filterwarnings('ignore')
            self.__model.train(training_data,
                               verbose=True,
                               ml_for_analytics=False)

    # score the model
        if verbose == False:
            self.__model.score(test_data, test_data[self.__label], verbose=0)
        else:
            self.__model.score(test_data, test_data[self.__label], verbose=1)

    def interpret(self):
        pass

    def python_object(self):
        return self.__model
示例#30
0
def test_compare_all_models_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(type_of_estimator='regressor',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, compare_all_models=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8