# Example 1
    def test_pipeline_step(self):
        """Two chained steps apply in order: starting from 0, +1 then +2 gives 3."""
        add_one = PipelineStep('a', lambda x, context: PipelineData(x.dataset + 1))
        add_two = PipelineStep('b', lambda x, context: PipelineData(x.dataset + 2))
        pipeline = Pipeline() >> add_one >> add_two

        context, data = LocalExecutor().run(pipeline, 0)
        self.assertEqual(data.dataset, 3)
# Example 2
    def test_random_choice_combinator(self):
        """RandomChoice must execute exactly one of its candidate steps.

        Runs the pipeline repeatedly so both branches are covered
        probabilistically; each run's return value must be the payload of
        one of the two candidate steps.
        """
        # NOTE(review): the original built an unused Dataset of numpy zeros
        # on every iteration and left a debug print; both were dead code and
        # are removed here. The executor is invoked without input data, as
        # before.
        for _ in range(10):
            result = LocalExecutor() << (Pipeline() >> RandomChoice([
                PipelineStep('a', lambda x, context: 'a'),
                PipelineStep('b', lambda x, context: 'b')
            ]))
            # result is (context, value); the value comes from the chosen step.
            self.assertIn(result[1], ['a', 'b'])
# Example 3
    def test_forest(self):
        """Hyperopt over a RandomForest model space completes on the iris data.

        The original test made no assertion at all, so it passed even if the
        executor produced nothing; at minimum the run must yield a context
        and a pipeline result.
        """
        model_list = [
            #(RandomForestRegressor, random_forest_hp_space('mae')),
            (RandomForestClassifier, random_forest_hp_space())
        ]

        # Load the dataset once instead of calling load_iris() twice.
        iris = datasets.load_iris()
        data = Dataset(iris.data, iris.target)
        context, pipeline_data = LocalExecutor(data, 1) << (
            Pipeline() >> PipelineStep('model space', ModelSpace(model_list))
            >> PipelineStep(
                'H',
                Hyperopt(Validate(test_size=0.33, metrics=mean_absolute_error),
                         max_evals=2)))

        # Minimal sanity assertions: the run produced results.
        self.assertIsNotNone(context)
        self.assertIsNotNone(pipeline_data)
# Example 4
    def test_pipeline_hyperopt(self):
        """End-to-end run: model space -> feature generation -> Hyperopt ->
        best-model choice -> feature selection, printing the outcome."""
        x, y = make_classification(n_samples=100,
                                   n_features=40,
                                   n_informative=2,
                                   n_redundant=10,
                                   flip_y=0.05)
        model_list = [(RandomForestClassifier, random_forest_hp_space()),
                      (GradientBoostingClassifier, grad_boosting_hp_space()),
                      (SVC, svc_kernel_hp_space('rbf')),
                      (KNeighborsClassifier, knn_hp_space()),
                      (XGBClassifier, xgboost_hp_space())]

        # Build the chain one stage at a time for readability.
        pipeline = Pipeline()
        pipeline = pipeline >> PipelineStep(
            'model space', ModelSpace(model_list), initializer=True)
        pipeline = pipeline >> FormulaFeatureGenerator(['+', '-', '*'])
        pipeline = pipeline >> Hyperopt(
            Validate(test_size=0.1, metrics=roc_auc_score), max_evals=2)
        pipeline = pipeline >> ChooseBest(1)
        pipeline = pipeline >> FeatureSelector(10)

        context, pipeline_data = LocalExecutor(Dataset(x, y), 2) << pipeline

        # Diagnostic dump of the surviving models and the final data shape.
        print('0' * 30)
        for result in pipeline_data.return_val:
            print(result.model, result.score)
        print(pipeline_data.dataset.data.shape)
        print('0' * 30)
# Example 5
    def test_correlated_feature_generator(self):
        """Generated + selected features must round-trip through Preprocessing.

        After the pipeline builds formula features and prunes them (recursive
        selection, then correlation filtering), Preprocessing.reproduce on a
        fresh copy of the same raw data must rebuild an identical matrix.
        """
        model_list = [(RandomForestRegressor, {}),
                      (GradientBoostingRegressor, {}),
                      (SVR, {}),
                      (XGBRegressor, {})]

        n_features_to_select = random.randint(5, 30)

        boston = datasets.load_boston()
        pipeline = (
            Pipeline()
            >> PipelineStep('model space', ModelSpace(model_list),
                            initializer=True)
            >> FormulaFeatureGenerator(['+', '-', '*', '/'], limit=10)
            >> Validate(test_size=0.1, metrics=mean_absolute_error)
            >> ChooseBest(1, by_largest_score=False)
            >> RecursiveFeatureSelector(
                n_features_to_select=n_features_to_select)
            >> CorrelatedFeatureSelector(max_correlation=0.9))
        context, pipeline_data = (
            LocalExecutor(Dataset(boston.data, boston.target), 5) << pipeline)

        final_data = Preprocessing().reproduce(
            pipeline_data.dataset, Dataset(boston.data, boston.target))
        self.assertEqual(pipeline_data.dataset.data.shape, final_data.shape)
        self.assertTrue((final_data == pipeline_data.dataset.data).all())
# Example 6
    def test_RFS(self):
        """Recursive feature selection output must be reproducible.

        Runs feature generation + validation + recursive selection, then
        checks that Preprocessing.reproduce rebuilds the exact same matrix
        from a fresh Dataset of the same raw arrays.
        """
        x, y = make_classification(n_samples=100,
                                   n_features=40,
                                   n_informative=2,
                                   n_redundant=10,
                                   flip_y=0.05)

        model_list = [(RandomForestClassifier, {}),
                      (GradientBoostingClassifier, {}),
                      (SVC, {}),
                      (KNeighborsClassifier, {}),
                      (XGBClassifier, {})]

        n_features_to_select = random.randint(5, 30)

        pipeline = (
            Pipeline()
            >> PipelineStep('model space', ModelSpace(model_list),
                            initializer=True)
            >> FormulaFeatureGenerator(['+', '-', '*', '/'])
            >> Validate(test_size=0.1, metrics=roc_auc_score)
            >> ChooseBest(1)
            >> RecursiveFeatureSelector(
                n_features_to_select=n_features_to_select))
        context, pipeline_data = LocalExecutor(Dataset(x, y), 2) << pipeline

        final_data = Preprocessing().reproduce(pipeline_data.dataset,
                                               Dataset(x, y))
        self.assertEqual(pipeline_data.dataset.data.shape, final_data.shape)
        self.assertTrue((final_data == pipeline_data.dataset.data).all())
# Example 7
    def test_voting_feature_selector(self):
        """VotingFeatureSelector output must round-trip through Preprocessing.

        Four regressors vote on features (reverse_score=True because the
        metric is an error, where smaller is better); the selected matrix
        must be exactly reproducible from the raw data.
        """
        x, y = make_regression(
            n_samples=100,
            n_features=40,
            n_informative=2,
        )

        model_list = [(RandomForestRegressor, {}),
                      (GradientBoostingRegressor, {}), (SVR, {}),
                      (XGBRegressor, {})]

        data = Dataset(x, y)

        # NOTE(review): the original declared result_mult/result_div lists
        # that were never used; they are removed as dead code.
        context, pipeline_data = LocalExecutor(data, 10) << (
            Pipeline() >> PipelineStep(
                'model space', ModelSpace(model_list), initializer=True) >>
            FormulaFeatureGenerator(['+', '-', '*', '/']) >> Validate(
                test_size=0.1, metrics=mean_absolute_error) >> ChooseBest(
                    4, by_largest_score=False) >> VotingFeatureSelector(
                        feature_to_select=10, reverse_score=True))

        preprocessing = Preprocessing()
        final_data = preprocessing.reproduce(pipeline_data.dataset,
                                             Dataset(x, y))
        self.assertEqual(pipeline_data.dataset.data.shape, final_data.shape)
        self.assertTrue((final_data == pipeline_data.dataset.data).all())

        # Diagnostic dump of the surviving models and the final data shape.
        print('0' * 30)
        for result in pipeline_data.return_val:
            print(result.model, result.score)
        print(pipeline_data.dataset.data.shape)
        print('0' * 30)
# Example 8
    def test_pipeline(self):
        """Interaction-only polynomial generation on a 2x2 frame yields 4 columns."""
        frame = pd.DataFrame([[1, 2], [3, 4]])
        generator = PolynomialGenerator(interaction_only=True, degree=4)
        pipeline = Pipeline() >> PipelineStep('generate_features', generator)

        context, pipe_output = LocalExecutor(Dataset(frame, None)) << pipeline

        self.assertEqual(pipe_output.dataset.data.shape, (2, 4))
# Example 9
 def test_initializer(self):
     """An initializer step runs during epoch 0 even when more epochs are set."""

     def report_epoch(x, context):
         # Step payload: returns the epoch number it was executed in.
         return context.epoch

     result = LocalExecutor(epochs=10) << (
         Pipeline() >> PipelineStep('a', report_epoch, initializer=True))
     # result is (context, value); the initializer must have run at epoch 0.
     self.assertEqual(result[1], 0)