示例#1
0
    def test_classification_11(self):
        """Fit a random forest on the probability outputs of an estimator stack."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first-level learners: a linear SVC, an rbf SVC and a random forest
            linear_grid = {
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            }
            linear_svc = PipelineElement("SVC", hyperparameters=linear_grid)
            rbf_grid = {
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            rbf_svc = PipelineElement("SVC", hyperparameters=rbf_grid)
            forest = PipelineElement("RandomForestClassifier")

            # the stack is built with use_probabilities=True and is followed
            # by a second random forest as the final estimator
            first_level = Stack(
                "estimator_stack",
                elements=[linear_svc, rbf_svc, forest],
                use_probabilities=True,
            )
            hyperpipe += first_level
            hyperpipe += PipelineElement("RandomForestClassifier")

            self.run_hyperpipe(hyperpipe, self.classification)
示例#2
0
    def test_classification_12(self):
        """Fit the probability-stack pipeline on the iris data (multiclass).

        Each hyperpipe in ``self.hyperpipes`` is copied, extended with an
        estimator stack (two SVCs and a random forest, created with
        ``use_probabilities=True``) plus a final random forest, switched to
        the accuracy metric and fitted directly.
        """
        # return_X_y is keyword-only in current scikit-learn releases, so the
        # positional form ``load_iris(True)`` raises a TypeError there.
        X, y = load_iris(return_X_y=True)
        # multiclass classification
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()
            # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
            # create estimator stack
            SVC1 = PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            SVC2 = PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            RF = PipelineElement("RandomForestClassifier")
            # add to pipe
            pipe += Stack("estimator_stack",
                          elements=[SVC1, SVC2, RF],
                          use_probabilities=True)
            pipe += PipelineElement("RandomForestClassifier")

            # optimize for accuracy on this multiclass problem
            pipe.optimization.metrics = ["accuracy"]
            pipe.optimization.best_config_metric = "accuracy"

            pipe.fit(X, y)
示例#3
0
    def test_regression_9(self):
        """Run scaler -> PCA -> sample pairing -> SVR through every hyperpipe."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            hyperpipe += PipelineElement("StandardScaler")

            # PCA is created with test_disabled=True
            pca_grid = {"n_components": Categorical([None, 5])}
            hyperpipe += PipelineElement(
                "PCA",
                hyperparameters=pca_grid,
                test_disabled=True,
            )

            # sample pairing with two generator strategies
            pairing_grid = {
                "draw_limit": [100],
                "generator": Categorical(["nearest_pair", "random_pair"]),
            }
            hyperpipe += PipelineElement(
                "SamplePairingRegression",
                pairing_grid,
                distance_metric="euclidean",
                test_disabled=False,
            )

            # final estimator
            svr_grid = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            hyperpipe += PipelineElement("SVR", hyperparameters=svr_grid)

            self.run_hyperpipe(hyperpipe, self.regression)
示例#4
0
    def test_classification_6(self):
        """Stack an SVC and a random forest, then vote over their outputs."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first stack member: support vector classifier
            svc_grid = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            svc = PipelineElement("SVC", hyperparameters=svc_grid)

            # second stack member: random forest
            split_range = FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
            forest = PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range},
            )

            hyperpipe += Stack("estimator_stack", elements=[svc, forest])
            hyperpipe += PipelineElement("PhotonVotingClassifier")

            self.run_hyperpipe(hyperpipe, self.classification)
示例#5
0
    def test_classification_2(self):
        """Run every hyperpipe with a switch between an SVC and a random forest."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            estimator_choice = Switch("estimator_switch")

            # option 1: SVC
            svc_grid = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            estimator_choice += PipelineElement("SVC", hyperparameters=svc_grid)

            # option 2: random forest
            split_range = FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
            estimator_choice += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range},
            )

            hyperpipe += estimator_choice

            self.run_hyperpipe(hyperpipe, self.classification)
示例#6
0
 def setUp(self):
     """
     Create one hyperparameter object of each kind for the tests.
     """
     # integer and float ranges
     self.intger_range = IntegerRange(2, 6)
     self.float_range = FloatRange(0.1, 5.7)
     # categorical over the single letters "a" .. "h"
     self.categorical = Categorical(list("abcdefgh"))
     self.bool = BooleanSwitch()
示例#7
0
    def test_classification_9(self):
        """Combine sample pairing, two feature-half branches and a random forest."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            hyperpipe += PipelineElement("StandardScaler")
            pairing_grid = {
                "draw_limit": [100],
                "generator": Categorical(["nearest_pair", "random_pair"]),
            }
            hyperpipe += PipelineElement(
                "SamplePairingClassification",
                pairing_grid,
                distance_metric="euclidean",
                test_disabled=True,
            )

            # split the feature columns at the midpoint; each branch filters
            # its own half and then applies a PCA created with test_disabled=True
            n_features = self.X_shape[1]
            midpoint = int(np.floor(n_features / 2))

            # columns [0, midpoint)
            lower_half = Branch("source1_features")
            lower_half += DataFilter(indices=np.arange(start=0, stop=midpoint))
            lower_half += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            # columns [midpoint, n_features)
            upper_half = Branch("source2_features")
            upper_half += DataFilter(
                indices=np.arange(start=midpoint, stop=n_features)
            )
            upper_half += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            # stack both branches, then train the final estimator on the stack output
            hyperpipe += Stack("source_stack", elements=[lower_half, upper_half])
            split_range = FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
            hyperpipe += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range},
            )

            self.run_hyperpipe(hyperpipe, self.classification)
示例#8
0
    def test_classification_6(self):
        """Stack an SVC and a random forest, then vote over their outputs."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first stack member: support vector classifier
            svc_grid = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            svc = PipelineElement('SVC', hyperparameters=svc_grid)

            # second stack member: random forest
            split_range = FloatRange(start=.05, step=.1, stop=.26, range_type='range')
            forest = PipelineElement('RandomForestClassifier',
                                     hyperparameters={'min_samples_split': split_range})

            hyperpipe += Stack('estimator_stack', elements=[svc, forest])
            hyperpipe += PipelineElement('PhotonVotingClassifier')

            self.run_hyperpipe(hyperpipe, self.classification)
示例#9
0
    def test_classification_7(self):
        """Stack two SVCs (linear and rbf kernel) and vote over their outputs."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # the same estimator type appears twice with different kernels
            linear_grid = {
                'kernel': Categorical(['linear']),
                'C': Categorical([.01, 1, 5]),
            }
            linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)
            rbf_grid = {
                'kernel': Categorical(['rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)

            hyperpipe += Stack('estimator_stack', elements=[linear_svc, rbf_svc])
            hyperpipe += PipelineElement('PhotonVotingClassifier')

            self.run_hyperpipe(hyperpipe, self.classification)
示例#10
0
    def test_classification_2(self):
        """Run every hyperpipe with a switch between an SVC and a random forest."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            estimator_choice = Switch('estimator_switch')

            # option 1: SVC
            svc_grid = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimator_choice += PipelineElement('SVC', hyperparameters=svc_grid)

            # option 2: random forest
            split_range = FloatRange(start=.05, step=.1, stop=.26, range_type='range')
            estimator_choice += PipelineElement('RandomForestClassifier',
                                                hyperparameters={'min_samples_split': split_range})

            hyperpipe += estimator_choice

            self.run_hyperpipe(hyperpipe, self.classification)
示例#11
0
    def test_regression_9(self):
        """Run scaler -> PCA -> sample pairing -> SVR through every hyperpipe."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            hyperpipe += PipelineElement('StandardScaler')

            # PCA is created with test_disabled=True
            pca_grid = {'n_components': Categorical([None, 5])}
            hyperpipe += PipelineElement('PCA', hyperparameters=pca_grid, test_disabled=True)

            # sample pairing with two generator strategies
            pairing_grid = {
                'draw_limit': [100],
                'generator': Categorical(['nearest_pair', 'random_pair']),
            }
            hyperpipe += PipelineElement('SamplePairingRegression', pairing_grid,
                                         distance_metric='euclidean', test_disabled=False)

            # final estimator
            svr_grid = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            hyperpipe += PipelineElement('SVR', hyperparameters=svr_grid)

            self.run_hyperpipe(hyperpipe, self.regression)
示例#12
0
    def test_classification_11(self):
        """Fit a random forest on the probability outputs of an estimator stack."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first-level learners: a linear SVC, an rbf SVC and a random forest
            linear_grid = {
                'kernel': Categorical(['linear']),
                'C': Categorical([.01, 1, 5]),
            }
            linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)
            rbf_grid = {
                'kernel': Categorical(['rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)
            forest = PipelineElement('RandomForestClassifier')

            # the stack is built with use_probabilities=True and is followed
            # by a second random forest as the final estimator
            first_level = Stack('estimator_stack',
                                elements=[linear_svc, rbf_svc, forest],
                                use_probabilities=True)
            hyperpipe += first_level
            hyperpipe += PipelineElement('RandomForestClassifier')

            self.run_hyperpipe(hyperpipe, self.classification)
示例#13
0
    def test_cv_config_and_dummy_nr(self):
        """Check fold counts and tested-config counts after fitting the hyperpipe.

        A scaler, a PCA and an SVR are added to ``self.hyperpipe``; after
        fitting, the number of outer folds, inner folds and tested
        configurations is compared against the expected values, and the
        shared ``check_for_dummy`` helper is run.
        """
        # NOTE(review): load_boston was removed from scikit-learn in 1.2, so
        # this test needs an older release or a replacement dataset.
        X, y = load_boston(return_X_y=True)
        self.hyperpipe += PipelineElement('StandardScaler')
        self.hyperpipe += PipelineElement('PCA', {'n_components': IntegerRange(3, 5)})
        self.hyperpipe += PipelineElement('SVR', {'C': FloatRange(0.001, 10, num=5),
                                                  'kernel': Categorical(['linear', 'rbf'])})

        self.hyperpipe.fit(X, y)

        # grid size = PCA options * C values * kernels
        # (assumes IntegerRange(3, 5) yields two values -- TODO confirm)
        expected_configs = 2 * 5 * 2

        # check version is present
        self.assertIsNotNone(self.hyperpipe.results.version)

        # check nr of outer and inner folds; assertEqual reports both values
        # on failure, unlike assertTrue(a == b)
        self.assertEqual(len(self.hyperpipe.results.outer_folds), self.outer_fold_nr)
        self.assertEqual(len(self.hyperpipe.cross_validation.outer_folds), self.outer_fold_nr)

        for _, inner_folds in self.hyperpipe.cross_validation.inner_folds.items():
            self.assertEqual(len(inner_folds), self.inner_fold_nr)

        for outer_fold_result in self.hyperpipe.results.outer_folds:
            # check that we have the right amount of configs tested in each outer fold
            self.assertEqual(len(outer_fold_result.tested_config_list), expected_configs)

            for config_result in outer_fold_result.tested_config_list:
                # check that we have the right amount of inner-folds per config
                self.assertEqual(len(config_result.inner_folds), self.inner_fold_nr)

        self.check_for_dummy()
示例#14
0
 def test_categorical(self):
     """
     A Categorical must expose the list it was built from as ``values``.
     """
     sentence = "Lorem ipsum dolor sit amet consetetur sadipscing elitr"
     words = sentence.split(" ")
     hyperparam = Categorical(values=words)
     self.assertListEqual(hyperparam.values, words)
示例#15
0
    def test_classification_9(self):
        """Combine sample pairing, two feature-half branches and a random forest."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            hyperpipe += PipelineElement('StandardScaler')
            pairing_grid = {
                'draw_limit': [100],
                'generator': Categorical(['nearest_pair', 'random_pair']),
            }
            hyperpipe += PipelineElement('SamplePairingClassification', pairing_grid,
                                         distance_metric='euclidean', test_disabled=True)

            # split the feature columns at the midpoint; each branch filters
            # its own half and then applies a PCA created with test_disabled=True
            n_features = self.X_shape[1]
            midpoint = int(np.floor(n_features / 2))

            # columns [0, midpoint)
            lower_half = Branch('source1_features')
            lower_half += DataFilter(indices=np.arange(start=0, stop=midpoint))
            lower_half += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

            # columns [midpoint, n_features)
            upper_half = Branch('source2_features')
            upper_half += DataFilter(indices=np.arange(start=midpoint, stop=n_features))
            upper_half += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

            # stack both branches, then train the final estimator on the stack output
            hyperpipe += Stack('source_stack', elements=[lower_half, upper_half])
            split_range = FloatRange(start=.05, step=.1, stop=.26, range_type='range')
            hyperpipe += PipelineElement('RandomForestClassifier',
                                         hyperparameters={'min_samples_split': split_range})

            self.run_hyperpipe(hyperpipe, self.classification)
示例#16
0
    def test_classification_1(self):
        """Run every hyperpipe with a single SVC and a two-kernel grid."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # a fresh grid per copy so no hyperparameter object is shared
            kernel_grid = {'kernel': Categorical(['linear', 'rbf'])}
            svc = PipelineElement(name='SVC', hyperparameters=kernel_grid)
            hyperpipe += svc

            self.run_hyperpipe(hyperpipe, self.classification)
示例#17
0
    def test_regression_1(self):
        """Run every hyperpipe with a single SVR and a two-kernel grid."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # a fresh grid per copy so no hyperparameter object is shared
            kernel_grid = {"kernel": Categorical(["linear", "rbf"])}
            svr = PipelineElement(name="SVR", hyperparameters=kernel_grid)
            hyperpipe += svr

            self.run_hyperpipe(hyperpipe, self.regression)
示例#18
0
class BaseTest(unittest.TestCase):
    """Random-value sampling checks for the hyperparameter classes."""

    def setUp(self):
        """
        Create one hyperparameter object of each kind for the tests.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        # categorical over the single letters "a" .. "h"
        self.categorical = Categorical(list("abcdefgh"))
        self.bool = BooleanSwitch()

    def test_rand_success(self):
        """Random values must lie inside the configured bounds or value lists."""
        letters = list("abcdefgh")

        # sampling without a definite value list
        for _ in range(100):
            self.assertIn(self.intger_range.get_random_value(), [2, 3, 4, 5])

            self.assertGreaterEqual(self.float_range.get_random_value(), 0.1)
            self.assertLess(self.float_range.get_random_value(), 5.7)

            self.assertIn(self.categorical.get_random_value(), letters)

            self.assertIn(self.bool.get_random_value(), [True, False])

        # transform() is called before sampling with definite_list=True
        for hyperparam in (self.float_range, self.intger_range):
            hyperparam.transform()

        for _ in range(100):
            for hyperparam in (self.intger_range, self.float_range):
                drawn = hyperparam.get_random_value(definite_list=True)
                self.assertIn(drawn, hyperparam.values)

    def test_rand_error(self):
        """Sampling from a definite list without a prior transform() must raise."""
        # NOTE(review): assertRaises leaves the block at the first exception,
        # so only the integer-range call is actually verified; the three
        # calls after it never run. Confirm which of them are expected to
        # raise ValueError before giving each its own assertRaises context.
        with self.assertRaises(ValueError):
            self.intger_range.get_random_value(definite_list=True)
            self.float_range.get_random_value(definite_list=True)
            self.bool.get_random_value(definite_list=True)
            self.categorical.get_random_value(definite_list=True)
示例#19
0
    def test_class_with_data_preproc(self):
        """
        Round-trip a fitted pipeline with preprocessing through JSON.

        The pipeline is fitted, serialized with JsonTransformer, rebuilt from
        that JSON and serialized again. Both JSON documents, the best configs
        of both pipelines after fitting, and their element dictionaries must
        be equal.
        """

        X, y = load_breast_cancer(return_X_y=True)

        # DESIGN YOUR PIPELINE
        my_pipe = Hyperpipe(
            'basic_svm_pipe',
            optimizer='grid_search',
            metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
            best_config_metric='accuracy',
            eval_final_performance=False,
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            random_seed=42)

        preprocessing = Preprocessing()
        preprocessing += PipelineElement("LabelEncoder")
        my_pipe += preprocessing

        # ADD ELEMENTS TO YOUR PIPELINE
        # first normalize all features
        my_pipe.add(PipelineElement('StandardScaler'))

        # then do feature selection using a PCA,
        my_pipe += PipelineElement(
            'PCA',
            hyperparameters={'n_components': IntegerRange(10, 12)},
            test_disabled=True)

        # engage and optimize the good old SVM for Classification
        my_pipe += PipelineElement(
            'SVC',
            hyperparameters={'kernel': Categorical(['rbf', 'linear'])},
            C=2,
            gamma='scale')

        # NOW TRAIN YOUR PIPELINE
        my_pipe.fit(X, y)

        json_transformer = JsonTransformer()

        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        # Keep the original JSON untouched: the former chained assignment
        # (pipe_json_reload = pipe_json = ...) overwrote it, which made the
        # comparison below trivially true.
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)

        self.assertEqual(pipe_json, pipe_json_reload)
        my_pipe_reload.fit(X, y)

        self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)

        self.assertDictEqual(elements_to_dict(my_pipe.copy_me()),
                             elements_to_dict(my_pipe_reload.copy_me()))
示例#20
0
    def test_class_with_data_01(self):
        """
        Round-trip a fitted pipeline through JSON.

        The pipeline is fitted, serialized with JsonTransformer, rebuilt from
        that JSON and serialized again. Both JSON documents and the best
        configs of both pipelines after fitting must be equal.
        """

        # return_X_y is keyword-only in current scikit-learn releases, so the
        # positional form ``load_breast_cancer(True)`` raises a TypeError there.
        X, y = load_breast_cancer(return_X_y=True)

        # DESIGN YOUR PIPELINE
        my_pipe = Hyperpipe(
            "basic_svm_pipe",
            optimizer="grid_search",
            metrics=["accuracy", "precision", "recall", "balanced_accuracy"],
            best_config_metric="accuracy",
            eval_final_performance=False,
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            random_seed=42,
        )

        preprocessing = Preprocessing()
        preprocessing += PipelineElement("LabelEncoder")
        my_pipe += preprocessing

        # ADD ELEMENTS TO YOUR PIPELINE
        # first normalize all features
        my_pipe.add(PipelineElement("StandardScaler"))

        # then do feature selection using a PCA,
        my_pipe += PipelineElement(
            "PCA",
            hyperparameters={"n_components": IntegerRange(10, 12)},
            test_disabled=True,
        )

        # engage and optimize the good old SVM for Classification
        my_pipe += PipelineElement(
            "SVC",
            hyperparameters={"kernel": Categorical(["rbf", "linear"])},
            C=2,
            gamma="scale",
        )

        # NOW TRAIN YOUR PIPELINE
        my_pipe.fit(X, y)

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        # Keep the original JSON untouched: the former chained assignment
        # (pipe_json_reload = pipe_json = ...) overwrote it, which made the
        # comparison below trivially true.
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)

        self.assertEqual(pipe_json, pipe_json_reload)

        my_pipe_reload.fit(X, y)
        self.assertDictEqual(my_pipe.best_config, my_pipe_reload.best_config)
示例#21
0
    def test_classification_5(self):
        """Chain a preprocessing switch and an estimator switch in one pipe."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first switch: PCA or percentile-based feature selection,
            # both created with test_disabled=True
            preproc_choice = Switch("preproc_switch")
            preproc_choice += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            percentile_range = IntegerRange(
                start=5, step=20, stop=66, range_type="range"
            )
            preproc_choice += PipelineElement(
                "FClassifSelectPercentile",
                hyperparameters={"percentile": percentile_range},
                test_disabled=True,
            )
            hyperpipe += preproc_choice

            # second switch: SVC or random forest
            estimator_choice = Switch("estimator_switch")
            svc_grid = {
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            estimator_choice += PipelineElement("SVC", hyperparameters=svc_grid)
            split_range = FloatRange(
                start=0.05, step=0.1, stop=0.26, range_type="range"
            )
            estimator_choice += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={"min_samples_split": split_range},
            )
            hyperpipe += estimator_choice

            self.run_hyperpipe(hyperpipe, self.classification)
示例#22
0
    def test_classification_12(self):
        """Fit the probability-stack pipeline on the iris data (multiclass).

        Each hyperpipe in ``self.hyperpipes`` is copied, extended with an
        estimator stack (two SVCs and a random forest, created with
        ``use_probabilities=True``) plus a final random forest, switched to
        the accuracy metric and fitted directly.
        """
        # return_X_y is keyword-only in current scikit-learn releases, so the
        # positional form ``load_iris(True)`` raises a TypeError there.
        X, y = load_iris(return_X_y=True)
        # multiclass classification
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()
            # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
            # create estimator stack
            SVC1 = PipelineElement('SVC',
                                   hyperparameters={'kernel': Categorical(['linear']), 'C': Categorical([.01, 1, 5])})
            SVC2 = PipelineElement('SVC',
                                   hyperparameters={'kernel': Categorical(['rbf']), 'C': Categorical([.01, 1, 5])})
            RF = PipelineElement('RandomForestClassifier')
            # add to pipe
            pipe += Stack('estimator_stack', elements=[SVC1, SVC2, RF], use_probabilities=True)
            pipe += PipelineElement('RandomForestClassifier')

            # optimize for accuracy on this multiclass problem
            pipe.optimization.metrics = ['accuracy']
            pipe.optimization.best_config_metric = 'accuracy'

            pipe.fit(X, y)
示例#23
0
    def test_classification_5(self):
        """Chain a preprocessing switch and an estimator switch in one pipe."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # first switch: PCA or percentile-based feature selection,
            # both created with test_disabled=True
            preproc_choice = Switch('preproc_switch')
            preproc_choice += PipelineElement('PCA',
                                              hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)
            percentile_range = IntegerRange(start=5, step=20, stop=66, range_type='range')
            preproc_choice += PipelineElement('FClassifSelectPercentile',
                                              hyperparameters={'percentile': percentile_range},
                                              test_disabled=True)
            hyperpipe += preproc_choice

            # second switch: SVC or random forest
            estimator_choice = Switch('estimator_switch')
            svc_grid = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimator_choice += PipelineElement('SVC', hyperparameters=svc_grid)
            split_range = FloatRange(start=.05, step=.1, stop=.26, range_type='range')
            estimator_choice += PipelineElement('RandomForestClassifier',
                                                hyperparameters={'min_samples_split': split_range})
            hyperpipe += estimator_choice

            self.run_hyperpipe(hyperpipe, self.classification)
示例#24
0
    def test_classification_7(self):
        """Stack two SVCs (linear and rbf kernel) and vote over their outputs."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            # the same estimator type appears twice with different kernels
            linear_grid = {
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            }
            linear_svc = PipelineElement("SVC", hyperparameters=linear_grid)
            rbf_grid = {
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            }
            rbf_svc = PipelineElement("SVC", hyperparameters=rbf_grid)

            hyperpipe += Stack("estimator_stack", elements=[linear_svc, rbf_svc])
            hyperpipe += PipelineElement("PhotonVotingClassifier")

            self.run_hyperpipe(hyperpipe, self.classification)
示例#25
0
    def test_classification_8(self):
        """Stack two feature-half branches and finish with an estimator switch."""
        for template in self.hyperpipes:
            hyperpipe = template.copy_me()

            hyperpipe += PipelineElement('StandardScaler')

            # split the feature columns at the midpoint; each branch filters
            # its own half, then applies confounder removal and a PCA, both
            # created with test_disabled=True
            n_features = self.X_shape[1]
            midpoint = int(np.floor(n_features / 2))

            # columns [0, midpoint)
            lower_half = Branch('source1_features')
            lower_half += DataFilter(indices=np.arange(start=0, stop=midpoint))
            lower_half += PipelineElement('ConfounderRemoval', {},
                                          standardize_covariates=True,
                                          test_disabled=True,
                                          confounder_names=['cov1', 'cov2'])
            lower_half += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

            # columns [midpoint, n_features)
            upper_half = Branch('source2_features')
            upper_half += DataFilter(indices=np.arange(start=midpoint, stop=n_features))
            upper_half += PipelineElement('ConfounderRemoval', {},
                                          standardize_covariates=True,
                                          test_disabled=True,
                                          confounder_names=['cov1', 'cov2'])
            upper_half += PipelineElement('PCA',
                                          hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)

            # stack both branches
            hyperpipe += Stack('source_stack', elements=[lower_half, upper_half])

            # final step: switch between SVC and random forest on the stack output
            estimator_choice = Switch('estimator_switch')
            svc_grid = {
                'kernel': Categorical(['linear', 'rbf']),
                'C': Categorical([.01, 1, 5]),
            }
            estimator_choice += PipelineElement('SVC', hyperparameters=svc_grid)
            split_range = FloatRange(start=.05, step=.1, stop=.26, range_type='range')
            estimator_choice += PipelineElement('RandomForestClassifier',
                                                hyperparameters={'min_samples_split': split_range})
            hyperpipe += estimator_choice

            self.run_hyperpipe(hyperpipe, self.classification)
示例#26
0
    def test_cv_config_and_dummy_nr(self):
        """Check fold counts and tested-config counts after fitting the hyperpipe.

        A scaler, a PCA and an SVR are added to ``self.hyperpipe``; after
        fitting, the number of outer folds, inner folds and tested
        configurations is compared against the expected values, and the
        shared ``check_for_dummy`` helper is run.
        """
        # return_X_y is keyword-only in current scikit-learn releases, so the
        # positional form ``load_boston(True)`` is rejected there.
        # NOTE(review): load_boston itself was removed from scikit-learn in
        # 1.2, so this test needs an older release or a replacement dataset.
        X, y = load_boston(return_X_y=True)
        self.hyperpipe += PipelineElement("StandardScaler")
        self.hyperpipe += PipelineElement("PCA",
                                          {"n_components": IntegerRange(3, 7)})
        self.hyperpipe += PipelineElement(
            "SVR",
            {
                "C": FloatRange(0.001, 10, num=10),
                "kernel": Categorical(["linear", "rbf"]),
            },
        )

        self.hyperpipe.fit(X, y)

        # grid size = PCA options * C values * kernels
        # (assumes IntegerRange(3, 7) yields four values -- TODO confirm)
        expected_configs = 4 * 10 * 2

        # check version is present
        self.assertIsNotNone(self.hyperpipe.results.version)

        # check nr of outer and inner folds; assertEqual reports both values
        # on failure, unlike assertTrue(a == b)
        self.assertEqual(
            len(self.hyperpipe.results.outer_folds), self.outer_fold_nr)
        self.assertEqual(
            len(self.hyperpipe.cross_validation.outer_folds),
            self.outer_fold_nr)

        for _, inner_folds in self.hyperpipe.cross_validation.inner_folds.items():
            self.assertEqual(len(inner_folds), self.inner_fold_nr)

        for outer_fold_result in self.hyperpipe.results.outer_folds:
            # check that we have the right amount of configs tested in each outer fold
            self.assertEqual(
                len(outer_fold_result.tested_config_list), expected_configs)

            for config_result in outer_fold_result.tested_config_list:
                # check that we have the right amount of inner-folds per config
                self.assertEqual(
                    len(config_result.inner_folds), self.inner_fold_nr)

        self.check_for_dummy()
示例#27
0
    best_config_metric="mean_absolute_error",
    outer_cv=ShuffleSplit(n_splits=2, test_size=0.2),
    inner_cv=ShuffleSplit(n_splits=2, test_size=0.2),
    verbosity=1,
    cache_folder="./cache",
    output_settings=settings,
)

# CREATE NEURO BRANCH
# The branch collects the image-processing elements added below.
# nr_of_processes sets how many processes the branch should use.
neuro_branch = NeuroBranch("NeuroBranch", nr_of_processes=1)

# Resample the images to a desired voxel size. Here voxel_size is a
# hyperparameter with the candidate values 3 and 5.
# For a large number of subjects it is reasonable to define a batch size.
neuro_branch += PipelineElement(
    "ResampleImages", hyperparameters={"voxel_size": Categorical([3, 5])}, batch_size=20
)

# Additionally, smooth the entire image; fwhm is a hyperparameter with the
# candidate values 6 and 8.
neuro_branch += PipelineElement(
    "SmoothImages", {"fwhm": Categorical([6, 8])}, batch_size=20
)

# now, apply a brain atlas and extract 4 ROIs
# set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated
neuro_branch += PipelineElement(
    "BrainAtlas",
    hyperparameters={},
    rois=["Hippocampus_L", "Hippocampus_R", "Amygdala_L", "Amygdala_R"],
    atlas_name="AAL",
    extract_mode="vec",
示例#28
0
# Finish the Keras model started above this excerpt: pooling, flattening,
# one dense hidden layer and a softmax output layer with n_outputs units
# (model and n_outputs are defined outside this excerpt).
model.add(MaxPooling1D(pool_size=2))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(n_outputs, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# results are written below ./tmp/
settings = OutputSettings(project_folder='./tmp/')

# DESIGN YOUR PIPELINE
# grid search with 3 outer and 2 inner folds, optimized for accuracy
my_pipe = Hyperpipe('cnn_keras_multiclass_pipe',
                    optimizer='grid_search',
                    optimizer_params={},
                    metrics=['accuracy'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=2),
                    verbosity=1,
                    output_settings=settings)

# Hand the compiled model to the pipeline element; the number of epochs is
# the only hyperparameter in the grid (10 or 20).
my_pipe += PipelineElement('KerasBaseClassifier',
                           hyperparameters={'epochs': Categorical([10, 20])},
                           verbosity=1,
                           model=model)

# NOW TRAIN YOUR PIPELINE
# X and y are defined outside this excerpt
my_pipe.fit(X, y)

# open the result viewer for the fitted pipeline
Investigator.show(my_pipe)
示例#29
0
                    inner_cv=KFold(n_splits=10),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

# BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
tree_qua_branch = Branch('tree_branch')
tree_qua_branch += PipelineElement('QuantileTransformer')
tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                   {'min_samples_split': IntegerRange(2, 4)},
                                   criterion='gini')

# BRANCH WITH MinMaxScaler AND SVC
# NOTE(review): 'C' uses IntegerRange with the float bounds 0.01 and 2.0;
# a FloatRange looks intended here -- confirm before changing.
svm_mima_branch = Branch('svm_branch')
svm_mima_branch += PipelineElement('MinMaxScaler')
svm_mima_branch += PipelineElement('SVC', {
    'kernel': Categorical(['rbf', 'linear']),
    'C': IntegerRange(0.01, 2.0)
},
                                   gamma='auto')

# BRANCH WITH StandardScaler AND KNeighborsClassifier
knn_sta_branch = Branch('neighbour_branch')
knn_sta_branch += PipelineElement('StandardScaler')
knn_sta_branch += PipelineElement('KNeighborsClassifier')

# Combine the three branches in one stack; no voting option is passed here.
my_pipe += Stack('final_stack',
                 [tree_qua_branch, svm_mima_branch, knn_sta_branch])

# Final estimator added after the stack.
my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')
示例#30
0
    outer_cv=KFold(n_splits=2),
    inner_cv=KFold(n_splits=2),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

# ADD ELEMENTS TO YOUR PIPELINE
my_pipe.add(PipelineElement("StandardScaler"))

# Attention: the number of hidden layers should equal the number of activations.
# If you want to choose a function for every layer, note that grid_search does
# not forbid combinations with len(hidden_layer_sizes) != len(activations).

# USE KerasDnnRegressor AS THE ESTIMATOR
# NOTE(review): the previous comment announced a classifier, but the element
# added here is the regressor -- confirm which one is intended.
my_pipe += PipelineElement(
    "KerasDnnRegressor",
    hyperparameters={
        "hidden_layer_sizes": Categorical([[10, 8, 4], [20, 5]]),
        "dropout_rate": Categorical([0.5, 0.2]),
    },
    activations="relu",
    epochs=50,
    batch_size=32,
    verbosity=1,
)

# NOW TRAIN YOUR PIPELINE
# X and y are defined outside this excerpt
my_pipe.fit(X, y)

# NOTE(review): debug is not read anywhere in this excerpt.
debug = True
# open the result viewer for the fitted pipeline
Investigator.show(my_pipe)