Example #1
    def test_NumberRange(self):
        """
        Tests for the IntegerRange and FloatRange classes.
        """
        dtypes = {int: np.int32, float: np.float32}

        expected_linspace = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        number_range_linspace = IntegerRange(start=1, stop=9, num=9, range_type="linspace")
        number_range_linspace.transform()
        self.assertListEqual(expected_linspace, number_range_linspace.values)

        expected_geomspace = [1, 10, 100, 1000, 10000]
        number_range_geomspace = IntegerRange(1, 10000, num=5, range_type="geomspace")
        number_range_geomspace.transform()
        self.assertListEqual(expected_geomspace, number_range_geomspace.values)

        number_range_range = IntegerRange(self.start, self.end, step=2, range_type="range")
        number_range_range.transform()
        self.assertListEqual(number_range_range.values, list(np.arange(self.start, self.end, 2)))

        number_range_logspace = FloatRange(-1, 1, num=50, range_type='logspace')
        number_range_logspace.transform()
        np.testing.assert_array_almost_equal(number_range_logspace.values, np.logspace(-1, 1, num=50).tolist())

        # error tests
        with self.assertRaises(ValueError):
            number_range = IntegerRange(start=0, stop=self.end, range_type="geomspace")
            number_range.transform()

        with self.assertRaises(ValueError):
            number_range = IntegerRange(start=1, stop=15, range_type="logspace")
            number_range.transform()

        with self.assertRaises(ValueError):
            IntegerRange(start=self.start, stop=self.end, range_type="ownspace")
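A rough sketch of what transform() does, inferred from the numpy calls the assertions above compare against (a sketch only, not photonai's actual implementation; the integer cast that IntegerRange applies is omitted):

import numpy as np

def transform_sketch(start, stop, range_type, num=50, step=1):
    # expand a number range into the concrete list stored in .values
    if range_type == "linspace":
        values = np.linspace(start, stop, num=num)
    elif range_type == "geomspace":
        # np.geomspace itself raises ValueError when a bound is 0,
        # which is what the first error test exercises
        values = np.geomspace(start, stop, num=num)
    elif range_type == "logspace":
        # start and stop are exponents here; note that the second error test
        # shows photonai rejects logspace for IntegerRange
        values = np.logspace(start, stop, num=num)
    elif range_type == "range":
        values = np.arange(start, stop, step)
    else:
        # matches the "ownspace" error test
        raise ValueError("range_type must be linspace, geomspace, logspace or range")
    return values.tolist()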
Example #2
    def setUp(self):
        """
        Set up the default start settings for all tests.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        self.categorical = Categorical(["a", "b", "c", "d", "e", "f", "g", "h"])
        self.bool = BooleanSwitch()
Example #3
    def test_domain(self):

        self.float_range.transform()
        self.intger_range.transform()
        self.assertListEqual(self.intger_range.values, list(np.arange(2, 6)))
        self.assertListEqual(self.float_range.values,
                             list(np.linspace(0.1, 5.7, dtype=np.float64)))

        big_float_range = FloatRange(-300.57, np.pi * 4000)
        big_float_range.transform()
        self.assertListEqual(big_float_range.values,
                             list(np.linspace(-300.57, np.pi * 4000)))
        self.assertListEqual(self.categorical.values,
                             ["a", "b", "c", "d", "e", "f", "g", "h"])
        self.assertListEqual(self.bool.values, [True, False])
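The float assertions rely on a numpy default: np.linspace produces num=50 samples when num is not given, so a FloatRange without an explicit num is expected to expand to 50 evenly spaced values. A quick check of that assumption:

import numpy as np

print(len(np.linspace(0.1, 5.7)))               # 50 samples by default
print(len(np.linspace(-300.57, np.pi * 4000)))  # likewise 50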
Example #4
    def test_classification_2(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # Simple estimator Switch
            switch = Switch("estimator_switch")
            switch += PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear", "rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            switch += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )
            pipe += switch

            self.run_hyperpipe(pipe, self.classification)
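Assuming range_type="range" expands the way np.arange does (as the tests in Example #1 suggest), the min_samples_split range above yields three candidate values:

import numpy as np

print(np.arange(0.05, 0.26, 0.1))  # [0.05 0.15 0.25]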
Example #5
    def test_cv_config_and_dummy_nr(self):
        X, y = load_boston(return_X_y=True)
        self.hyperpipe += PipelineElement('StandardScaler')
        self.hyperpipe += PipelineElement('PCA', {'n_components': IntegerRange(3, 5)})
        self.hyperpipe += PipelineElement('SVR', {'C': FloatRange(0.001, 10, num=5),
                                                  'kernel': Categorical(['linear', 'rbf'])})

        self.hyperpipe.fit(X, y)

        expected_configs = 2 * 5 * 2

        # check version is present
        self.assertIsNotNone(self.hyperpipe.results.version)

        # check nr of outer and inner folds
        self.assertTrue(len(self.hyperpipe.results.outer_folds) == self.outer_fold_nr)
        self.assertTrue(len(self.hyperpipe.cross_validation.outer_folds) == self.outer_fold_nr)

        for outer_fold_id, inner_folds in self.hyperpipe.cross_validation.inner_folds.items():
            self.assertTrue(len(inner_folds) == self.inner_fold_nr)

        for outer_fold_result in self.hyperpipe.results.outer_folds:
            # check that we have the right amount of configs tested in each outer fold
            self.assertTrue(len(outer_fold_result.tested_config_list) == expected_configs)

            for config_result in outer_fold_result.tested_config_list:
                # check that we have the right amount of inner-folds per config
                self.assertTrue(len(config_result.inner_folds) == self.inner_fold_nr)

        self.check_for_dummy()
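Where expected_configs = 2 * 5 * 2 comes from, assuming the ranges expand like their numpy counterparts:

import numpy as np

n_components = len(np.arange(3, 5))  # IntegerRange(3, 5) -> [3, 4] -> 2 values
n_C = 5                              # FloatRange(0.001, 10, num=5) -> 5 values
n_kernel = 2                         # Categorical(['linear', 'rbf']) -> 2 values
assert n_components * n_C * n_kernel == 20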
Example #6
    def test_huge_combinations(self):
        hp = Hyperpipe(
            "huge_combinations",
            metrics=["accuracy"],
            best_config_metric="accuracy",
            output_settings=OutputSettings(
                project_folder=self.tmp_folder_path),
        )

        hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})
        stack = Stack("ensemble")
        for i in range(20):
            stack += PipelineElement(
                "SVC",
                hyperparameters={
                    "C": FloatRange(0.001, 5),
                    "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
                },
            )
        hp += stack
        hp += PipelineElement(
            "SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})
        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
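The Warning is presumably raised because the search space explodes: counting only the kernel choices of the 20 stacked SVCs, before the continuous C ranges are even considered, the grid is already astronomically large:

print(4 ** 20)  # 1099511627776 kernel combinations from the stack alone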
Example #7
    def test_classification_9(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # crazy everything
            pipe += PipelineElement('StandardScaler')
            pipe += PipelineElement('SamplePairingClassification',
                                    {'draw_limit': [100], 'generator': Categorical(['nearest_pair', 'random_pair'])},
                                    distance_metric='euclidean', test_disabled=True)
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch('source1_features')
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            source2_branch = Branch('source2_features')
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)
            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack('source_stack', elements=[source1_branch, source2_branch])
            # final estimator with stack output as features
            pipe += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})

            self.run_hyperpipe(pipe, self.classification)
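To make the DataFilter split concrete: with the 13 Boston Housing features referenced in the comments, the two branches receive these index ranges:

import numpy as np

n_features = 13
half = int(np.floor(n_features / 2))           # 6
print(np.arange(start=0, stop=half))           # [0 1 2 3 4 5]
print(np.arange(start=half, stop=n_features))  # [ 6  7  8  9 10 11 12]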
Example #8
    def test_classification_6(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # Simple estimator Stack (use mean in the end)
            SVR = PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear", "rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            RF = PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )
            pipe += Stack("estimator_stack", elements=[SVR, RF])
            pipe += PipelineElement("PhotonVotingClassifier")

            self.run_hyperpipe(pipe, self.classification)
Example #9
        def test_one_hyperpipe(learning_curves, learning_curves_cut):
            if learning_curves and learning_curves_cut is None:
                learning_curves_cut = FloatRange(0, 1, 'range', 0.2)
            output_settings = OutputSettings(
                project_folder=self.tmp_folder_path, save_output=False)
            test_hyperpipe = Hyperpipe(
                'test_pipe',
                learning_curves=learning_curves,
                learning_curves_cut=learning_curves_cut,
                metrics=['accuracy', 'recall', 'specificity'],
                best_config_metric='accuracy',
                inner_cv=self.inner_cv,
                output_settings=output_settings)

            self.assertEqual(test_hyperpipe.cross_validation.learning_curves,
                             learning_curves)
            if learning_curves:
                self.assertEqual(
                    test_hyperpipe.cross_validation.learning_curves_cut,
                    learning_curves_cut)
            else:
                self.assertIsNone(
                    test_hyperpipe.cross_validation.learning_curves_cut)

            test_hyperpipe += PipelineElement('StandardScaler')
            test_hyperpipe += PipelineElement('PCA', {'n_components': [1, 2]},
                                              random_state=42)
            test_hyperpipe += PipelineElement('SVC', {
                'C': [0.1],
                'kernel': ['linear']
            },
                                              random_state=42)
            test_hyperpipe.fit(self.X, self.y)
            config_results = test_hyperpipe.results_handler.results.outer_folds[
                0].tested_config_list
            config_num = len(config_results)
            for config_nr in range(config_num):
                for inner_fold_nr in range(self.inner_cv.n_splits):
                    curves = config_results[config_nr].inner_folds[
                        inner_fold_nr].learning_curves
                    if learning_curves:
                        self.assertEqual(len(curves),
                                         len(learning_curves_cut.values))
                        for learning_point_nr in range(
                                len(learning_curves_cut.values)):
                            test_metrics = list(
                                curves[learning_point_nr][1].keys())
                            train_metrics = list(
                                curves[learning_point_nr][2].keys())
                            self.assertEqual(
                                test_hyperpipe.optimization.metrics,
                                test_metrics)
                            self.assertEqual(
                                test_hyperpipe.optimization.metrics,
                                train_metrics)
                    else:
                        self.assertEqual(curves, [])
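The cut FloatRange(0, 1, 'range', 0.2) (positionally: start, stop, range_type, step) should expand like np.arange, which is where the five learning-curve points counted via len(learning_curves_cut.values) come from:

import numpy as np

print(np.arange(0, 1, 0.2))  # [0.  0.2 0.4 0.6 0.8] -> 5 cut points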
Example #10
    def test_classification_9(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # crazy everything
            pipe += PipelineElement("StandardScaler")
            pipe += PipelineElement(
                "SamplePairingClassification",
                {
                    "draw_limit": [100],
                    "generator": Categorical(["nearest_pair", "random_pair"]),
                },
                distance_metric="euclidean",
                test_disabled=True,
            )
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch("source1_features")
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(
                start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            source2_branch = Branch("source2_features")
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(
                start=int(np.floor(self.X_shape[1] /
                                   2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack("source_stack",
                          elements=[source1_branch, source2_branch])
            # final estimator with stack output as features
            pipe += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )

            self.run_hyperpipe(pipe, self.classification)
Example #11
    def test_classification_2(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # Simple estimator Switch
            switch = Switch('estimator_switch')
            switch += PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear', 'rbf']),
                                                              'C': Categorical([.01, 1, 5])})
            switch += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})
            pipe += switch

            self.run_hyperpipe(pipe, self.classification)
Example #12
    def test_classification_6(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # Simple estimator Stack (use mean in the end)
            SVR = PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear', 'rbf']),
                                                          'C': Categorical([.01, 1, 5])})
            RF = PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})
            pipe += Stack('estimator_stack', elements=[SVR, RF])
            pipe += PipelineElement('PhotonVotingClassifier')

            self.run_hyperpipe(pipe, self.classification)
Example #13
    def test_huge_combinations(self):
        hp = Hyperpipe('huge_combinations', inner_cv=KFold(n_splits=3), metrics=['accuracy'], best_config_metric='accuracy',
                       output_settings=OutputSettings(project_folder=self.tmp_folder_path))

        hp += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})
        stack = Stack('ensemble')
        for i in range(20):
            stack += PipelineElement('SVC', hyperparameters={'C': FloatRange(0.001, 5),
                                                             'kernel': ["linear", "rbf", "sigmoid", "polynomial"]})
        hp += stack
        hp += PipelineElement("SVC", hyperparameters={'kernel': ["linear", "rbf", "sigmoid"]})
        X, y = load_breast_cancer(return_X_y=True)
        with self.assertRaises(Warning):
            hp.fit(X, y)
Example #14
    def test_cv_config_and_dummy_nr(self):
        X, y = load_boston(return_X_y=True)
        self.hyperpipe += PipelineElement("StandardScaler")
        self.hyperpipe += PipelineElement("PCA",
                                          {"n_components": IntegerRange(3, 7)})
        self.hyperpipe += PipelineElement(
            "SVR",
            {
                "C": FloatRange(0.001, 10, num=10),
                "kernel": Categorical(["linear", "rbf"]),
            },
        )

        self.hyperpipe.fit(X, y)

        expected_configs = 4 * 10 * 2

        # check version is present
        self.assertIsNotNone(self.hyperpipe.results.version)

        # check nr of outer and inner folds
        self.assertTrue(
            len(self.hyperpipe.results.outer_folds) == self.outer_fold_nr)
        self.assertTrue(
            len(self.hyperpipe.cross_validation.outer_folds) ==
            self.outer_fold_nr)

        for (
                outer_fold_id,
                inner_folds,
        ) in self.hyperpipe.cross_validation.inner_folds.items():
            self.assertTrue(len(inner_folds) == self.inner_fold_nr)

        for outer_fold_result in self.hyperpipe.results.outer_folds:
            # check that we have the right amount of configs tested in each outer fold
            self.assertTrue(
                len(outer_fold_result.tested_config_list) == expected_configs)

            for config_result in outer_fold_result.tested_config_list:
                # check that we have the right amount of inner-folds per config
                self.assertTrue(
                    len(config_result.inner_folds) == self.inner_fold_nr)

        self.check_for_dummy()
Example #15
class BaseTest(unittest.TestCase):
    def setUp(self):
        """
        Set up the default start settings for all tests.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        self.categorical = Categorical(
            ["a", "b", "c", "d", "e", "f", "g", "h"])
        self.bool = BooleanSwitch()

    def test_rand_success(self):

        for _ in range(100):
            self.assertIn(self.intger_range.get_random_value(),
                          list(range(2, 6)))

            self.assertGreaterEqual(self.float_range.get_random_value(), 0.1)
            self.assertLess(self.float_range.get_random_value(), 5.7)

            self.assertIn(
                self.categorical.get_random_value(),
                ["a", "b", "c", "d", "e", "f", "g", "h"],
            )

            self.assertIn(self.bool.get_random_value(), [True, False])

        self.float_range.transform()
        self.intger_range.transform()

        for _ in range(100):
            self.assertIn(
                self.intger_range.get_random_value(definite_list=True),
                self.intger_range.values,
            )
            self.assertIn(
                self.float_range.get_random_value(definite_list=True),
                self.float_range.values,
            )

    def test_rand_error(self):
        # each call needs its own assertRaises block; inside a single block,
        # statements after the first raising call would never execute
        with self.assertRaises(ValueError):
            self.intger_range.get_random_value(definite_list=True)
        with self.assertRaises(ValueError):
            self.float_range.get_random_value(definite_list=True)
        with self.assertRaises(NotImplementedError):
            self.bool.get_random_value(definite_list=False)
        with self.assertRaises(NotImplementedError):
            self.categorical.get_random_value(definite_list=False)
Example #16
    def test_classification_5(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # multi-switch
            # setup switch to choose between PCA or simple feature selection and add it to the pipe
            pre_switch = Switch("preproc_switch")
            pre_switch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            pre_switch += PipelineElement(
                "FClassifSelectPercentile",
                hyperparameters={
                    "percentile":
                    IntegerRange(start=5, step=20, stop=66, range_type="range")
                },
                test_disabled=True,
            )
            pipe += pre_switch
            # setup estimator switch and add it to the pipe
            estimator_switch = Switch("estimator_switch")
            estimator_switch += PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear", "rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            estimator_switch += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )
            pipe += estimator_switch

            self.run_hyperpipe(pipe, self.classification)
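Assuming the same np.arange semantics as in Example #1, the percentile range above expands to four candidate values:

import numpy as np

print(np.arange(5, 66, 20))  # [ 5 25 45 65]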
Example #17
    def test_classification_5(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # multi-switch
            # setup switch to choose between PCA or simple feature selection and add it to the pipe
            pre_switch = Switch('preproc_switch')
            pre_switch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                          test_disabled=True)
            pre_switch += PipelineElement('FClassifSelectPercentile', hyperparameters={
                'percentile': IntegerRange(start=5, step=20, stop=66, range_type='range')}, test_disabled=True)
            pipe += pre_switch
            # setup estimator switch and add it to the pipe
            estimator_switch = Switch('estimator_switch')
            estimator_switch += PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear', 'rbf']),
                                                                        'C': Categorical([.01, 1, 5])})
            estimator_switch += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})
            pipe += estimator_switch

            self.run_hyperpipe(pipe, self.classification)
Example #18
    def test_classification_8(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            pipe += PipelineElement('StandardScaler')
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch('source1_features')
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement('ConfounderRemoval', {}, standardize_covariates=True, test_disabled=True,
                                              confounder_names=['cov1', 'cov2'])
            source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            source2_branch = Branch('source2_features')
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement('ConfounderRemoval', {}, standardize_covariates=True, test_disabled=True,
                                              confounder_names=['cov1', 'cov2'])
            source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

            # final estimator with stack output as features
            # setup estimator switch and add it to the pipe
            switch = Switch('estimator_switch')
            switch += PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear', 'rbf']),
                                                              'C': Categorical([.01, 1, 5])})
            switch += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})
            pipe += switch

            self.run_hyperpipe(pipe, self.classification)
Example #19
from photonai.optimization import FloatRange, IntegerRange

X, y = load_breast_cancer(return_X_y=True)

my_pipe = Hyperpipe('basic_stack_pipe',
                    optimizer='sk_opt',
                    optimizer_params={'n_configurations': 5},
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

my_pipe += PipelineElement('StandardScaler')

tree = PipelineElement('DecisionTreeClassifier',
                       hyperparameters={
                           'criterion': ['gini'],
                           'min_samples_split': IntegerRange(2, 4)
                       })

svc = PipelineElement('LinearSVC', hyperparameters={'C': FloatRange(0.5, 25)})

# for a stack that includes estimators, you can choose whether predict or predict_proba is called for all estimators
# if only some of them implement predict_proba, predict is called for the remaining ones
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)

my_pipe += PipelineElement('LinearSVC')
my_pipe.fit(X, y)
Example #20
prepro_switch = Switch("PreprocSwitch")  # Switch name assumed; the snippet begins mid-statement
prepro_switch += PipelineElement(
    'PCA', hyperparameters={'n_components': IntegerRange(5, 30)})
prepro_switch += PipelineElement('RandomTreesEmbedding',
                                 hyperparameters={
                                     'n_estimators': IntegerRange(10, 30),
                                     'max_depth': IntegerRange(3, 6)
                                 })
prepro_switch += PipelineElement(
    'SelectPercentile', hyperparameters={'percentile': IntegerRange(5, 15)})
#prepro_switch += PipelineElement('FastICA', hyperparameters={'algorithm': Categorical(['parallel', 'deflation'])})

estimator_switch = Switch("EstimatorSwitch")
estimator_switch += PipelineElement(
    'SVC',
    hyperparameters={
        'kernel': Categorical(["linear", "rbf", 'poly', 'sigmoid']),
        'C': FloatRange(0.5, 100),
        'decision_function_shape': Categorical(['ovo', 'ovr']),
        'degree': IntegerRange(2, 5)
    })
estimator_switch += PipelineElement("RandomForestClassifier",
                                    hyperparameters={
                                        'n_estimators': IntegerRange(10, 100),
                                        "min_samples_split":
                                        IntegerRange(2, 4)
                                    })
estimator_switch += PipelineElement(
    "ExtraTreesClassifier",
    hyperparameters={'n_estimators': IntegerRange(5, 50)})
estimator_switch += PipelineElement(
    "SGDClassifier",
    hyperparameters={'penalty': Categorical(['l2', 'l1', 'elasticnet'])})
Example #21
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold

from photonai.base import Hyperpipe, PipelineElement, OutputSettings
from photonai.optimization import FloatRange, Categorical

# loading the iris dataset
X, y = load_iris(return_X_y=True)

# DESIGN YOUR PIPELINE
my_pipe = Hyperpipe('multi_class_svm_pipe',
                    optimizer='random_grid_search',
                    optimizer_params={'n_configurations': 10},
                    metrics=['accuracy'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3, shuffle=True),
                    inner_cv=KFold(n_splits=3, shuffle=True),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

my_pipe.add(PipelineElement('StandardScaler'))

my_pipe += PipelineElement('SVC',
                           hyperparameters={
                               'kernel': Categorical(['rbf', 'linear']),
                               'C': FloatRange(0.5, 2)
                           },
                           gamma='scale')

my_pipe.fit(X, y)
Example #22
# DESIGN YOUR PIPELINE
my_pipe = Hyperpipe(
    "group_split_pipe",
    optimizer="grid_search",
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="accuracy",
    outer_cv=GroupKFold(n_splits=4),
    inner_cv=GroupShuffleSplit(n_splits=10),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

# ADD ELEMENTS TO YOUR PIPELINE
# first normalize all features
my_pipe += PipelineElement("StandardScaler")
# then do feature selection using a PCA, specify which values to try in the hyperparameter search
my_pipe += PipelineElement("PCA",
                           hyperparameters={"n_components": [5, 10, None]},
                           test_disabled=True)
# engage and optimize the good old SVM for Classification
my_pipe += PipelineElement(
    "SVC",
    hyperparameters={
        "kernel": Categorical(["rbf", "linear"]),
        "C": FloatRange(0.5, 2, "linspace", num=5),
    },
)

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y, groups=groups)
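Passing num=5 with range_type "linspace" pins the number of C values explicitly, which keeps the grid finite for the grid_search optimizer; expanded via np.linspace it gives:

import numpy as np

print(np.linspace(0.5, 2, num=5))  # [0.5   0.875 1.25  1.625 2.   ]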
Example #23
    def test_false_range_type(self):
        with self.assertRaises(ValueError):
            float_range = FloatRange(1.0, 5.2, range_type='normal_distributed')
            float_range.transform()
Example #24
    def test_against_smac(self):
        # PHOTON implementation
        self.pipe.add(PipelineElement("StandardScaler"))
        # then do feature selection using a PCA, specify which values to try in the hyperparameter search
        self.pipe += PipelineElement(
            "PCA", hyperparameters={"n_components": IntegerRange(5, 30)}
        )
        # engage and optimize the good old SVM for Classification
        self.pipe += PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["linear", "rbf", "poly", "sigmoid"]),
                "C": FloatRange(0.5, 200),
            },
            gamma="auto",
        )

        self.X, self.y = self.simple_classification()
        self.pipe.fit(self.X, self.y)

        # AUTO ML direct
        # Build Configuration Space which defines all parameters and their ranges
        cs = ConfigurationSpace()

        # We define a few possible types of SVM-kernels and add them as "kernel" to our cs
        n_components = UniformIntegerHyperparameter(
            "PCA__n_components", 5, 30
        )  # , default_value=5)
        cs.add_hyperparameter(n_components)

        kernel = CategoricalHyperparameter(
            "SVC__kernel", ["linear", "rbf", "poly", "sigmoid"]
        )  # , default_value="linear")
        cs.add_hyperparameter(kernel)

        c = UniformFloatHyperparameter("SVC__C", 0.5, 200)  # , default_value=1)
        cs.add_hyperparameter(c)

        # Scenario object
        scenario = Scenario(
            {
                "run_obj": "quality",  # we optimize quality (alternatively runtime)
                "runcount-limit": 800,  # maximum function evaluations
                "cs": cs,  # configuration space
                "deterministic": "true",
                "shared_model": "false",  # !!!!
                "wallclock_limit": self.time_limit,
            }
        )

        # Optimize, using a SMAC-object
        print("Optimizing! Depending on your machine, this might take a few minutes.")
        smac = SMAC4BO(
            scenario=scenario,
            rng=np.random.RandomState(42),
            tae_runner=self.objective_function,
        )

        self.traurig = smac

        incumbent = smac.optimize()

        inc_value = self.objective_function(incumbent)

        print(incumbent)
        print(inc_value)

        runhistory_photon = self.smac_helper["data"].solver.runhistory
        runhistory_original = smac.solver.runhistory

        x_ax = range(
            1,
            min(
                len(runhistory_original.cost_per_config.keys()),
                len(runhistory_photon.cost_per_config.keys()),
            )
            + 1,
        )
        y_ax_original = [runhistory_original.cost_per_config[tmp] for tmp in x_ax]
        y_ax_photon = [runhistory_photon.cost_per_config[tmp] for tmp in x_ax]

        y_ax_original_inc = [min(y_ax_original[: tmp + 1]) for tmp in x_ax]
        y_ax_photon_inc = [min(y_ax_photon[: tmp + 1]) for tmp in x_ax]

        plt.figure(figsize=(10, 7))
        plt.plot(x_ax, y_ax_original, "g", label="Original")
        plt.plot(x_ax, y_ax_photon, "b", label="PHOTON")
        plt.plot(x_ax, y_ax_photon_inc, "r", label="PHOTON Incumbent")
        plt.plot(x_ax, y_ax_original_inc, "k", label="Original Incumbent")
        plt.title("Photon Prove")
        plt.xlabel("X")
        plt.ylabel("Y")
        plt.legend(loc="best")
        plt.show()

        def neighbours(items, fill=None):
            before = itertools.chain([fill], items)
            after = itertools.chain(
                items, [fill]
            )  # You could use itertools.zip_longest() later instead.
            next(after)
            for a, b, c in zip(before, items, after):
                yield [value for value in (a, b, c) if value is not fill]

        print("---------------")
        original_pairing = [
            sum(values) / len(values) for values in neighbours(y_ax_original)
        ]
        bias_term = np.mean(
            [
                abs(y_ax_original_inc[t] - y_ax_photon_inc[t])
                for t in range(len(y_ax_photon_inc))
            ]
        )
        photon_pairing = [
            sum(values) / len(values) - bias_term for values in neighbours(y_ax_photon)
        ]
        counter = 0
        for i, x in enumerate(x_ax):
            if abs(original_pairing[i] - photon_pairing[i]) > 0.05:
                counter += 1
            self.assertLessEqual(counter / len(x_ax), 0.15)
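The neighbours helper slides a window of up to three adjacent values over a list; the test uses it to smooth both cost curves before comparing them. Its behaviour on a small input (reusing the function defined above):

print(list(neighbours([1, 2, 3])))  # [[1, 2], [1, 2, 3], [2, 3]]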
Example #25
my_pipe = Hyperpipe('smac_pipe',  # pipe name assumed; the snippet begins mid-call
                    optimizer='smac',  # which optimizer PHOTON shall use, in this case smac
                    optimizer_params={'scenario_dict': scenario_dict},
                    metrics=['mean_squared_error', 'pearson_correlation'],
                    best_config_metric='mean_squared_error',
                    outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=settings)


# ADD ELEMENTS TO YOUR PIPELINE
# first normalize all features
my_pipe.add(PipelineElement('StandardScaler'))
# then do feature selection using a PCA, specify which values to try in the hyperparameter search
my_pipe += PipelineElement('PCA', hyperparameters={'n_components': IntegerRange(5, 10)}, test_disabled=True)

switch = Switch("Test_Switch")
# engage and optimize SVR
# linspace and logspace are converted to uniform and log-uniform priors in skopt
switch += PipelineElement('SVR', hyperparameters={'C': FloatRange(0, 10, range_type='linspace'),
                                                   'epsilon': FloatRange(0, 0.0001, range_type='linspace'),
                                                   'tol': FloatRange(1e-4, 1e-2, range_type='linspace'),
                                                   'kernel': Categorical(['linear', 'rbf', 'poly'])})

switch += PipelineElement('RandomForestRegressor', hyperparameters={'n_estimators': Categorical([10, 20])})

my_pipe += switch

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y)
Example #26
class HyperparameterBaseTest(unittest.TestCase):
    def setUp(self):
        """
        Set up the default start settings for all tests.
        """
        self.intger_range = IntegerRange(2, 6)
        self.float_range = FloatRange(0.1, 5.7)
        self.cateogrical_truth = ["a", "b", "c", "d", "e", "f", "g", "h"]
        self.categorical = Categorical(self.cateogrical_truth)
        self.bool = BooleanSwitch()

    def test_rand_success(self):

        for _ in range(100):
            self.assertIn(self.intger_range.get_random_value(),
                          list(range(2, 6)))

            self.assertGreaterEqual(self.float_range.get_random_value(), 0.1)
            self.assertLess(self.float_range.get_random_value(), 5.7)

            self.assertIn(self.categorical.get_random_value(),
                          self.cateogrical_truth)

            self.assertIn(self.bool.get_random_value(), [True, False])

        self.float_range.transform()
        self.intger_range.transform()

        for _ in range(100):
            self.assertIn(
                self.intger_range.get_random_value(definite_list=True),
                self.intger_range.values)
            self.assertIn(
                self.float_range.get_random_value(definite_list=True),
                self.float_range.values)

    def test_domain(self):

        self.float_range.transform()
        self.intger_range.transform()
        self.assertListEqual(self.intger_range.values, list(np.arange(2, 6)))
        self.assertListEqual(self.float_range.values,
                             list(np.linspace(0.1, 5.7, dtype=np.float64)))

        big_float_range = FloatRange(-300.57, np.pi * 4000)
        big_float_range.transform()
        self.assertListEqual(big_float_range.values,
                             list(np.linspace(-300.57, np.pi * 4000)))
        self.assertListEqual(self.categorical.values,
                             ["a", "b", "c", "d", "e", "f", "g", "h"])
        self.assertListEqual(self.bool.values, [True, False])

    def test_rand_error(self):
        with self.assertRaises(ValueError):
            self.intger_range.get_random_value(definite_list=True)
        with self.assertRaises(ValueError):
            self.float_range.get_random_value(definite_list=True)
        with self.assertRaises(NotImplementedError):
            self.categorical.get_random_value(definite_list=False)
        with self.assertRaises(NotImplementedError):
            self.bool.get_random_value(definite_list=False)

    def test_categorical(self):
        self.assertEqual(self.categorical[2], self.cateogrical_truth[2])
Example #27
        "acq_func_kwargs": {
            "kappa": 1.96
        },
    },
    metrics=["mean_squared_error", "pearson_correlation"],
    best_config_metric="mean_squared_error",
    outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=settings,
)

# ADD ELEMENTS TO YOUR PIPELINE
# first normalize all features
my_pipe += PipelineElement("StandardScaler")

# engage and optimize SVR
# linspace and logspace are converted to uniform and log-uniform priors in skopt
my_pipe += PipelineElement(
    "SVR",
    hyperparameters={
        "C": FloatRange(1e-3, 100, range_type="logspace"),
        "epsilon": FloatRange(1e-3, 10, range_type="logspace"),
        "tol": FloatRange(1e-4, 1e-2, range_type="linspace"),
        "kernel": Categorical(["linear", "rbf", "poly"]),
    },
)

# NOW TRAIN YOUR PIPELINE
my_pipe.fit(X, y)
Example #28
X, y = load_breast_cancer(return_X_y=True)


my_pipe = Hyperpipe('example_project',
                    optimizer='sk_opt',
                    optimizer_params={'n_configurations': 25},
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3))

my_pipe += PipelineElement('StandardScaler')
my_pipe += PipelineElement('PCA',
                           hyperparameters={'n_components':
                                            FloatRange(0.5, 0.8, step=0.1)},
                           test_disabled=True)

my_pipe += PipelineElement('ImbalancedDataTransformer',
                           hyperparameters={'method_name':
                                            ['RandomUnderSampler', 'SMOTE']},
                           test_disabled=True)

# set up two learning algorithms in an ensemble
ensemble_learner = Stack('estimators', use_probabilities=True)
ensemble_learner += PipelineElement('DecisionTreeClassifier',
                                    criterion='gini',
                                    hyperparameters={'min_samples_split':
                                                     IntegerRange(2, 4)})
ensemble_learner += PipelineElement('LinearSVC',
                                    hyperparameters={'C':
                                                     FloatRange(0.5, 25)})

# the source snippet is truncated above; the C range and the closing steps
# below are assumed, following the LinearSVC stack in Example #19
my_pipe += ensemble_learner
my_pipe.fit(X, y)
Example #29
my_pipe = Hyperpipe(
    "basic_stack_pipe",
    optimizer="sk_opt",
    optimizer_params={"n_configurations": 5},
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="accuracy",
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

my_pipe += PipelineElement("StandardScaler")

tree = PipelineElement(
    "DecisionTreeClassifier",
    hyperparameters={
        "criterion": ["gini"],
        "min_samples_split": IntegerRange(2, 4)
    },
)

svc = PipelineElement("LinearSVC", hyperparameters={"C": FloatRange(0.5, 25)})

# for a stack that includes estimators, you can choose whether predict or predict_proba is called for all estimators
# if only some of them implement predict_proba, predict is called for the remaining ones
my_pipe += Stack("final_stack", [tree, svc], use_probabilities=True)

my_pipe += PipelineElement("LinearSVC")
my_pipe.fit(X, y)
Example #30
from photonai.base import Hyperpipe, PipelineElement
from photonai.optimization import FloatRange, Categorical, IntegerRange

# WE USE THE BREAST CANCER SET FROM SKLEARN
X, y = load_breast_cancer(return_X_y=True)

# DESIGN YOUR PIPELINE
my_pipe = Hyperpipe(
    'basic_svm_pipe',
    inner_cv=KFold(n_splits=5),
    outer_cv=KFold(n_splits=3),
    optimizer='sk_opt',
    optimizer_params={'n_configurations': 25},
    metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
    best_config_metric='accuracy')

my_pipe.add(PipelineElement('StandardScaler'))

my_pipe += PipelineElement(
    'PCA',
    hyperparameters={'n_components': IntegerRange(10, 30)},
    test_disabled=True)

my_pipe += PipelineElement('SVC',
                           hyperparameters={
                               'kernel': Categorical(['rbf', 'linear']),
                               'C': FloatRange(1, 6)
                           },
                           gamma='scale')

my_pipe.fit(X, y)