Example #1
    def setUp(self):
        self.X, self.y = load_breast_cancer(True)
        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(),
             self.tree.copy_me(),
             self.gpc.copy_me()])
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(),
             self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(),
             self.transformer_branch.copy_me()])
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])
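
For orientation, a minimal sketch (not part of the test above) of how a nested Switch like the one built in this setUp can be dropped into a Hyperpipe; the optimizer and cross-validation settings here are assumptions chosen for brevity, and the Switch import location is assumed to match the photonai.base imports shown later in this listing.

# usage sketch: settings are illustrative assumptions, not taken from the test suite
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from photonai.base import Hyperpipe, PipelineElement, Switch, OutputSettings

X, y = load_breast_cancer(return_X_y=True)

demo_pipe = Hyperpipe('switch_demo',
                      optimizer='grid_search',
                      metrics=['accuracy'],
                      best_config_metric='accuracy',
                      outer_cv=KFold(n_splits=3),
                      inner_cv=KFold(n_splits=3),
                      output_settings=OutputSettings(project_folder='./tmp/'))
demo_pipe += PipelineElement('StandardScaler')
# the optimizer picks one of the competing estimators per configuration
demo_pipe += Switch('estimator_switch',
                    [PipelineElement('SVC', {'C': [0.1, 1]}),
                     PipelineElement('DecisionTreeClassifier',
                                     {'min_samples_split': [2, 3, 4]})])
demo_pipe.fit(X, y)
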
Example #2
    def test_classification_9(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # crazy everything
            pipe += PipelineElement('StandardScaler')
            pipe += PipelineElement('SamplePairingClassification',
                                    {'draw_limit': [100], 'generator': Categorical(['nearest_pair', 'random_pair'])},
                                    distance_metric='euclidean', test_disabled=True)
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch('source1_features')
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            source2_branch = Branch('source2_features')
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)
            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack('source_stack', elements=[source1_branch, source2_branch])
            # final estimator with stack output as features
            pipe += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})

            self.run_hyperpipe(pipe, self.classification)
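
The two DataFilter elements above split the feature matrix down the middle by column index. A standalone illustration of that index arithmetic; the feature count of 13 matches the Boston Housing reference in the comments and is used here purely as an assumption.

import numpy as np

n_features = 13  # Boston Housing, as referenced in the comments above (assumption)
first_half = np.arange(start=0, stop=int(np.floor(n_features / 2)))            # indices 0..5
second_half = np.arange(start=int(np.floor(n_features / 2)), stop=n_features)  # indices 6..12
print(first_half, second_half)
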
Example #3
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.
        """

        my_pipe = Hyperpipe(
            "basic_stacking",
            optimizer="grid_search",
            metrics=["accuracy", "precision", "recall"],
            best_config_metric="f1_score",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder="./tmp/"),
        )

        # BRANCH WITH QUANTILETRANSFORMER AND DECISIONTREECLASSIFIER
        tree_qua_branch = Branch("tree_branch")
        tree_qua_branch += PipelineElement("QuantileTransformer")
        tree_qua_branch += PipelineElement(
            "DecisionTreeClassifier",
            {"min_samples_split": IntegerRange(2, 4)},
            criterion="gini",
        )

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch("svm_branch")
        svm_mima_branch += PipelineElement("MinMaxScaler")
        svm_mima_branch += PipelineElement(
            "SVC",
            {
                "kernel": ["rbf", "linear"],  # Categorical(['rbf', 'linear']),
                "C": IntegerRange(0.01, 2.0),
            },
            gamma="auto",
        )

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch("neighbour_branch")
        knn_sta_branch += PipelineElement("StandardScaler")
        knn_sta_branch += PipelineElement("KNeighborsClassifier")

        # voting = True would average (take the mean of) the result of every branch
        my_pipe += Stack("final_stack",
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Example #4
    def test_classification_9(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            # crazy everything
            pipe += PipelineElement("StandardScaler")
            pipe += PipelineElement(
                "SamplePairingClassification",
                {
                    "draw_limit": [100],
                    "generator": Categorical(["nearest_pair", "random_pair"]),
                },
                distance_metric="euclidean",
                test_disabled=True,
            )
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch("source1_features")
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(
                start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            source2_branch = Branch("source2_features")
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(
                start=int(np.floor(self.X_shape[1] /
                                   2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack("source_stack",
                          elements=[source1_branch, source2_branch])
            # final estimator with stack output as features
            pipe += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )

            self.run_hyperpipe(pipe, self.classification)
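
Assuming range_type="range" expands like numpy.arange (an assumption, the test itself does not spell this out), the min_samples_split grid above would contain three values:

# assumed expansion of FloatRange(start=0.05, step=0.1, stop=0.26, range_type="range")
import numpy as np
print(np.arange(0.05, 0.26, 0.1))  # roughly [0.05, 0.15, 0.25]
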
Example #5
    def test_branch_in_branch(self):
        """
        Test for deep Pipeline.
        """

        my_pipe = Hyperpipe(
            'basic_stacking',
            optimizer='grid_search',
            metrics=['accuracy', 'precision', 'recall'],
            best_config_metric='f1_score',
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=3),
            verbosity=1,
            cache_folder="./cache/",
            output_settings=OutputSettings(project_folder='./tmp/'))

        # BRANCH WITH QUANTILETRANSFORMER AND DECISIONTREECLASSIFIER
        tree_qua_branch = Branch('tree_branch')
        tree_qua_branch += PipelineElement('QuantileTransformer')
        tree_qua_branch += PipelineElement(
            'DecisionTreeClassifier',
            {'min_samples_split': IntegerRange(2, 4)},
            criterion='gini')

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch('svm_branch')
        svm_mima_branch += PipelineElement('MinMaxScaler')
        svm_mima_branch += PipelineElement(
            'SVC',
            {
                'kernel': ['rbf', 'linear'],  # Categorical(['rbf', 'linear']),
                'C': IntegerRange(0.01, 2.0)
            },
            gamma='auto')

        # BRANCH WITH StandardScaler AND KNeighborsClassifier
        knn_sta_branch = Branch('neighbour_branch')
        knn_sta_branch += PipelineElement('StandardScaler')
        knn_sta_branch += PipelineElement('KNeighborsClassifier')

        # voting = True would average (take the mean of) the result of every branch
        my_pipe += Stack('final_stack',
                         [tree_qua_branch, svm_mima_branch, knn_sta_branch])
        my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

        json_transformer = JsonTransformer()
        pipe_json = json_transformer.create_json(my_pipe)
        my_pipe_reload = json_transformer.from_json(pipe_json)
        pipe_json_reload = json_transformer.create_json(my_pipe_reload)
        self.assertEqual(pipe_json, pipe_json_reload)
Example #6
    def test_copy_me(self):
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch('Random_Branch')
        pca_hyperparameters = {'n_components': [5, 10]}
        branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        photon_pipe = PhotonPipeline([("SimpleImputer", PipelineElement("SimpleImputer")),
                                      ("my_copy_switch", switch),
                                      ('RandomStack', stack),
                                      ('Callback1', CallbackElement('tmp_callback', np.mean)),
                                      ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier"))])

        copy_of_the_pipe = photon_pipe.copy_me()

        self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
        self.assertTrue(len(copy_of_the_pipe.elements) == 5)
        self.assertTrue(copy_of_the_pipe.elements[2][1].name == "RandomStack")
        self.assertTrue(copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
        self.assertDictEqual(copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
                             {"PCA__n_components": [5, 10]})
        self.assertTrue(isinstance(copy_of_the_pipe.elements[3][1], CallbackElement))
        self.assertTrue(copy_of_the_pipe.named_steps["tmp_callback"].delegate_function == np.mean)
Example #7
    def setup_crazy_pipe(self):
        # erase all, we need a complex and crazy task
        self.hyperpipe.elements = list()

        nmb_list = list()
        for i in range(5):
            nmb = ParallelBranch(name=str(i), nr_of_processes=i + 3)
            sp = PipelineElement(
                'PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
            nmb += sp
            nmb_list.append(nmb)

        my_switch = Switch('disabling_test_switch')
        my_switch += nmb_list[0]
        my_switch += nmb_list[1]

        my_stack = Stack('stack_of_branches')
        for i in range(3):
            my_branch = Branch('branch_' + str(i + 2))
            my_branch += PipelineElement('StandardScaler')
            my_branch += nmb_list[i + 2]
            my_stack += my_branch

        self.hyperpipe.add(my_stack)
        self.hyperpipe.add(PipelineElement('StandardScaler'))
        self.hyperpipe.add(my_switch)
        self.hyperpipe.add(PipelineElement('SVC'))
        return nmb_list
Example #8
 def test_ask_advanced(self):
     """
     Test advanced functionality of .ask()
     """
     branch = Branch('branch')
     branch += PipelineElement('PCA')
     branch += PipelineElement('SVC', {
         'C': [0.1, 1],
         'kernel': ['rbf', 'sigmoid']
     })
     pipe_switch = Switch('switch', [
         PipelineElement("StandardScaler"),
         PipelineElement("MaxAbsScaler")
     ])
     self.pipeline_elements = [
         PipelineElement("StandardScaler"),
         PipelineElement(
             'PCA',
             hyperparameters={'n_components': IntegerRange(5, 20)},
             test_disabled=True), pipe_switch, branch,
         Switch('Switch_in_switch', [branch, pipe_switch])
     ]
     generated_elements = self.test_ask()
     self.assertIn("PCA__n_components", generated_elements)
     self.assertIn("Switch_in_switch__current_element", generated_elements)
     self.assertIn("branch__SVC__C", generated_elements)
     self.assertIn("branch__SVC__kernel", generated_elements)
     self.assertIn("switch__current_element", generated_elements)
Example #9
 def test_ask_advanced(self):
     """
     Test advanced functionality of .ask()
     """
     branch = Branch("branch")
     branch += PipelineElement("PCA")
     branch += PipelineElement("SVC", {
         "C": [0.1, 1],
         "kernel": ["rbf", "sigmoid"]
     })
     pipe_switch = Switch(
         "switch",
         [
             PipelineElement("StandardScaler"),
             PipelineElement("MaxAbsScaler")
         ],
     )
     self.pipeline_elements = [
         PipelineElement("StandardScaler"),
         PipelineElement(
             "PCA",
             hyperparameters={"n_components": IntegerRange(5, 20)},
             test_disabled=True,
         ),
         pipe_switch,
         branch,
         Switch("Switch_in_switch", [branch, pipe_switch]),
     ]
     generated_elements = self.test_ask()
     self.assertIn("PCA__n_components", generated_elements)
     self.assertIn("Switch_in_switch__current_element", generated_elements)
     self.assertIn("branch__SVC__C", generated_elements)
     self.assertIn("branch__SVC__kernel", generated_elements)
     self.assertIn("switch__current_element", generated_elements)
Example #10
    def setup_crazy_pipe(self):
        # erase all, we need a complex and crazy task
        self.hyperpipe.elements = list()

        nmb_list = list()
        for i in range(5):
            nmb = NeuroBranch(name=str(i), nr_of_processes=i + 3)
            nmb += PipelineElement("SmoothImages")
            nmb_list.append(nmb)

        my_switch = Switch("disabling_test_switch")
        my_switch += nmb_list[0]
        my_switch += nmb_list[1]

        my_stack = Stack("stack_of_branches")
        for i in range(3):
            my_branch = Branch("branch_" + str(i + 2))
            my_branch += PipelineElement("StandardScaler")
            my_branch += nmb_list[i + 2]
            my_stack += my_branch

        self.hyperpipe.add(my_stack)
        self.hyperpipe.add(PipelineElement("StandardScaler"))
        self.hyperpipe.add(my_switch)
        self.hyperpipe.add(PipelineElement("SVC"))
        return nmb_list
Example #11
    def test_estimator_type(self):
        def callback(X, y=None):
            pass

        transformer_branch = Branch(
            'TransBranch',
            [PipelineElement('PCA'),
             PipelineElement('FastICA')])
        classifier_branch = Branch('ClassBranch', [PipelineElement('SVC')])
        regressor_branch = Branch('RegBranch', [PipelineElement('SVR')])
        callback_branch = Branch(
            'CallBranch',
            [PipelineElement('SVR'),
             CallbackElement('callback', callback)])

        self.assertEqual(transformer_branch._estimator_type, None)
        self.assertEqual(classifier_branch._estimator_type, 'classifier')
        self.assertEqual(regressor_branch._estimator_type, 'regressor')
        self.assertEqual(callback_branch._estimator_type, None)
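
The values asserted above follow scikit-learn's _estimator_type convention: a Branch ending in an estimator reports that estimator's type, while a Branch ending in a transformer or callback reports none. The raw scikit-learn estimators expose the same attribute directly:

from sklearn.svm import SVC, SVR

print(SVC()._estimator_type)  # 'classifier'
print(SVR()._estimator_type)  # 'regressor'
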
Example #12
    def setUp(self):
        self.svc_pipe_element = PipelineElement('SVC', {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']})
        self.lr_pipe_element = PipelineElement('DecisionTreeClassifier', {'min_samples_split': [2, 3, 4]})
        self.pipe_switch = Switch('switch', [self.svc_pipe_element, self.lr_pipe_element])

        self.branch = Branch('branch')
        self.branch += PipelineElement('PCA')
        self.branch += self.svc_pipe_element

        self.switch_in_switch = Switch('Switch_in_switch', [self.branch,
                                                            self.pipe_switch])
Example #13
    def setUp(self):
        self.X, self.y = load_breast_cancer(True)
        self.scaler = PipelineElement("StandardScaler", {'with_mean': True})
        self.pca = PipelineElement('PCA', {'n_components': [1, 2]},
                                   test_disabled=True,
                                   random_state=3)
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]},
                                    random_state=3)

        self.transformer_branch = Branch('MyBranch', [self.scaler, self.pca])
        self.transformer_branch_sklearn = SKPipeline([("SS", StandardScaler()),
                                                      ("PCA",
                                                       PCA(random_state=3))])
        self.estimator_branch = Branch('MyBranch',
                                       [self.scaler, self.pca, self.tree])
        self.estimator_branch_sklearn = SKPipeline([
            ("SS", StandardScaler()), ("PCA", PCA(random_state=3)),
            ("Tree", DecisionTreeClassifier(random_state=3))
        ])
Example #14
    def setUp(self):
        self.X, self.y = load_breast_cancer(True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        self.transformer_branch_1 = Branch('TransBranch1',
                                           [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2',
                                           [self.scaler.copy_me()])

        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        self.transformer_stack = Stack(
            'TransformerStack',
            [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack',
            [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me()
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me()
        ])

        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1,
              self.transformer_branch_2], self.transformer_branch_stack),
            ([self.estimator_branch_1,
              self.estimator_branch_2], self.estimator_branch_stack)
        ]
Example #15
    def test_sanity_check_pipe(self):
        test_branch = Branch('my_test_branch')

        def callback_func(X, y, **kwargs):
            pass

        with self.assertRaises(Warning):
            my_callback = CallbackElement('final_element_callback',
                                          delegate_function=callback_func)
            test_branch += my_callback
            no_callback_pipe = test_branch.prepare_photon_pipe(
                test_branch.elements)
            test_branch.sanity_check_pipeline(no_callback_pipe)
            self.assertTrue(no_callback_pipe[-1] is my_callback)
Example #16
    def test_add(self):
        branch = Branch('MyBranch', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(branch.elements), 2)
        self.assertDictEqual(branch._hyperparameters,
                             {'MyBranch__PCA__n_components': [5]})
        branch = Branch('MyBranch')
        branch += PipelineElement('PCA', {'n_components': [5]})
        branch += PipelineElement('FastICA')
        self.assertEqual(len(branch.elements), 2)
        self.assertDictEqual(branch._hyperparameters,
                             {'MyBranch__PCA__n_components': [5]})

        # add doubled item
        branch += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(branch.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            branch.hyperparameters, {
                'MyBranch__PCA__n_components': [5],
                'MyBranch__PCA2__n_components': [10, 20]
            })
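
The same behaviour as a short standalone sketch (using the photonai.base imports that appear later in this listing): a second element with an existing name is renamed with a numeric suffix, and every hyperparameter key is prefixed with the branch and element names.

from photonai.base import Branch, PipelineElement

branch = Branch('MyBranch')
branch += PipelineElement('PCA', {'n_components': [5]})
branch += PipelineElement('PCA', {'n_components': [10, 20]})  # auto-renamed to 'PCA2'
print(branch.elements[-1].name)   # PCA2
print(branch.hyperparameters)
# {'MyBranch__PCA__n_components': [5], 'MyBranch__PCA2__n_components': [10, 20]}
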
Example #17
    def test_prepare_photon_pipeline(self):
        test_branch = Branch('my_test_branch')
        test_branch += PipelineElement('SimpleImputer')
        test_branch += Switch('my_crazy_switch_bitch')
        test_branch += Stack('my_stacking_stack')
        test_branch += PipelineElement('SVC')

        generated_pipe = test_branch.prepare_photon_pipe(test_branch.elements)

        self.assertEqual(len(generated_pipe.named_steps), 4)
        for idx, element in enumerate(test_branch.elements):
            self.assertIs(generated_pipe.named_steps[element.name], element)
            self.assertIs(generated_pipe.elements[idx][1],
                          test_branch.elements[idx])
Example #18
    def test_classification_8(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            pipe += PipelineElement('StandardScaler')
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch('source1_features')
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement('ConfounderRemoval', {}, standardize_covariates=True, test_disabled=True,
                                              confounder_names=['cov1', 'cov2'])
            source1_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            source2_branch = Branch('source2_features')
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(start=int(np.floor(self.X_shape[1] / 2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement('ConfounderRemoval', {}, standardize_covariates=True, test_disabled=True,
                                              confounder_names=['cov1', 'cov2'])
            source2_branch += PipelineElement('PCA', hyperparameters={'n_components': Categorical([None, 5])},
                                              test_disabled=True)

            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack('source_stack', elements=[source1_branch, source2_branch])

            # final estimator with stack output as features
            # setup estimator switch and add it to the pipe
            switch = Switch('estimator_switch')
            switch += PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear', 'rbf']),
                                                              'C': Categorical([.01, 1, 5])})
            switch += PipelineElement('RandomForestClassifier', hyperparameters={
                'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')})
            pipe += switch

            self.run_hyperpipe(pipe, self.classification)
Example #19
    def setUp(self):
        self.svc_pipe_element = PipelineElement("SVC", {
            "C": [0.1, 1],
            "kernel": ["rbf", "sigmoid"]
        })
        self.lr_pipe_element = PipelineElement(
            "DecisionTreeClassifier", {"min_samples_split": [2, 3, 4]})
        self.pipe_switch = Switch(
            "switch", [self.svc_pipe_element, self.lr_pipe_element])

        self.branch = Branch("branch")
        self.branch += PipelineElement("PCA")
        self.branch += self.svc_pipe_element

        self.switch_in_switch = Switch("Switch_in_switch",
                                       [self.branch, self.pipe_switch])
Example #20
    def test_copy_me(self):
        branch = Branch('MyBranch', [self.scaler, self.pca])

        copy = branch.copy_me()
        self.assertEqual(branch.random_state, copy.random_state)
        self.assertDictEqual(elements_to_dict(copy), elements_to_dict(branch))

        copy = branch.copy_me()
        copy.elements[1].base_element.n_components = 3
        self.assertNotEqual(copy.elements[1].base_element.n_components,
                            branch.elements[1].base_element.n_components)

        fake_copy = branch
        fake_copy.elements[1].base_element.n_components = 3
        self.assertEqual(fake_copy.elements[1].base_element.n_components,
                         branch.elements[1].base_element.n_components)
Example #21
    def test_add(self):
        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})
        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        def callback(X, y=None):
            pass

        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })
Example #22
    def test_set_random_state(self):
        # the random state is set for all element types by one inherited method, so this single test covers them all
        random_state = 53
        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")
        my_switch = Switch("transformer_Switch")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch
        my_stack = Stack("Estimator_Stack")
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack
        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state
        self.assertTrue(my_switch.elements[1].random_state == random_state)
        self.assertTrue(
            my_switch.elements[1].base_element.random_state == random_state)
        self.assertTrue(my_stack.elements[1].random_state == random_state)
        self.assertTrue(
            my_stack.elements[1].base_element.random_state == random_state)
Example #23
neuro_branch += PipelineElement(
    "BrainAtlas",
    hyperparameters={},
    rois=["Hippocampus_L", "Hippocampus_R", "Amygdala_L", "Amygdala_R"],
    atlas_name="AAL",
    extract_mode="vec",
    batch_size=20,
)

# finally, add your neuro branch to your hyperpipe
neuro_branch += CallbackElement("NeuroCallback", my_monitor)
my_pipe += neuro_branch
# my_pipe += CallbackElement('NeuroCallback', my_monitor)

# now, add standard ML algorithms to your liking
feature_engineering = Branch("FeatureEngineering")
feature_engineering += PipelineElement("StandardScaler")


my_pipe += feature_engineering
my_pipe += CallbackElement("FECallback", my_monitor)
my_pipe += PipelineElement(
    "SVR", hyperparameters={"kernel": Categorical(["rbf", "linear"])}, gamma="scale"
)

# NOW TRAIN YOUR PIPELINE
start_time = time.time()
my_pipe.fit(X, y)
elapsed_time = time.time() - start_time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
Example #24
from photonai.base import Hyperpipe, PipelineElement, Stack, Branch, OutputSettings
from photonai.optimization import IntegerRange, Categorical

X, y = load_breast_cancer(True)

my_pipe = Hyperpipe('basic_stacking',
                    optimizer='grid_search',
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='f1_score',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=10),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

# BRANCH WITH QUANTILETRANSFORMER AND DECISIONTREECLASSIFIER
tree_qua_branch = Branch('tree_branch')
tree_qua_branch += PipelineElement('QuantileTransformer')
tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                   {'min_samples_split': IntegerRange(2, 4)},
                                   criterion='gini')

# BRANCH WITH MinMaxScaler AND SVC
svm_mima_branch = Branch('svm_branch')
svm_mima_branch += PipelineElement('MinMaxScaler')
svm_mima_branch += PipelineElement('SVC', {
    'kernel': Categorical(['rbf', 'linear']),
    'C': IntegerRange(0.01, 2.0)
},
                                   gamma='auto')

# BRANCH WITH StandardScaler AND KNeighborsClassifier
Example #25
my_pipe = Hyperpipe('data_integration',
                    optimizer='random_grid_search',
                    optimizer_params={'n_configurations': 2},
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='f1_score',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

my_pipe += PipelineElement('SimpleImputer')
my_pipe += PipelineElement('StandardScaler', {}, with_mean=True)

# Use only "mean" features: [mean_radius, mean_texture, mean_perimeter, mean_area, mean_smoothness, mean_compactness,
# mean_concavity, mean_concave_points, mean_symmetry, mean_fractal_dimension]
mean_branch = Branch('MeanFeature')
mean_branch += DataFilter(indices=np.arange(10))
mean_branch += PipelineElement('SVC', {'C': FloatRange(0.1, 10)},
                               kernel='linear')

# Use only "error" features
error_branch = Branch('ErrorFeature')
error_branch += DataFilter(indices=np.arange(10, 20))
error_branch += PipelineElement('SVC', {'C': Categorical([100, 1000, 1000])},
                                kernel='linear')

# use only "worst" features: [worst_radius, worst_texture, ..., worst_fractal_dimension]
worst_branch = Branch('WorstFeature')
worst_branch += DataFilter(indices=np.arange(20, 30))
worst_branch += PipelineElement('SVC')
Example #26
    def test_classification_8(self):
        for original_hyperpipe in self.hyperpipes:
            pipe = original_hyperpipe.copy_me()

            pipe += PipelineElement("StandardScaler")
            # setup pipeline branches with half of the features each
            # if both PCAs are disabled, features are simply concatenated and passed to the final estimator
            source1_branch = Branch("source1_features")
            # first half of features (for Boston Housing, same as indices=[0, 1, 2, 3, 4, 5])
            source1_branch += DataFilter(indices=np.arange(
                start=0, stop=int(np.floor(self.X_shape[1] / 2))))
            source1_branch += PipelineElement(
                "ConfounderRemoval",
                {},
                standardize_covariates=True,
                test_disabled=True,
                confounder_names=["cov1", "cov2"],
            )
            source1_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            source2_branch = Branch("source2_features")
            # second half of features (for Boston Housing, same as indices=[6, 7, 8, 9, 10, 11, 12])
            source2_branch += DataFilter(indices=np.arange(
                start=int(np.floor(self.X_shape[1] /
                                   2)), stop=self.X_shape[1]))
            source2_branch += PipelineElement(
                "ConfounderRemoval",
                {},
                standardize_covariates=True,
                test_disabled=True,
                confounder_names=["cov1", "cov2"],
            )
            source2_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )

            # setup source branches and stack their output (i.e. horizontal concatenation)
            pipe += Stack("source_stack",
                          elements=[source1_branch, source2_branch])

            # final estimator with stack output as features
            # setup estimator switch and add it to the pipe
            switch = Switch("estimator_switch")
            switch += PipelineElement(
                "SVC",
                hyperparameters={
                    "kernel": Categorical(["linear", "rbf"]),
                    "C": Categorical([0.01, 1, 5]),
                },
            )
            switch += PipelineElement(
                "RandomForestClassifier",
                hyperparameters={
                    "min_samples_split":
                    FloatRange(start=0.05,
                               step=0.1,
                               stop=0.26,
                               range_type="range")
                },
            )
            pipe += switch

            self.run_hyperpipe(pipe, self.classification)
Example #27
    def test_save_optimum_pipe(self):
        # todo: test .save() of custom model
        tmp_path = os.path.join(self.tmp_folder_path, "optimum_pipypipe")
        settings = OutputSettings(project_folder=tmp_path, overwrite_results=True)

        my_pipe = Hyperpipe(
            "hyperpipe",
            optimizer="random_grid_search",
            optimizer_params={"n_configurations": 3},
            metrics=["accuracy", "precision", "recall"],
            best_config_metric="f1_score",
            outer_cv=KFold(n_splits=2),
            inner_cv=KFold(n_splits=2),
            verbosity=1,
            output_settings=settings,
        )

        preproc = Preprocessing()
        preproc += PipelineElement("StandardScaler")

        # BRANCH WITH QUANTILETRANSFORMER AND DECISIONTREECLASSIFIER
        tree_qua_branch = Branch("tree_branch")
        tree_qua_branch += PipelineElement("QuantileTransformer")
        tree_qua_branch += PipelineElement(
            "DecisionTreeClassifier",
            {"min_samples_split": IntegerRange(2, 4)},
            criterion="gini",
        )

        # BRANCH WITH MinMaxScaler AND SVC
        svm_mima_branch = Branch("svm_branch")
        svm_mima_branch += PipelineElement("MinMaxScaler")
        svm_mima_branch += PipelineElement(
            "SVC", {"kernel": Categorical(["rbf", "linear"]), "C": 2.0}, gamma="auto"
        )

        # BRANCH WITH DummyTransformer AND KNeighborsClassifier
        knn_sta_branch = Branch("neighbour_branch")
        knn_sta_branch += PipelineElement.create("dummy", DummyTransformer(), {})
        knn_sta_branch += PipelineElement("KNeighborsClassifier")

        my_pipe += preproc
        # voting = True would average (take the mean of) the result of every branch
        my_pipe += Stack(
            "final_stack", [tree_qua_branch, svm_mima_branch, knn_sta_branch]
        )

        my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

        my_pipe.fit(self.__X, self.__y)
        model_path = os.path.join(
            my_pipe.output_settings.results_folder, "photon_best_model.photon"
        )
        self.assertTrue(os.path.exists(model_path))

        # now move optimum pipe to new folder
        test_folder = os.path.join(
            my_pipe.output_settings.results_folder, "new_test_folder"
        )
        new_model_path = os.path.join(test_folder, "photon_best_model.photon")
        os.makedirs(test_folder)
        shutil.copyfile(model_path, new_model_path)

        # check if load_optimum_pipe also works
        # check if we have the meta information recovered
        loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(new_model_path)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information)
        self.assertIsNotNone(loaded_optimum_pipe._meta_information["photon_version"])

        # check if predictions stay reliably the same
        y_pred_loaded = loaded_optimum_pipe.predict(self.__X)
        y_pred = my_pipe.optimum_pipe.predict(self.__X)
        np.testing.assert_array_equal(y_pred_loaded, y_pred)
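
For reuse outside the test, the saved best model can be restored with Hyperpipe.load_optimum_pipe, exactly as above. A minimal reload sketch; the results folder path is a placeholder, and the breast-cancer data is only an assumption for illustration.

import os
from sklearn.datasets import load_breast_cancer
from photonai.base import Hyperpipe

X, _ = load_breast_cancer(return_X_y=True)
model_path = os.path.join('<results_folder>', 'photon_best_model.photon')  # placeholder path
best_model = Hyperpipe.load_optimum_pipe(model_path)
print(best_model.predict(X)[:10])
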
Example #28
X, y = load_breast_cancer(True)

my_pipe = Hyperpipe(
    "basic_stacking",
    optimizer="grid_search",
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="f1_score",
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=10),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

# BRANCH WITH QUANTILETRANSFORMER AND DECISIONTREECLASSIFIER
tree_qua_branch = Branch("tree_branch")
tree_qua_branch += PipelineElement("QuantileTransformer")
tree_qua_branch += PipelineElement(
    "DecisionTreeClassifier",
    {"min_samples_split": IntegerRange(2, 4)},
    criterion="gini",
)

# BRANCH WITH MinMaxScaler AND SVC
svm_mima_branch = Branch("svm_branch")
svm_mima_branch += PipelineElement("MinMaxScaler")
svm_mima_branch += PipelineElement(
    "SVC",
    {
        "kernel": Categorical(["rbf", "linear"]),
        "C": IntegerRange(0.01, 2.0)
Example #29
# additionally, you can smooth the entire image
neuro_branch += PipelineElement('SmoothImages', {'fwhm': Categorical([6, 8])}, batch_size=20)

# now, apply a brain atlas and extract 4 ROIs
# set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated
neuro_branch += PipelineElement('BrainAtlas', hyperparameters={},
                                rois=['Hippocampus_L', 'Hippocampus_R', 'Amygdala_L', 'Amygdala_R'],
                                atlas_name="AAL", extract_mode='vec', batch_size=20)

# finally, add your neuro branch to your hyperpipe
neuro_branch += CallbackElement('NeuroCallback', my_monitor)
my_pipe += neuro_branch
# my_pipe += CallbackElement('NeuroCallback', my_monitor)

# now, add standard ML algorithms to your liking
feature_engineering = Branch('FeatureEngineering')
feature_engineering += PipelineElement('StandardScaler')


my_pipe += feature_engineering
my_pipe += CallbackElement('FECallback', my_monitor)
my_pipe += PipelineElement('SVR', hyperparameters={'kernel': Categorical(['rbf', 'linear'])}, gamma='scale')

# NOW TRAIN YOUR PIPELINE
start_time = time.time()
my_pipe.fit(X, y)
elapsed_time = time.time() - start_time
print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))

debug = True
Example #30
    def setUp(self):
        def callback(X, y=None, **kwargs):
            self.assertEqual(X.shape, (569, 30))
            print("Shape of transformed data: {}".format(X.shape))

        def predict_callback(X, y=None, **kwargs):
            self.assertEqual(X.shape, (569, ))
            print('Shape of predictions: {}'.format(X.shape))

        def callback_test_equality(X, y=None, **kwargs):
            self.assertTrue(np.array_equal(self.X, X))
            if y is not None:
                self.assertListEqual(self.y.tolist(), y.tolist())

        self.X, self.y = load_breast_cancer(True)

        self.clean_pipeline = PhotonPipeline(
            elements=[('PCA', PipelineElement('PCA')),
                      ('LogisticRegression',
                       PipelineElement('LogisticRegression'))])
        self.callback_pipeline = PhotonPipeline(elements=[(
            'First',
            CallbackElement('First', callback)), (
                'PCA', PipelineElement('PCA')
            ), ('Second', CallbackElement('Second', callback)
                ), ('LogisticRegression',
                    PipelineElement('LogisticRegression'))])
        self.clean_branch_pipeline = PhotonPipeline(
            elements=[('MyBranch',
                       Branch('MyBranch', [PipelineElement('PCA')])),
                      ('LogisticRegression',
                       PipelineElement('LogisticRegression'))])
        self.callback_branch_pipeline = PhotonPipeline(
            elements=[('First', CallbackElement('First', callback)),
                      ('MyBranch',
                       Branch('MyBranch', [
                           CallbackElement('Second', callback),
                           PipelineElement('PCA')
                       ])), ('Fourth', CallbackElement('Fourth', callback)),
                      ('LogisticRegression',
                       PipelineElement('LogisticRegression'))])
        self.callback_branch_pipeline_error = PhotonPipeline(
            elements=[('First', CallbackElement('First', callback)),
                      ('MyBranch',
                       Branch('MyBranch', [
                           CallbackElement('Second', callback),
                           PipelineElement('PCA'),
                           CallbackElement('Third', callback)
                       ])), ('Fourth', CallbackElement('Fourth', callback)),
                      ('LogisticRegression',
                       PipelineElement('LogisticRegression')
                       ), ('Fifth',
                           CallbackElement('Fifth', predict_callback))])
        # test that data is unaffected from pipeline
        self.callback_after_callback_pipeline = PhotonPipeline([
            ('Callback1', CallbackElement('Callback1', callback)),
            ('Callback2', CallbackElement('Callback2',
                                          callback_test_equality)),
            ('StandardScaler', PipelineElement('StandardScaler')),
            ('SVR', PipelineElement('SVR'))
        ])