Пример #1
0
    def test_extract_feature_importances(self):
        """Check ``feature_importances_`` for three kinds of final estimator.

        Fits ``self.pipe``, a pipeline ending in a decision tree and a
        pipeline ending in an RBF-kernel SVC. The first two must expose a
        list, the last one must expose ``None``.
        """
        # one machine with coef_ (self.pipe is built outside this method)
        self.pipe.fit(self.X, self.y)
        f_importances_coef = self.pipe.feature_importances_
        self.assertIsNotNone(f_importances_coef)
        self.assertIsInstance(f_importances_coef, list)

        # one machine with feature_importances_
        f_imp_pipe = PhotonPipeline([
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("PCA", PipelineElement("PCA")),
            ("DecisionTreeClassifier",
             PipelineElement("DecisionTreeClassifier")),
        ])
        f_imp_pipe.fit(self.X, self.y)
        f_importances = f_imp_pipe.feature_importances_
        self.assertIsNotNone(f_importances)
        self.assertIsInstance(f_importances, list)

        # one machine that has no feature importances
        no_f_imp_pipe = PhotonPipeline([
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("PCA", PipelineElement("PCA")),
            ("SVC", PipelineElement("SVC", kernel="rbf")),
        ])
        no_f_imp_pipe.fit(self.X, self.y)
        self.assertIsNone(no_f_imp_pipe.feature_importances_)
Пример #2
0
    def test_inverse_tansform(self):
        """Compare ``inverse_transform`` against scikit-learn, then run it through a Stack."""
        # Reference result: scaler + PCA in a plain scikit-learn pipeline.
        reference_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
        reference_pipe.fit(self.X, self.y)
        reference_projection = reference_pipe.transform(self.X)
        reference_roundtrip = reference_pipe.inverse_transform(
            reference_projection)

        # Same two steps as PHOTON elements; both calls return a 3-tuple of
        # which only the first entry (the data) is used here.
        photon_steps = [("SS", self.p_ss), ("PCA", self.p_pca)]
        photon_pipe = PhotonPipeline(photon_steps)
        photon_pipe.fit(self.X, self.y)
        photon_projection, _, _ = photon_pipe.transform(self.X)
        photon_roundtrip, _, _ = photon_pipe.inverse_transform(
            photon_projection)

        roundtrips_match = np.array_equal(reference_roundtrip,
                                          photon_roundtrip)
        self.assertTrue(roundtrips_match)

        # Second scenario: the PCA element sits inside a Stack at the front
        # of the pipeline.
        pca_stack = Stack("stack", [self.p_pca])
        stacked_pipe = PhotonPipeline([
            ("stack", pca_stack),
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("LinearSVC", PipelineElement("LinearSVC")),
        ])
        stacked_pipe.fit(self.X, self.y)
        importances = stacked_pipe.feature_importances_
        back_projected, _, _ = stacked_pipe.inverse_transform(importances)

        # The back-projected importances must have one column per input feature.
        self.assertEqual(back_projected.shape[1], self.X.shape[1])
Пример #3
0
    def setUp(self):
        """Build a caching-enabled scaler/PCA/SVC pipeline, two configs and the data.

        Sets ``self.pipe`` (with caching switched on and pointed at
        ``self.cache_folder_path``), ``self.config1``, ``self.config2`` and
        ``self.X`` / ``self.y``.
        """
        super(CachedPhotonPipelineTests, self).setUp()
        # Photon Version
        ss = PipelineElement("StandardScaler", {})
        pca = PipelineElement("PCA", {"n_components": [3, 10, 50]},
                              random_state=3)
        svm = PipelineElement("SVC", {"kernel": ["rbf", "linear"]},
                              random_state=3)

        self.pipe = PhotonPipeline([("StandardScaler", ss), ("PCA", pca),
                                    ("SVC", svm)])

        self.pipe.caching = True
        self.pipe.fold_id = "12345643463434"
        self.pipe.cache_folder = self.cache_folder_path

        self.config1 = {
            "PCA__n_components": 4,
            "SVC__C": 3,
            "SVC__kernel": "rbf"
        }

        self.config2 = {
            "PCA__n_components": 7,
            "SVC__C": 1,
            "SVC__kernel": "linear"
        }

        # return_X_y must be passed by keyword: newer scikit-learn releases
        # reject it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)
Пример #4
0
        def objective_function_simple(self, cfg):
            """Return ``1 - mean accuracy`` of a scaler/PCA/SVC pipeline over the inner folds.

            Entries of ``cfg`` with a falsy value are dropped before the
            remainder is applied to each freshly built pipeline via
            ``set_params``. As a side effect, ``self._validation_X`` and
            ``self._validation_y`` are set to the training part of the first
            outer fold.
            """
            active_cfg = {name: cfg[name] for name in cfg if cfg[name]}

            # Restrict the data to the training indices of the first outer fold.
            first_outer_fold = list(
                self.pipe.cross_validation.outer_folds.values())[0]
            self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
                self.X,
                self.y,
                kwargs=None,
                indices=first_outer_fold.train_indices)

            # Inner folds that belong to the first outer fold.
            first_inner_folds = list(
                self.pipe.cross_validation.inner_folds.values())[0]

            fold_accuracies = []
            for fold in first_inner_folds.values():
                fold_pipe = PhotonPipeline([
                    ('StandardScaler', PipelineElement("StandardScaler", {})),
                    ('PCA', PipelineElement("PCA", {}, random_state=42)),
                    ('SVC', PipelineElement("SVC", {}, random_state=42,
                                            gamma='auto')),
                ])
                fold_pipe.set_params(**active_cfg)

                train_rows = fold.train_indices
                fold_pipe.fit(self._validation_X[train_rows, :],
                              self._validation_y[train_rows])

                test_rows = fold.test_indices
                expected = self._validation_y[test_rows]
                predicted = fold_pipe.predict(
                    self._validation_X[test_rows, :])
                fold_accuracies.append(accuracy_score(expected, predicted))

            return 1 - np.mean(fold_accuracies)
Пример #5
0
 def setUp(self):
     """Prepare pipeline, config, inner folds and optimization info for the inner-fold tests.

     Sets ``self.pipe``, ``self.config``, ``self.outer_fold_id``,
     ``self.inner_cv``, ``self.X`` / ``self.y``, ``self.cross_validation``
     (with ``inner_folds`` filled for the single outer fold id) and
     ``self.optimization``.
     """
     super(InnerFoldTests, self).setUp()
     self.pipe = PhotonPipeline([
         ("StandardScaler", PipelineElement("StandardScaler")),
         ("PCA", PipelineElement("PCA")),
         ("RidgeClassifier", PipelineElement("RidgeClassifier")),
     ])
     self.config = {
         "PCA__n_components": 5,
         "RidgeClassifier__solver": "svd",
         "RidgeClassifier__random_state": 42,
     }
     self.outer_fold_id = "TestID"
     self.inner_cv = KFold(n_splits=4)
     # return_X_y must be passed by keyword: newer scikit-learn releases
     # reject it as a positional argument.
     self.X, self.y = load_breast_cancer(return_X_y=True)
     self.cross_validation = Hyperpipe.CrossValidation(
         self.inner_cv, None, True, 0.2, True, False)
     # One FoldInfo per KFold split, keyed by split index, stored under the
     # single outer fold id.
     self.cross_validation.inner_folds = {
         self.outer_fold_id: {
             i: FoldInfo(i, i + 1, train, test)
             for i, (train,
                     test) in enumerate(self.inner_cv.split(self.X, self.y))
         }
     }
     self.optimization = Hyperpipe.Optimization(
         "grid_search", {}, ["accuracy", "recall", "specificity"],
         "accuracy", None)
Пример #6
0
    def setUp(self):
        """Create a caching scaler/PCA/SVC pipeline on a cleared cache folder, plus configs and data."""
        super(CachedPhotonPipelineTests, self).setUp()

        # PHOTON elements with their hyperparameter grids.
        scaler = PipelineElement("StandardScaler", {})
        reducer = PipelineElement("PCA", {'n_components': [3, 10, 50]}, random_state=3)
        classifier = PipelineElement("SVC", {'kernel': ['rbf', 'linear']}, random_state=3)
        steps = [('StandardScaler', scaler), ('PCA', reducer), ('SVC', classifier)]
        self.pipe = PhotonPipeline(steps)

        # Switch caching on; the cache folder is emptied before it is assigned.
        self.pipe.caching = True
        self.pipe.fold_id = "12345643463434"
        CacheManager.clear_cache_files(self.cache_folder_path)
        self.pipe.cache_folder = self.cache_folder_path

        # Two concrete configurations used by the tests.
        self.config1 = dict(PCA__n_components=4, SVC__C=3, SVC__kernel='rbf')
        self.config2 = dict(PCA__n_components=7, SVC__C=1, SVC__kernel='linear')

        dataset = load_breast_cancer(return_X_y=True)
        self.X, self.y = dataset
Пример #7
0
 def test_random_state(self):
     """A random_state set on the pipeline must reach its elements once the pipeline is fitted."""
     steps = [
         ("SS", self.p_ss),
         ("PCA", PipelineElement('PCA')),
         ("SVC", self.p_dt),
     ]
     photon_pipe = PhotonPipeline(steps)
     photon_pipe.random_state = 666
     photon_pipe.fit(self.X, self.y)

     # Element handed in by reference.
     self.assertEqual(self.p_dt.random_state, photon_pipe.random_state)
     # Element created inline: second step, last entry of its tuple.
     inline_pca = photon_pipe.elements[1][-1]
     self.assertEqual(inline_pca.random_state, photon_pipe.random_state)
     self.assertEqual(self.p_dt.random_state, 666)
Пример #8
0
    def test_copy_me(self):
        """Check that ``copy_me`` reproduces a pipeline with a switch, stack, branch and callback.

        Uses ``assertEqual`` / ``assertIsInstance`` rather than
        ``assertTrue(a == b)`` so that a failure reports the differing values.
        """
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch('Random_Branch')
        pca_hyperparameters = {'n_components': [5, 10]}
        branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        photon_pipe = PhotonPipeline([("SimpleImputer", PipelineElement("SimpleImputer")),
                                      ("my_copy_switch", switch),
                                      ('RandomStack', stack),
                                      ('Callback1', CallbackElement('tmp_callback', np.mean)),
                                      ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier"))])

        copy_of_the_pipe = photon_pipe.copy_me()

        self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
        self.assertEqual(len(copy_of_the_pipe.elements), 5)
        self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
        self.assertTrue(copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
        # Third step -> stack; its second element -> branch; the branch's first
        # element -> PCA, whose hyperparameter keys carry the "PCA__" prefix.
        self.assertDictEqual(copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
                             {"PCA__n_components": [5, 10]})
        self.assertIsInstance(copy_of_the_pipe.elements[3][1], CallbackElement)
        self.assertEqual(copy_of_the_pipe.named_steps["tmp_callback"].delegate_function, np.mean)
Пример #9
0
 def setUp(self):
     """Set up pipeline, config, data, inner folds and optimization info for the inner-fold tests."""
     super(InnerFoldTests, self).setUp()

     # Every step is registered under the name of the element it wraps.
     step_names = ('StandardScaler', 'PCA', 'RidgeClassifier')
     self.pipe = PhotonPipeline(
         [(step_name, PipelineElement(step_name)) for step_name in step_names])

     self.config = dict([
         ('PCA__n_components', 5),
         ('RidgeClassifier__solver', 'svd'),
         ('RidgeClassifier__random_state', 42),
     ])
     self.outer_fold_id = 'TestID'
     self.inner_cv = KFold(n_splits=4)
     self.X, self.y = load_breast_cancer(return_X_y=True)
     self.cross_validation = Hyperpipe.CrossValidation(
         self.inner_cv, None, True, 0.2, True, False, False, None)

     # One FoldInfo per KFold split, keyed by split index and stored under
     # the single outer fold id.
     folds_by_index = {}
     splits = self.inner_cv.split(self.X, self.y)
     for fold_index, (train_rows, test_rows) in enumerate(splits):
         folds_by_index[fold_index] = FoldInfo(
             fold_index, fold_index + 1, train_rows, test_rows)
     self.cross_validation.inner_folds = {self.outer_fold_id: folds_by_index}

     tracked_metrics = ['accuracy', 'recall', 'specificity']
     self.optimization = Hyperpipe.Optimization(
         'grid_search', {}, tracked_metrics, 'accuracy', None)
Пример #10
0
    def test_y_and_covariates_transformation(self):
        """Check how X, y and keyword covariates pass through a pipeline holding only the dummy element."""
        features = np.ones((200, 50))
        targets = np.ones((200, )) + 2
        covariates = {"sample1": np.ones((200, 5))}

        steps = [("DummyTransformer", self.dummy_photon_element)]
        photon_pipe = PhotonPipeline(steps)

        # Without targets, the outputs are expected to equal the inputs.
        x_without_y, y_without_y, kwargs_without_y = photon_pipe.transform(
            features, None, **covariates)
        self.assertTrue(np.array_equal(x_without_y, features))
        self.assertTrue(np.array_equal(y_without_y, None))
        self.assertTrue(np.array_equal(kwargs_without_y, covariates))

        # With targets, the transformer is expected to change everything.
        x_out, y_out, kwargs_out = photon_pipe.transform(
            features, targets, **covariates)

        # The wrapped element must have received the untouched inputs.
        received = self.dummy_photon_element.base_element
        self.assertTrue(np.array_equal(features, received.X))
        self.assertTrue(np.array_equal(targets, received.y))
        self.assertTrue(
            np.array_equal(covariates["sample1"], received.kwargs["sample1"]))

        # Expected edits: X - 1, y + 1 and a new "sample1_edit" covariate
        # equal to sample1 + 5.
        self.assertTrue(np.array_equal(x_out, features - 1))
        self.assertTrue(np.array_equal(y_out, targets + 1))
        self.assertTrue("sample1_edit" in kwargs_out)
        edited_covariate = kwargs_out["sample1_edit"]
        self.assertTrue(
            np.array_equal(edited_covariate, covariates["sample1"] + 5))
Пример #11
0
    def test_add_preprocessing(self):
        """After ``_add_preprocessing`` the Preprocessing object must be the first of three steps."""
        preprocessing = Preprocessing()
        preprocessing += PipelineElement('LabelEncoder')

        steps = [("PCA", self.p_pca), ("SVC", self.p_svm)]
        photon_pipe = PhotonPipeline(steps)
        photon_pipe._add_preprocessing(preprocessing)

        # Two original steps plus the preprocessing step.
        step_count = len(photon_pipe.named_steps)
        self.assertEqual(step_count, 3)

        # Reachable both by position and under the name 'Preprocessing'.
        leading_element = photon_pipe.elements[0][1]
        self.assertTrue(leading_element == preprocessing)
        named_element = photon_pipe.named_steps['Preprocessing']
        self.assertTrue(named_element == preprocessing)
Пример #12
0
    def test_predict_proba(self):
        """``predict_proba`` of the PHOTON pipeline must equal that of the scikit-learn pipeline."""
        # Reference probabilities from scikit-learn.
        reference_pipe = SKPipeline([("SS", self.sk_ss), ("SVC", self.sk_dt)])
        reference_pipe.fit(self.X, self.y)
        reference_proba = reference_pipe.predict_proba(self.X)

        # Same step names built from the PHOTON elements.
        photon_steps = [("SS", self.p_ss), ("SVC", self.p_dt)]
        photon_pipe = PhotonPipeline(photon_steps)
        photon_pipe.fit(self.X, self.y)
        photon_proba = photon_pipe.predict_proba(self.X)

        probabilities_match = np.array_equal(reference_proba, photon_proba)
        self.assertTrue(probabilities_match)
Пример #13
0
    def objective_function(cfg):
        """Fit a scaler/SVC pipeline configured by ``cfg`` and return its accuracy.

        ``seed``, ``X``, ``y``, ``X_train`` and ``y_true`` are taken from the
        enclosing scope, which is not part of this block.

        :param cfg: mapping of parameter names to values, forwarded to
            ``set_params``.
        :return: accuracy of the predictions for ``X_train`` against ``y_true``.
        """
        my_pipe = PhotonPipeline([('StandardScaler', StandardScaler()),
                                  ('SVC', SVC())])
        my_pipe.random_state = seed
        my_pipe.set_params(**cfg)
        # NOTE(review): the pipeline is fitted on X/y but predicts on X_train;
        # confirm against the enclosing scope that this is intended.
        my_pipe.fit(X, y)
        y_pred = my_pipe.predict(X_train)

        # accuracy_score is documented as (y_true, y_pred); the score is the
        # same either way, the order only follows the documented signature.
        return accuracy_score(y_true, y_pred)
Пример #14
0
    def test_no_estimator(self):
        """Without a final estimator, ``predict`` must return the same data as ``transform``."""
        steps = [("StandardScaler", self.p_ss), ("PCA", self.p_pca)]
        transformer_only_pipe = PhotonPipeline(steps)
        transformer_only_pipe.fit(self.X, self.y)
        pipe_transformed, _, _ = transformer_only_pipe.transform(self.X)
        pipe_predicted = transformer_only_pipe.predict(self.X)

        self.assertTrue(np.array_equal(pipe_predicted, pipe_transformed))

        # Rebuild the expected output by hand with the scikit-learn objects.
        self.sk_ss.fit(self.X)
        scaled = self.sk_ss.transform(self.X)
        self.sk_pca.fit(scaled)
        expected = self.sk_pca.transform(scaled)

        for pipe_output in (pipe_transformed, pipe_predicted):
            self.assertTrue(np.array_equal(pipe_output, expected))
Пример #15
0
    def test_predict_with_training_flag(self):
        """Editing labels inside the pipeline must predict the same as fitting on ``y + 1`` by hand."""
        # Reference: scikit-learn pipeline fitted on manually shifted labels.
        reference_pipe = SKPipeline([("SS", self.sk_ss), ("SVC", self.sk_svc)])
        shifted_labels = self.y + 1
        reference_pipe.fit(self.X, shifted_labels)
        reference_pred = reference_pipe.predict(self.X)

        # PHOTON: the dummy element sits between scaler and SVC and receives
        # the original labels.
        photon_steps = [
            ("SS", self.p_ss),
            ("YT", self.dummy_photon_element),
            ("SVC", self.p_svm),
        ]
        photon_pipe = PhotonPipeline(photon_steps)
        photon_pipe.fit(self.X, self.y)
        photon_pred = photon_pipe.predict(self.X)

        # The dummy element must have seen the standardized data.
        expected_input = self.sk_ss.transform(self.X)
        seen_input = self.dummy_photon_element.base_element.X
        self.assertTrue(np.array_equal(expected_input, seen_input))

        self.assertTrue(np.array_equal(reference_pred, photon_pred))
Пример #16
0
    def objective_function(self, cfg):
        """Return ``1 - mean`` of the 3-fold cross-validated accuracy for the pipeline configured by ``cfg``.

        Entries of ``cfg`` with a falsy value are dropped before the rest is
        handed to ``set_params``.
        """
        active_cfg = {name: cfg[name] for name in cfg if cfg[name]}

        steps = [
            ("StandardScaler", PipelineElement("StandardScaler", {})),
            ("PCA", PipelineElement("PCA", {}, random_state=3)),
            ("SVC", PipelineElement("SVC", {}, random_state=3, gamma="auto")),
        ]
        my_pipe = PhotonPipeline(steps)
        my_pipe.set_params(**active_cfg)

        accuracy_scorer = make_scorer(accuracy_score, greater_is_better=True)
        fold_scores = cross_val_score(my_pipe,
                                      self.X,
                                      self.y,
                                      cv=3,
                                      scoring=accuracy_scorer)
        print("run")
        return 1 - np.mean(fold_scores)
Пример #17
0
    def test_regular_use(self):
        """Fit, transform and predict must agree with the referenced elements and with scikit-learn."""
        steps = [("PCA", self.p_pca), ("SVC", self.p_svm)]
        photon_pipe = PhotonPipeline(steps)
        photon_pipe.fit(self.X, self.y)

        pipe_X, _, _ = photon_pipe.transform(self.X)
        pipe_y = photon_pipe.predict(self.X)

        # The elements were handed over by reference, so fitting the pipeline
        # should have fitted them too and they can be used directly.
        element_X, _, _ = self.p_pca.transform(self.X)
        element_y = self.p_svm.predict(element_X)

        self.assertTrue(np.array_equal(pipe_X, element_X))
        self.assertTrue(np.array_equal(pipe_y, element_y))

        # Cross-check the predictions against a plain scikit-learn pipeline.
        reference_pipe = SKPipeline([('PCA', self.sk_pca), ("SVC", self.sk_svc)])
        reference_pipe.fit(self.X, self.y)
        reference_y = reference_pipe.predict(self.X)

        self.assertTrue(np.array_equal(pipe_y, reference_y))
Пример #18
0
    def setUp(self):
        """Create CV info, data, outer fold, optimization info and a regression pipeline for the outer-fold tests."""
        super(OuterFoldTests, self).setUp()

        # Cross-validation splitters: five shuffled inner splits and a single
        # 80/20 outer split, both seeded with 42.
        self.fold_nr_inner_cv = 5
        self.inner_cv = ShuffleSplit(
            n_splits=self.fold_nr_inner_cv, random_state=42)
        self.outer_cv = ShuffleSplit(
            n_splits=1, test_size=0.2, random_state=42)
        self.cv_info = Hyperpipe.CrossValidation(
            inner_cv=self.inner_cv,
            outer_cv=self.outer_cv,
            eval_final_performance=True,
            test_size=0.2,
            calculate_metrics_per_fold=True,
            calculate_metrics_across_folds=False,
            learning_curves=False,
            learning_curves_cut=None,
        )

        # NOTE(review): load_boston is no longer shipped by recent
        # scikit-learn releases; confirm the pinned version before upgrading.
        self.X, self.y = load_boston(return_X_y=True)

        # Every split is stored under the same outer fold id (the outer
        # splitter yields exactly one split).
        self.outer_fold_id = "TestFoldOuter1"
        outer_folds = {}
        for train_rows, test_rows in self.outer_cv.split(self.X, self.y):
            outer_folds[self.outer_fold_id] = FoldInfo(
                0, 1, train_rows, test_rows)
        self.cv_info.outer_folds = outer_folds

        self.config_num = 2
        self.optimization_info = Hyperpipe.Optimization(
            metrics=['mean_absolute_error', 'mean_squared_error'],
            best_config_metric='mean_absolute_error',
            optimizer_input='grid_search',
            optimizer_params={},
            performance_constraints=None,
        )

        # The pipeline registers each element under the element's own name.
        scaler = PipelineElement('StandardScaler')
        reducer = PipelineElement('PCA', {'n_components': [4, 7]})
        regressor = PipelineElement('DecisionTreeRegressor', random_state=42)
        self.elements = [scaler, reducer, regressor]
        named_elements = []
        for element in self.elements:
            named_elements.append((element.name, element))
        self.pipe = PhotonPipeline(named_elements)
Пример #19
0
        def objective_function_switch(self, cfg):
            """Return ``1 - mean accuracy`` over the inner folds for the estimator chosen in ``cfg``.

            ``cfg["Estimator_switch"]`` selects an SVC (value ``'svc'``) or a
            random forest (any other value); the remaining non-falsy entries
            are applied with ``set_params``. As a side effect,
            ``self._validation_X`` and ``self._validation_y`` are set to the
            training part of the first outer fold.
            """
            active_cfg = {name: cfg[name] for name in cfg if cfg[name]}

            # Restrict the data to the training indices of the first outer fold.
            first_outer_fold = list(
                self.pipe.cross_validation.outer_folds.values())[0]
            self._validation_X, self._validation_y, _ = PhotonDataHelper.split_data(
                self.X,
                self.y,
                kwargs=None,
                indices=first_outer_fold.train_indices)

            # The switch entry is not a pipeline parameter, so it is taken out
            # of the config before set_params sees it.
            chosen = active_cfg.pop("Estimator_switch")
            if chosen == 'svc':
                estimator_name = 'SVC'
                estimator_kwargs = {'random_state': 42, 'gamma': 'auto'}
            else:
                estimator_name = "RandomForestClassifier"
                estimator_kwargs = {'random_state': 42}

            first_inner_folds = list(
                self.pipe.cross_validation.inner_folds.values())[0]

            fold_accuracies = []
            for fold in first_inner_folds.values():
                scaler = PipelineElement("StandardScaler", {})
                reducer = PipelineElement("PCA", {}, random_state=42)
                estimator = PipelineElement(estimator_name, {},
                                            **estimator_kwargs)
                fold_pipe = PhotonPipeline([('StandardScaler', scaler),
                                            ('PCA', reducer),
                                            (estimator_name, estimator)])
                fold_pipe.set_params(**active_cfg)

                train_rows = fold.train_indices
                fold_pipe.fit(self._validation_X[train_rows, :],
                              self._validation_y[train_rows])

                test_rows = fold.test_indices
                expected = self._validation_y[test_rows]
                predicted = fold_pipe.predict(
                    self._validation_X[test_rows, :])
                fold_accuracies.append(accuracy_score(expected, predicted))

            return 1 - np.mean(fold_accuracies)
Пример #20
0
    def test_combi_from_single_and_group_caching(self):
        """Count cache files written by a pipeline that combines a ParallelBranch with regular elements.

        The pipeline is fitted with several configurations. After each fit the
        number of ``*.p`` files in the base cache folder and in the branch's
        own cache folder is compared with the expected count. Counts are
        checked with ``assertEqual`` so a failure reports both numbers.
        """
        # 2. specify cache directories
        cache_folder_base = self.cache_folder_path
        cache_folder_neuro = os.path.join(cache_folder_base,
                                          'subject_caching_test')

        CacheManager.clear_cache_files(cache_folder_base)
        CacheManager.clear_cache_files(cache_folder_neuro)

        # 3. set up the parallel branch
        nb = ParallelBranch("SubjectCaching", nr_of_processes=3)
        # increase complexity by adding batching
        nb += PipelineElement.create("ResampleImages",
                                     StupidAdditionTransformer(), {},
                                     batch_size=4)
        nb.base_element.cache_folder = cache_folder_neuro

        # 4. setup usual pipeline
        ss = PipelineElement("StandardScaler", {})
        pca = PipelineElement("PCA", {'n_components': [3, 10, 50]})
        svm = PipelineElement("SVR", {'kernel': ['rbf', 'linear']})

        pipe = PhotonPipeline([('NeuroBranch', nb), ('StandardScaler', ss),
                               ('PCA', pca), ('SVR', svm)])

        pipe.caching = True
        pipe.fold_id = "12345643463434"
        pipe.cache_folder = cache_folder_base

        def transform_and_check_folder(config, expected_nr_of_files_group,
                                       expected_nr_subject):
            """Apply ``config``, fit the pipeline and check both cache folders' file counts."""
            pipe.set_params(**config)
            pipe.fit(self.X, self.y)
            nr_of_generated_cache_files = len(
                glob.glob(os.path.join(cache_folder_base, "*.p")))
            self.assertEqual(nr_of_generated_cache_files,
                             expected_nr_of_files_group)

            nr_of_generated_cache_files_subject = len(
                glob.glob(os.path.join(cache_folder_neuro, "*.p")))
            self.assertEqual(nr_of_generated_cache_files_subject,
                             expected_nr_subject)

        config1 = {
            'NeuroBranch__ResampleImages__voxel_size': 5,
            'PCA__n_components': 7,
            'SVR__C': 2
        }
        config2 = {
            'NeuroBranch__ResampleImages__voxel_size': 3,
            'PCA__n_components': 4,
            'SVR__C': 5
        }

        # first config: two files in the base folder (standard scaler and pca)
        # and self.nr_of_expected_pickles_per_config files in the branch folder
        transform_and_check_folder(config1, 2,
                                   self.nr_of_expected_pickles_per_config)

        # second config: both counts double, because the changed voxel size
        # and the changed pca setting each add a new set of cache files
        transform_and_check_folder(config2, 4,
                                   2 * self.nr_of_expected_pickles_per_config)

        # when we transform with the first config again, nothing should happen
        transform_and_check_folder(config1, 4,
                                   2 * self.nr_of_expected_pickles_per_config)

        # an empty config is expected to end in a ValueError; the counts
        # passed here only matter if that error is not raised
        with self.assertRaises(ValueError):
            transform_and_check_folder({}, 6, 4 *
                                       self.nr_of_expected_pickles_per_config)

        CacheManager.clear_cache_files(cache_folder_base)
        CacheManager.clear_cache_files(cache_folder_neuro)
Пример #21
0
    def setUp(self):
        """Load the data and build the pipelines used by the callback tests.

        Three callbacks are defined as closures so that they can assert on the
        test case itself:

        * ``callback`` expects data of shape ``(569, 30)``,
        * ``predict_callback`` expects predictions of shape ``(569,)``,
        * ``callback_test_equality`` expects the untouched ``self.X`` (and
          ``self.y`` when a y is passed).
        """
        def callback(X, y=None, **kwargs):
            self.assertEqual(X.shape, (569, 30))
            print("Shape of transformed data: {}".format(X.shape))

        def predict_callback(X, y=None, **kwargs):
            self.assertEqual(X.shape, (569, ))
            print('Shape of predictions: {}'.format(X.shape))

        def callback_test_equality(X, y=None, **kwargs):
            self.assertTrue(np.array_equal(self.X, X))
            if y is not None:
                self.assertListEqual(self.y.tolist(), y.tolist())

        # return_X_y must be passed by keyword: newer scikit-learn releases
        # reject it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.clean_pipeline = PhotonPipeline(elements=[
            ('PCA', PipelineElement('PCA')),
            ('LogisticRegression', PipelineElement('LogisticRegression')),
        ])
        self.callback_pipeline = PhotonPipeline(elements=[
            ('First', CallbackElement('First', callback)),
            ('PCA', PipelineElement('PCA')),
            ('Second', CallbackElement('Second', callback)),
            ('LogisticRegression', PipelineElement('LogisticRegression')),
        ])
        self.clean_branch_pipeline = PhotonPipeline(elements=[
            ('MyBranch', Branch('MyBranch', [PipelineElement('PCA')])),
            ('LogisticRegression', PipelineElement('LogisticRegression')),
        ])
        self.callback_branch_pipeline = PhotonPipeline(elements=[
            ('First', CallbackElement('First', callback)),
            ('MyBranch',
             Branch('MyBranch', [
                 CallbackElement('Second', callback),
                 PipelineElement('PCA'),
             ])),
            ('Fourth', CallbackElement('Fourth', callback)),
            ('LogisticRegression', PipelineElement('LogisticRegression')),
        ])
        self.callback_branch_pipeline_error = PhotonPipeline(elements=[
            ('First', CallbackElement('First', callback)),
            ('MyBranch',
             Branch('MyBranch', [
                 CallbackElement('Second', callback),
                 PipelineElement('PCA'),
                 CallbackElement('Third', callback),
             ])),
            ('Fourth', CallbackElement('Fourth', callback)),
            ('LogisticRegression', PipelineElement('LogisticRegression')),
            ('Fifth', CallbackElement('Fifth', predict_callback)),
        ])
        # test that data is unaffected from pipeline
        # Each step is its own (name, element) pair; the SVR step used to be
        # nested inside the scaler's tuple by a misplaced parenthesis.
        # NOTE(review): the step name 'StandarcScaler' looks like a typo; it
        # is kept in case it is referenced elsewhere.
        self.callback_after_callback_pipeline = PhotonPipeline([
            ('Callback1', CallbackElement('Callback1', callback)),
            ('Callback2', CallbackElement('Callback2',
                                          callback_test_equality)),
            ('StandarcScaler', PipelineElement('StandardScaler')),
            ('SVR', PipelineElement('SVR')),
        ])
Пример #22
0
    def test_combi_from_single_and_group_caching(self):
        """Count cache files written by a pipeline that combines a NeuroBranch with regular elements.

        The pipeline is fitted with several configurations. After each fit the
        number of ``*.p`` files in the base cache folder and in the branch's
        own cache folder is compared with the expected count. Counts are
        checked with ``assertEqual`` so a failure reports both numbers.
        """
        # 1. load data
        test_folder = os.path.join(os.path.dirname(os.path.abspath(__file__)),
                                   "../test_data/")
        X = AtlasLibrary().get_nii_files_from_folder(test_folder,
                                                     extension=".nii")
        nr_of_expected_pickles_per_config = len(X)
        y = np.random.randn(len(X))

        # 2. specify cache directories
        cache_folder_base = self.cache_folder_path
        cache_folder_neuro = os.path.join(cache_folder_base,
                                          "subject_caching_test")

        CacheManager.clear_cache_files(cache_folder_base)
        CacheManager.clear_cache_files(cache_folder_neuro)

        # 3. set up Neuro Branch
        nb = NeuroBranch("SubjectCaching", nr_of_processes=3)
        # increase complexity by adding batching
        nb += PipelineElement("ResampleImages", batch_size=4)
        nb += PipelineElement("BrainMask", batch_size=4)
        nb.base_element.cache_folder = cache_folder_neuro

        # 4. setup usual pipeline
        ss = PipelineElement("StandardScaler", {})
        pca = PipelineElement("PCA", {"n_components": [3, 10, 50]})
        svm = PipelineElement("SVR", {"kernel": ["rbf", "linear"]})

        pipe = PhotonPipeline([("NeuroBranch", nb), ("StandardScaler", ss),
                               ("PCA", pca), ("SVR", svm)])

        pipe.caching = True
        pipe.fold_id = "12345643463434"
        pipe.cache_folder = cache_folder_base

        def transform_and_check_folder(config, expected_nr_of_files_group,
                                       expected_nr_subject):
            """Apply ``config``, fit the pipeline and check both cache folders' file counts."""
            pipe.set_params(**config)
            pipe.fit(X, y)
            nr_of_generated_cache_files = len(
                glob.glob(os.path.join(cache_folder_base, "*.p")))
            self.assertEqual(nr_of_generated_cache_files,
                             expected_nr_of_files_group)

            nr_of_generated_cache_files_subject = len(
                glob.glob(os.path.join(cache_folder_neuro, "*.p")))
            self.assertEqual(nr_of_generated_cache_files_subject,
                             expected_nr_subject)

        config1 = {
            "NeuroBranch__ResampleImages__voxel_size": 5,
            "PCA__n_components": 7,
            "SVR__C": 2,
        }
        config2 = {
            "NeuroBranch__ResampleImages__voxel_size": 3,
            "PCA__n_components": 4,
            "SVR__C": 5,
        }

        # first config: two files in the base folder (standard scaler and pca)
        # and two files per input in the branch folder (one resampler, one
        # brain mask)
        transform_and_check_folder(config1, 2,
                                   2 * nr_of_expected_pickles_per_config)

        # second config: both counts double, because the changed voxel size
        # and the changed pca setting each add a new set of cache files
        transform_and_check_folder(config2, 4,
                                   4 * nr_of_expected_pickles_per_config)

        # when we transform with the first config again, nothing should happen
        transform_and_check_folder(config1, 4,
                                   4 * nr_of_expected_pickles_per_config)

        # an empty config is expected to end in a ValueError; the counts
        # passed here only matter if that error is not raised
        with self.assertRaises(ValueError):
            transform_and_check_folder({}, 6,
                                       6 * nr_of_expected_pickles_per_config)

        CacheManager.clear_cache_files(cache_folder_base)
        CacheManager.clear_cache_files(cache_folder_neuro)