def generate_hyperpipes(self):
    """Build one inner Hyperpipe per ROI and fuse them into ``self.pipeline_fusion``.

    Reads ``self.atlas_info_object.roi_names_runtime``. When it is empty or
    ``None`` nothing is built and ``self.pipeline_fusion`` is left untouched.
    Otherwise ``self.rois`` is set, and for every ROI a grid-search Hyperpipe
    is created that gets a ``RoiFilterElement`` for the ROI's position plus
    every element described in ``self.hyperpipe_elements``.
    """
    roi_names = self.atlas_info_object.roi_names_runtime
    if not roi_names:
        # Todo: raise an error instead of silently doing nothing
        return

    self.rois = roi_names

    # Keyed by ROI name, so a repeated ROI name keeps only its last pipe.
    inner_pipes = {}
    for roi_index, roi_name in enumerate(self.rois):
        roi_pipe = Hyperpipe(self.atlas_name + '_' + str(roi_name),
                             optimizer='grid_search',
                             inner_cv=ShuffleSplit(n_splits=1, test_size=0.2, random_state=3),
                             eval_final_performance=False,
                             verbose=logging.verbosity_level,
                             best_config_metric=self.best_config_metric,
                             metrics=self.metrics)

        # at first set a filter element
        roi_pipe.filter_element = RoiFilterElement(roi_index)

        # secondly add all other items; each item is (name, base element, kwargs)
        for pipe_item in self.hyperpipe_elements:
            roi_pipe += PipelineElement.create(pipe_item[0], pipe_item[1], **pipe_item[2])

        inner_pipes[roi_name] = roi_pipe

    # Hand over a real list rather than a live dict view.
    self.pipeline_fusion = Stack('multiple_source_pipes', list(inner_pipes.values()), voting=False)
class AtlasStacker(BaseEstimator):
    """Estimator that trains one inner Hyperpipe per atlas ROI and stacks them.

    Parameters:
        atlas_info_object: object exposing ``atlas_name`` and
            ``roi_names_runtime``.
        hyperpipe_elements: iterable of ``(name, base_element, kwargs)``
            items; each is turned into a PipelineElement for every ROI pipe.
        best_config_metric: forwarded to every inner Hyperpipe
            (defaults to an empty list).
        metrics: forwarded to every inner Hyperpipe
            (defaults to an empty list).
    """

    def __init__(self, atlas_info_object, hyperpipe_elements, best_config_metric=None, metrics=None):
        # ToDo
        # - Stacker
        self.atlas_info_object = atlas_info_object
        self.atlas_name = self.atlas_info_object.atlas_name
        self.hyperpipe_elements = hyperpipe_elements
        self.pipeline_fusion = None
        # None stands in for the former mutable ``[]`` defaults so that
        # instances never share one default list object.
        self.best_config_metric = [] if best_config_metric is None else best_config_metric
        self.metrics = [] if metrics is None else metrics

    def generate_hyperpipes(self):
        """Build one inner Hyperpipe per ROI and fuse them into ``self.pipeline_fusion``.

        Does nothing when the atlas reports no runtime ROI names.
        """
        roi_names = self.atlas_info_object.roi_names_runtime
        if not roi_names:
            # Todo: raise an error instead of silently doing nothing
            return

        self.rois = roi_names

        # Keyed by ROI name, so a repeated ROI name keeps only its last pipe.
        inner_pipes = {}
        for roi_index, roi_name in enumerate(self.rois):
            roi_pipe = Hyperpipe(self.atlas_name + '_' + str(roi_name),
                                 optimizer='grid_search',
                                 inner_cv=ShuffleSplit(n_splits=1, test_size=0.2, random_state=3),
                                 eval_final_performance=False,
                                 verbose=logging.verbosity_level,
                                 best_config_metric=self.best_config_metric,
                                 metrics=self.metrics)

            # at first set a filter element
            roi_pipe.filter_element = RoiFilterElement(roi_index)

            # secondly add all other items
            for pipe_item in self.hyperpipe_elements:
                roi_pipe += PipelineElement.create(pipe_item[0], pipe_item[1], **pipe_item[2])

            inner_pipes[roi_name] = roi_pipe

        # Hand over a real list rather than a live dict view.
        self.pipeline_fusion = Stack('multiple_source_pipes', list(inner_pipes.values()), voting=False)

    def fit(self, X, y=None):
        """Fit the stacked ROI pipes, building them first if necessary.

        Raises:
            RuntimeError: if no stack exists yet and the atlas provides no
                runtime ROI names.
        """
        if not self.pipeline_fusion:
            if not self.atlas_info_object.roi_names_runtime:
                raise RuntimeError('No ROIs could be received from Brain Atlas')
            self.generate_hyperpipes()

        self.pipeline_fusion.fit(X, y)
        return self

    def transform(self, X, y=None):
        """Delegate to the stack's ``transform``; ``fit`` must have been called before."""
        return self.pipeline_fusion.transform(X, y)
def test_classification_6(self):
    """Run every hyperpipe with an SVC/RandomForest stack followed by a voting classifier."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Simple estimator stack (outputs are combined by the voting element)
        svc_grid = {
            "kernel": Categorical(["linear", "rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        svc_element = PipelineElement("SVC", hyperparameters=svc_grid)

        forest_grid = {
            "min_samples_split": FloatRange(start=0.05, step=0.1, stop=0.26, range_type="range")
        }
        forest_element = PipelineElement("RandomForestClassifier", hyperparameters=forest_grid)

        estimator_stack = Stack("estimator_stack", elements=[svc_element, forest_element])
        candidate += estimator_stack
        candidate += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(candidate, self.classification)
def test_no_y_transformers(self):
    """Adding the covariate-and-y-transforming dummy to a Stack must raise NotImplementedError."""
    forbidden_stack = Stack("forbidden_stack")
    y_transformer = PipelineElement.create(
        "dummy",
        DummyNeedsCovariatesAndYTransformer(),
        {},
    )

    with self.assertRaises(NotImplementedError):
        forbidden_stack += y_transformer
def test_classification_12(self):
    """Fit every hyperpipe on iris with a probability-emitting estimator stack.

    The stack (two SVCs and a random forest, ``use_probabilities=True``)
    feeds a final RandomForestClassifier; accuracy is both the only metric
    and the best-config metric.
    """
    # return_X_y must be passed by keyword; the positional form is rejected
    # by current scikit-learn releases.
    X, y = load_iris(return_X_y=True)

    # multiclass classification
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["linear"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        SVC2 = PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["rbf"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        RF = PipelineElement("RandomForestClassifier")

        # add to pipe
        pipe += Stack("estimator_stack", elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement("RandomForestClassifier")

        pipe.optimization.metrics = ["accuracy"]
        pipe.optimization.best_config_metric = "accuracy"

        pipe.fit(X, y)
def test_huge_combinations(self):
    """Fitting a pipe with twenty tunable SVCs in one stack is expected to raise a Warning."""
    hp = Hyperpipe(
        "huge_combinations",
        metrics=["accuracy"],
        best_config_metric="accuracy",
        output_settings=OutputSettings(project_folder=self.tmp_folder_path),
    )
    hp += PipelineElement("PCA", hyperparameters={"n_components": [5, 10]})

    stack = Stack("ensemble")
    for _ in range(20):
        stack += PipelineElement(
            "SVC",
            hyperparameters={
                "C": FloatRange(0.001, 5),
                "kernel": ["linear", "rbf", "sigmoid", "polynomial"],
            },
        )
    hp += stack
    hp += PipelineElement("SVC", hyperparameters={"kernel": ["linear", "rbf", "sigmoid"]})

    # return_X_y must be passed by keyword; the positional form is rejected
    # by current scikit-learn releases.
    X, y = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        hp.fit(X, y)
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested stack/switch layout.

    Five ParallelBranch objects (each holding one PCA) are created. The first
    two go into a Switch, the remaining three are each wrapped in a Branch
    behind a StandardScaler and collected in a Stack.

    Returns:
        The list of the five ParallelBranch objects.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    parallel_branches = []
    for number in range(5):
        parallel = ParallelBranch(name=str(number), nr_of_processes=number + 3)
        pca = PipelineElement(
            'PCA',
            hyperparameters={'n_components': IntegerRange(1, 50)},
        )
        parallel += pca
        parallel_branches.append(parallel)

    switch = Switch('disabling_test_switch')
    for switched_branch in parallel_branches[:2]:
        switch += switched_branch

    branch_stack = Stack('stack_of_branches')
    for branch_number, stacked_branch in enumerate(parallel_branches[2:5], start=2):
        wrapper = Branch('branch_' + str(branch_number))
        wrapper += PipelineElement('StandardScaler')
        wrapper += stacked_branch
        branch_stack += wrapper

    for top_level_element in (
        branch_stack,
        PipelineElement('StandardScaler'),
        switch,
        PipelineElement('SVC'),
    ):
        self.hyperpipe.add(top_level_element)

    return parallel_branches
def test_inverse_tansform(self):
    """Compare PhotonPipeline.inverse_transform with sklearn's, then check it through a Stack."""
    # simple pipe: sklearn reference
    reference_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
    reference_pipe.fit(self.X, self.y)
    reference_transformed = reference_pipe.transform(self.X)
    reference_restored = reference_pipe.inverse_transform(reference_transformed)

    # simple pipe: photon counterpart
    simple_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
    simple_pipe.fit(self.X, self.y)
    photon_transformed, _, _ = simple_pipe.transform(self.X)
    photon_restored, _, _ = simple_pipe.inverse_transform(photon_transformed)

    restored_identically = np.array_equal(reference_restored, photon_restored)
    self.assertTrue(restored_identically)

    # now including stack
    pca_stack = Stack("stack", [self.p_pca])
    steps = [
        ("stack", pca_stack),
        ("StandardScaler", PipelineElement("StandardScaler")),
        ("LinearSVC", PipelineElement("LinearSVC")),
    ]
    stacked_pipe = PhotonPipeline(steps)
    stacked_pipe.fit(self.X, self.y)

    importances = stacked_pipe.feature_importances_
    restored_importances, _, _ = stacked_pipe.inverse_transform(importances)

    # the back-projected importances must have one column per input feature
    self.assertEqual(restored_importances.shape[1], self.X.shape[1])
def test_classification_9(self):
    """Run every hyperpipe with sample pairing, two feature-half branches and a random forest."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # crazy everything
        candidate += PipelineElement('StandardScaler')
        pairing_grid = {'draw_limit': [100],
                        'generator': Categorical(['nearest_pair', 'random_pair'])}
        candidate += PipelineElement('SamplePairingClassification', pairing_grid,
                                     distance_metric='euclidean', test_disabled=True)

        # One branch per half of the feature columns; when both PCAs are
        # disabled the halves are simply concatenated again by the stack.
        feature_count = self.X_shape[1]
        midpoint = int(np.floor(feature_count / 2))
        halves = (('source1_features', 0, midpoint),
                  ('source2_features', midpoint, feature_count))

        source_branches = []
        for branch_name, first_column, end_column in halves:
            source = Branch(branch_name)
            source += DataFilter(indices=np.arange(start=first_column, stop=end_column))
            source += PipelineElement('PCA',
                                      hyperparameters={'n_components': Categorical([None, 5])},
                                      test_disabled=True)
            source_branches.append(source)

        # setup source branches and stack their output (i.e. horizontal concatenation)
        candidate += Stack('source_stack', elements=source_branches)

        # final estimator with stack output as features
        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
        candidate += PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)

        self.run_hyperpipe(candidate, self.classification)
def test_copy_me(self):
    """Check that PhotonPipeline.copy_me() reproduces a pipe with switch, stack, branch and callback.

    Dedicated unittest assertions are used instead of ``assertTrue(a == b)``
    so that a failure reports the differing values.
    """
    switch = Switch("my_copy_switch")
    switch += PipelineElement("StandardScaler")
    switch += PipelineElement("RobustScaler", test_disabled=True)

    stack = Stack("RandomStack")
    stack += PipelineElement("SVC")
    branch = Branch('Random_Branch')
    pca_hyperparameters = {'n_components': [5, 10]}
    branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
    branch += PipelineElement("DecisionTreeClassifier")
    stack += branch

    photon_pipe = PhotonPipeline([
        ("SimpleImputer", PipelineElement("SimpleImputer")),
        ("my_copy_switch", switch),
        ('RandomStack', stack),
        ('Callback1', CallbackElement('tmp_callback', np.mean)),
        ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier")),
    ])

    copy_of_the_pipe = photon_pipe.copy_me()

    self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
    self.assertEqual(len(copy_of_the_pipe.elements), 5)
    self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
    self.assertTrue(copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
    # the PCA inside the branch inside the stack keeps its hyperparameter grid
    self.assertDictEqual(
        copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
        {"PCA__n_components": [5, 10]},
    )
    self.assertIsInstance(copy_of_the_pipe.elements[3][1], CallbackElement)
    self.assertEqual(copy_of_the_pipe.named_steps["tmp_callback"].delegate_function, np.mean)
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested stack/switch layout.

    Five NeuroBranch objects (each holding one SmoothImages element) are
    created. The first two go into a Switch, the remaining three are each
    wrapped in a Branch behind a StandardScaler and collected in a Stack.

    Returns:
        The list of the five NeuroBranch objects.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    neuro_branches = []
    for number in range(5):
        neuro = NeuroBranch(name=str(number), nr_of_processes=number + 3)
        neuro += PipelineElement("SmoothImages")
        neuro_branches.append(neuro)

    switch = Switch("disabling_test_switch")
    for switched_branch in neuro_branches[:2]:
        switch += switched_branch

    branch_stack = Stack("stack_of_branches")
    for branch_number, stacked_branch in enumerate(neuro_branches[2:5], start=2):
        wrapper = Branch("branch_" + str(branch_number))
        wrapper += PipelineElement("StandardScaler")
        wrapper += stacked_branch
        branch_stack += wrapper

    for top_level_element in (
        branch_stack,
        PipelineElement("StandardScaler"),
        switch,
        PipelineElement("SVC"),
    ):
        self.hyperpipe.add(top_level_element)

    return neuro_branches
def test_classification_11(self):
    """Run every hyperpipe with a probability-emitting estimator stack feeding a random forest."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Estimator stack: two SVCs with different kernels plus a random forest
        linear_grid = {
            "kernel": Categorical(["linear"]),
            "C": Categorical([0.01, 1, 5]),
        }
        linear_svc = PipelineElement("SVC", hyperparameters=linear_grid)

        rbf_grid = {
            "kernel": Categorical(["rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        rbf_svc = PipelineElement("SVC", hyperparameters=rbf_grid)

        forest = PipelineElement("RandomForestClassifier")

        # The stack is asked for probabilities; a second forest trains on them
        estimator_stack = Stack(
            "estimator_stack",
            elements=[linear_svc, rbf_svc, forest],
            use_probabilities=True,
        )
        candidate += estimator_stack
        candidate += PipelineElement("RandomForestClassifier")

        self.run_hyperpipe(candidate, self.classification)
def test_branch_in_branch(self):
    """Round-trip a deep pipeline (stack of branches) through JsonTransformer.

    The JSON created from the original pipe must equal the JSON created from
    the pipe reloaded out of that JSON.
    """
    my_pipe = Hyperpipe(
        "basic_stacking",
        optimizer="grid_search",
        metrics=["accuracy", "precision", "recall"],
        best_config_metric="f1_score",
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=3),
        verbosity=1,
        cache_folder="./cache/",
        output_settings=OutputSettings(project_folder="./tmp/"),
    )

    # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
    tree_qua_branch = Branch("tree_branch")
    tree_qua_branch += PipelineElement("QuantileTransformer")
    tree_qua_branch += PipelineElement(
        "DecisionTreeClassifier",
        {"min_samples_split": IntegerRange(2, 4)},
        criterion="gini",
    )

    # BRANCH WITH MinMaxScaler AND SVC
    svm_mima_branch = Branch("svm_branch")
    svm_mima_branch += PipelineElement("MinMaxScaler")
    svm_mima_branch += PipelineElement(
        "SVC",
        {
            "kernel": ["rbf", "linear"],  # Categorical(['rbf', 'linear']),
            "C": IntegerRange(0.01, 2.0),
        },
        gamma="auto",
    )

    # BRANCH WITH StandardScaler AND KNeighborsClassifier
    knn_sta_branch = Branch("neighbour_branch")
    knn_sta_branch += PipelineElement("StandardScaler")
    knn_sta_branch += PipelineElement("KNeighborsClassifier")

    my_pipe += Stack("final_stack", [tree_qua_branch, svm_mima_branch, knn_sta_branch])
    my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # Keep the original JSON intact: the former chained assignment
    # (``pipe_json_reload = pipe_json = ...``) overwrote it, so the assertion
    # compared an object with itself and could never fail.
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)

    self.assertEqual(pipe_json, pipe_json_reload)
def test_classification_9(self):
    """Run every hyperpipe with sample pairing, two feature-half branches and a random forest."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # crazy everything
        candidate += PipelineElement("StandardScaler")
        pairing_grid = {
            "draw_limit": [100],
            "generator": Categorical(["nearest_pair", "random_pair"]),
        }
        candidate += PipelineElement(
            "SamplePairingClassification",
            pairing_grid,
            distance_metric="euclidean",
            test_disabled=True,
        )

        # One branch per half of the feature columns; when both PCAs are
        # disabled the halves are simply concatenated again by the stack.
        feature_count = self.X_shape[1]
        midpoint = int(np.floor(feature_count / 2))
        halves = (
            ("source1_features", 0, midpoint),
            ("source2_features", midpoint, feature_count),
        )

        source_branches = []
        for branch_name, first_column, end_column in halves:
            source = Branch(branch_name)
            source += DataFilter(indices=np.arange(start=first_column, stop=end_column))
            source += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            source_branches.append(source)

        # setup source branches and stack their output (i.e. horizontal concatenation)
        candidate += Stack("source_stack", elements=source_branches)

        # final estimator with stack output as features
        forest_grid = {
            "min_samples_split": FloatRange(start=0.05, step=0.1, stop=0.26, range_type="range")
        }
        candidate += PipelineElement("RandomForestClassifier", hyperparameters=forest_grid)

        self.run_hyperpipe(candidate, self.classification)
def test_classification_6(self):
    """Run every hyperpipe with an SVC/RandomForest stack followed by a voting classifier."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Simple estimator stack (outputs are combined by the voting element)
        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        svc_element = PipelineElement('SVC', hyperparameters=svc_grid)

        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
        forest_element = PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)

        estimator_stack = Stack('estimator_stack', elements=[svc_element, forest_element])
        candidate += estimator_stack
        candidate += PipelineElement('PhotonVotingClassifier')

        self.run_hyperpipe(candidate, self.classification)
def test_add(self):
    """Exercise adding elements to a Stack via the constructor and via ``+=``."""
    expected_pca_grid = {'MyStack__PCA__n_components': [5]}

    # elements handed to the constructor
    constructed = Stack('MyStack', [
        PipelineElement('PCA', {'n_components': [5]}),
        PipelineElement('FastICA'),
    ])
    self.assertEqual(len(constructed.elements), 2)
    self.assertDictEqual(constructed._hyperparameters, expected_pca_grid)

    # the same elements added one by one
    incremental = Stack('MyStack')
    incremental += PipelineElement('PCA', {'n_components': [5]})
    incremental += PipelineElement('FastICA')
    self.assertEqual(len(incremental.elements), 2)
    self.assertDictEqual(incremental._hyperparameters, expected_pca_grid)

    def callback(X, y=None):
        pass

    # a mixed bag: plain element, callback, switch and branch
    inner_switch = Switch('MySwitch', [PipelineElement('PCA'), PipelineElement('FastICA')])
    inner_branch = Branch('MyBranch', [PipelineElement('PCA')])
    mixed = Stack('MyStack', [
        PipelineElement('PCA'),
        CallbackElement('MyCallback', callback),
        inner_switch,
        inner_branch,
    ])
    self.assertEqual(len(mixed.elements), 4)

    # adding the very same object a second time is rejected
    with self.assertRaises(ValueError):
        mixed += mixed.elements[0]

    # a new element with an already used name gets a numbered name
    mixed += PipelineElement('PCA', {'n_components': [10, 20]})
    newest = mixed.elements[-1]
    self.assertEqual(newest.name, 'PCA2')

    expected_mixed_grid = {
        'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
        'MyStack__PCA2__n_components': [10, 20],
    }
    self.assertDictEqual(mixed.hyperparameters, expected_mixed_grid)
def test_classification_7(self):
    """Run every hyperpipe with a stack holding the same estimator type (SVC) twice."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Simple estimator stack, but use same machine twice
        linear_grid = {'kernel': Categorical(['linear']),
                       'C': Categorical([.01, 1, 5])}
        linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)

        rbf_grid = {'kernel': Categorical(['rbf']),
                    'C': Categorical([.01, 1, 5])}
        rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)

        estimator_stack = Stack('estimator_stack', elements=[linear_svc, rbf_svc])
        candidate += estimator_stack
        candidate += PipelineElement('PhotonVotingClassifier')

        self.run_hyperpipe(candidate, self.classification)
def test_branch_in_branch(self):
    """Round-trip a deep pipeline (stack of branches) through JsonTransformer.

    The JSON created from the original pipe must equal the JSON created from
    the pipe reloaded out of that JSON.
    """
    my_pipe = Hyperpipe(
        'basic_stacking',
        optimizer='grid_search',
        metrics=['accuracy', 'precision', 'recall'],
        best_config_metric='f1_score',
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=3),
        verbosity=1,
        cache_folder="./cache/",
        output_settings=OutputSettings(project_folder='./tmp/'))

    # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
    tree_qua_branch = Branch('tree_branch')
    tree_qua_branch += PipelineElement('QuantileTransformer')
    tree_qua_branch += PipelineElement(
        'DecisionTreeClassifier', {'min_samples_split': IntegerRange(2, 4)},
        criterion='gini')

    # BRANCH WITH MinMaxScaler AND SVC
    svm_mima_branch = Branch('svm_branch')
    svm_mima_branch += PipelineElement('MinMaxScaler')
    svm_mima_branch += PipelineElement(
        'SVC',
        {
            'kernel': ['rbf', 'linear'],  # Categorical(['rbf', 'linear']),
            'C': IntegerRange(0.01, 2.0)
        },
        gamma='auto')

    # BRANCH WITH StandardScaler AND KNeighborsClassifier
    knn_sta_branch = Branch('neighbour_branch')
    knn_sta_branch += PipelineElement('StandardScaler')
    knn_sta_branch += PipelineElement('KNeighborsClassifier')

    my_pipe += Stack('final_stack', [tree_qua_branch, svm_mima_branch, knn_sta_branch])
    my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # Keep the original JSON intact: the former chained assignment
    # (``pipe_json_reload = pipe_json = ...``) overwrote it, so the assertion
    # compared an object with itself and could never fail.
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)

    self.assertEqual(pipe_json, pipe_json_reload)
def test_prepare_photon_pipeline(self):
    """prepare_photon_pipe() must wrap a branch's own element objects, in order, without copying."""
    branch = Branch('my_test_branch')
    for member in (
        PipelineElement('SimpleImputer'),
        Switch('my_crazy_switch_bitch'),
        Stack('my_stacking_stack'),
        PipelineElement('SVC'),
    ):
        branch += member

    pipeline = branch.prepare_photon_pipe(branch.elements)

    self.assertEqual(len(pipeline.named_steps), 4)
    for position, expected in enumerate(branch.elements):
        # reachable by name ...
        self.assertIs(pipeline.named_steps[expected.name], expected)
        # ... and at the same position of the (name, element) step list
        self.assertIs(pipeline.elements[position][1], branch.elements[position])
def test_huge_combinations(self):
    """Fitting a pipe with twenty tunable SVCs in one stack is expected to raise a Warning."""
    settings = OutputSettings(project_folder=self.tmp_folder_path)
    pipe = Hyperpipe('huge_combinations',
                     inner_cv=KFold(n_splits=3),
                     metrics=['accuracy'],
                     best_config_metric='accuracy',
                     output_settings=settings)

    pipe += PipelineElement("PCA", hyperparameters={'n_components': [5, 10]})

    # twenty SVCs, each with its own C range and four kernels
    ensemble = Stack('ensemble')
    for _ in range(20):
        svc_grid = {'C': FloatRange(0.001, 5),
                    'kernel': ["linear", "rbf", "sigmoid", "polynomial"]}
        ensemble += PipelineElement('SVC', hyperparameters=svc_grid)
    pipe += ensemble

    pipe += PipelineElement("SVC", hyperparameters={'kernel': ["linear", "rbf", "sigmoid"]})

    features, targets = load_breast_cancer(return_X_y=True)
    with self.assertRaises(Warning):
        pipe.fit(features, targets)
def test_classification_11(self):
    """Run every hyperpipe with a probability-emitting estimator stack feeding a random forest."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Estimator stack: two SVCs with different kernels plus a random forest
        linear_grid = {'kernel': Categorical(['linear']),
                       'C': Categorical([.01, 1, 5])}
        linear_svc = PipelineElement('SVC', hyperparameters=linear_grid)

        rbf_grid = {'kernel': Categorical(['rbf']),
                    'C': Categorical([.01, 1, 5])}
        rbf_svc = PipelineElement('SVC', hyperparameters=rbf_grid)

        forest = PipelineElement('RandomForestClassifier')

        # The stack is asked for probabilities; a second forest trains on them
        estimator_stack = Stack('estimator_stack',
                                elements=[linear_svc, rbf_svc, forest],
                                use_probabilities=True)
        candidate += estimator_stack
        candidate += PipelineElement('RandomForestClassifier')

        self.run_hyperpipe(candidate, self.classification)
def test_classification_12(self):
    """Fit every hyperpipe on iris with a probability-emitting estimator stack.

    The stack (two SVCs and a random forest, ``use_probabilities=True``)
    feeds a final RandomForestClassifier; accuracy is both the only metric
    and the best-config metric.
    """
    # return_X_y must be passed by keyword; the positional form is rejected
    # by current scikit-learn releases.
    X, y = load_iris(return_X_y=True)

    # multiclass classification
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # Simple estimator Stack (train Random Forest on estimator stack proba outputs)
        # create estimator stack
        SVC1 = PipelineElement('SVC', hyperparameters={'kernel': Categorical(['linear']),
                                                       'C': Categorical([.01, 1, 5])})
        SVC2 = PipelineElement('SVC', hyperparameters={'kernel': Categorical(['rbf']),
                                                       'C': Categorical([.01, 1, 5])})
        RF = PipelineElement('RandomForestClassifier')

        # add to pipe
        pipe += Stack('estimator_stack', elements=[SVC1, SVC2, RF], use_probabilities=True)
        pipe += PipelineElement('RandomForestClassifier')

        pipe.optimization.metrics = ['accuracy']
        pipe.optimization.best_config_metric = 'accuracy'

        pipe.fit(X, y)
def test_set_random_state(self):
    """Setting ``random_state`` on a branch must reach nested switch and stack elements.

    ``assertEqual`` is used instead of ``assertTrue(a == b)`` so that a
    failure reports the differing values.
    """
    # we handle all elements in one method that is inherited so we capture them all in this test
    random_state = 53

    my_branch = Branch("random_state_branch")
    my_branch += PipelineElement("StandardScaler")

    my_switch = Switch("transformer_Switch")
    my_switch += PipelineElement("LassoFeatureSelection")
    my_switch += PipelineElement("PCA")
    my_branch += my_switch

    my_stack = Stack("Estimator_Stack")
    my_stack += PipelineElement("SVR")
    my_stack += PipelineElement("Ridge")
    my_branch += my_stack

    my_branch += PipelineElement("ElasticNet")

    my_branch.random_state = random_state

    # both the wrapping element and its wrapped base element must carry the seed
    self.assertEqual(my_switch.elements[1].random_state, random_state)
    self.assertEqual(my_switch.elements[1].base_element.random_state, random_state)
    self.assertEqual(my_stack.elements[1].random_state, random_state)
    self.assertEqual(my_stack.elements[1].base_element.random_state, random_state)
def setUp(self):
    """Load the breast cancer data and build the element/branch/stack fixtures.

    ``self.stacks`` pairs each list of stand-alone elements with a Stack
    built from copies of those elements.
    """
    # return_X_y must be passed by keyword; the positional form is rejected
    # by current scikit-learn releases.
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
    self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
    self.svc = PipelineElement('SVC', {'C': [1, 2]})
    self.tree = PipelineElement('DecisionTreeClassifier', {'min_samples_leaf': [3, 5]})

    self.transformer_branch_1 = Branch('TransBranch1', [self.pca.copy_me()])
    self.transformer_branch_2 = Branch('TransBranch2', [self.scaler.copy_me()])
    self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
    self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

    # Stacks always receive copies, so the stand-alone fixtures stay separate objects.
    self.transformer_stack = Stack(
        'TransformerStack', [self.pca.copy_me(), self.scaler.copy_me()])
    self.estimator_stack = Stack(
        'EstimatorStack', [self.svc.copy_me(), self.tree.copy_me()])
    self.transformer_branch_stack = Stack('TransBranchStack', [
        self.transformer_branch_1.copy_me(),
        self.transformer_branch_2.copy_me()
    ])
    self.estimator_branch_stack = Stack('EstBranchStack', [
        self.estimator_branch_1.copy_me(),
        self.estimator_branch_2.copy_me()
    ])

    self.stacks = [
        ([self.pca, self.scaler], self.transformer_stack),
        ([self.svc, self.tree], self.estimator_stack),
        ([self.transformer_branch_1, self.transformer_branch_2], self.transformer_branch_stack),
        ([self.estimator_branch_1, self.estimator_branch_2], self.estimator_branch_stack)
    ]
def test_classification_7(self):
    """Run every hyperpipe with a stack holding the same estimator type (SVC) twice."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        # Simple estimator stack, but use same machine twice
        linear_grid = {
            "kernel": Categorical(["linear"]),
            "C": Categorical([0.01, 1, 5]),
        }
        linear_svc = PipelineElement("SVC", hyperparameters=linear_grid)

        rbf_grid = {
            "kernel": Categorical(["rbf"]),
            "C": Categorical([0.01, 1, 5]),
        }
        rbf_svc = PipelineElement("SVC", hyperparameters=rbf_grid)

        estimator_stack = Stack("estimator_stack", elements=[linear_svc, rbf_svc])
        candidate += estimator_stack
        candidate += PipelineElement("PhotonVotingClassifier")

        self.run_hyperpipe(candidate, self.classification)
def test_classification_8(self):
    """Run every hyperpipe with two confounder-removing feature-half branches and an estimator switch."""
    for template in self.hyperpipes:
        candidate = template.copy_me()

        candidate += PipelineElement('StandardScaler')

        # One branch per half of the feature columns; when both PCAs are
        # disabled the halves are simply concatenated again by the stack.
        feature_count = self.X_shape[1]
        midpoint = int(np.floor(feature_count / 2))
        halves = (('source1_features', 0, midpoint),
                  ('source2_features', midpoint, feature_count))

        source_branches = []
        for branch_name, first_column, end_column in halves:
            source = Branch(branch_name)
            source += DataFilter(indices=np.arange(start=first_column, stop=end_column))
            source += PipelineElement('ConfounderRemoval', {},
                                      standardize_covariates=True,
                                      test_disabled=True,
                                      confounder_names=['cov1', 'cov2'])
            source += PipelineElement('PCA',
                                      hyperparameters={'n_components': Categorical([None, 5])},
                                      test_disabled=True)
            source_branches.append(source)

        # setup source branches and stack their output (i.e. horizontal concatenation)
        candidate += Stack('source_stack', elements=source_branches)

        # final estimator with stack output as features:
        # an estimator switch choosing between SVC and random forest
        estimator_switch = Switch('estimator_switch')
        svc_grid = {'kernel': Categorical(['linear', 'rbf']),
                    'C': Categorical([.01, 1, 5])}
        estimator_switch += PipelineElement('SVC', hyperparameters=svc_grid)
        forest_grid = {'min_samples_split': FloatRange(start=.05, step=.1, stop=.26, range_type='range')}
        estimator_switch += PipelineElement('RandomForestClassifier', hyperparameters=forest_grid)
        candidate += estimator_switch

        self.run_hyperpipe(candidate, self.classification)
# Hyperpipe searched with the "sk_opt" optimizer (5 configurations),
# nested 3-fold cross-validation, optimizing accuracy.
my_pipe = Hyperpipe(
    "basic_stack_pipe",
    optimizer="sk_opt",
    optimizer_params={"n_configurations": 5},
    metrics=["accuracy", "precision", "recall"],
    best_config_metric="accuracy",
    outer_cv=KFold(n_splits=3),
    inner_cv=KFold(n_splits=3),
    verbosity=1,
    output_settings=OutputSettings(project_folder="./tmp/"),
)

my_pipe += PipelineElement("StandardScaler")

# the two estimators that run side by side inside the stack
tree = PipelineElement(
    "DecisionTreeClassifier",
    hyperparameters={
        "criterion": ["gini"],
        "min_samples_split": IntegerRange(2, 4),
    },
)
svc = PipelineElement(
    "LinearSVC",
    hyperparameters={"C": FloatRange(0.5, 25)},
)

# For a stack of estimators, use_probabilities selects whether predict or
# predict_proba is called on them; estimators without predict_proba fall
# back to predict.
my_pipe += Stack("final_stack", [tree, svc], use_probabilities=True)

# final estimator trained on the stack output
my_pipe += PipelineElement("LinearSVC")

my_pipe.fit(X, y)
output_settings=OutputSettings(project_folder='./tmp/')) # BRANCH WITH QUANTILTRANSFORMER AND DECISIONTREECLASSIFIER tree_qua_branch = Branch('tree_branch') tree_qua_branch += PipelineElement('QuantileTransformer') tree_qua_branch += PipelineElement('DecisionTreeClassifier', {'min_samples_split': IntegerRange(2, 4)}, criterion='gini') # BRANCH WITH MinMaxScaler AND DecisionTreeClassifier svm_mima_branch = Branch('svm_branch') svm_mima_branch += PipelineElement('MinMaxScaler') svm_mima_branch += PipelineElement('SVC', { 'kernel': Categorical(['rbf', 'linear']), 'C': IntegerRange(0.01, 2.0) }, gamma='auto') # BRANCH WITH StandardScaler AND KNeighborsClassifier knn_sta_branch = Branch('neighbour_branch') knn_sta_branch += PipelineElement('StandardScaler') knn_sta_branch += PipelineElement('KNeighborsClassifier') # voting = True to mean the result of every branch my_pipe += Stack('final_stack', [tree_qua_branch, svm_mima_branch, knn_sta_branch]) my_pipe += PipelineElement('LogisticRegression', solver='lbfgs') my_pipe.fit(X, y)
class StackTests(unittest.TestCase):
    """
    Unit tests for ``Stack``.

    Four stacks are exercised: one of plain transformers, one of plain
    estimators, and the same two combinations wrapped in ``Branch`` objects.
    ``self.stacks`` pairs each stack with the stand-alone elements it was
    copied from, so stack results can be compared against the elements.
    """

    def setUp(self):
        """Load the breast cancer data and build the elements, branches and stacks."""
        # return_X_y has to be passed by keyword: it is keyword-only in current
        # scikit-learn releases, a positional ``True`` raises a TypeError there.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
        self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
        self.svc = PipelineElement('SVC', {'C': [1, 2]})
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_leaf': [3, 5]})

        self.transformer_branch_1 = Branch('TransBranch1', [self.pca.copy_me()])
        self.transformer_branch_2 = Branch('TransBranch2', [self.scaler.copy_me()])
        self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
        self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

        # the stacks work on copies, so the originals stay available as a
        # reference to compare against
        self.transformer_stack = Stack(
            'TransformerStack', [self.pca.copy_me(), self.scaler.copy_me()])
        self.estimator_stack = Stack(
            'EstimatorStack', [self.svc.copy_me(), self.tree.copy_me()])
        self.transformer_branch_stack = Stack('TransBranchStack', [
            self.transformer_branch_1.copy_me(),
            self.transformer_branch_2.copy_me()
        ])
        self.estimator_branch_stack = Stack('EstBranchStack', [
            self.estimator_branch_1.copy_me(),
            self.estimator_branch_2.copy_me()
        ])

        # (reference elements, stack built from copies of these elements)
        self.stacks = [
            ([self.pca, self.scaler], self.transformer_stack),
            ([self.svc, self.tree], self.estimator_stack),
            ([self.transformer_branch_1, self.transformer_branch_2],
             self.transformer_branch_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack)
        ]

    def test_copy_me(self):
        """A copied stack has an equal dict representation but distinct elements."""
        for _, stack in self.stacks:
            stack_copy = stack.copy_me()
            self.assertEqual(stack.random_state, stack_copy.random_state)
            self.assertNotEqual(stack.elements[0].__dict__,
                                stack_copy.elements[0].__dict__)
            self.assertDictEqual(elements_to_dict(stack),
                                 elements_to_dict(stack_copy))

    def test_horizontal_stacking(self):
        """The stack output has as many columns as both element outputs together."""
        for (element_1, element_2), stack in self.stacks:
            # fit elements
            Xt_1 = element_1.fit(self.X, self.y).transform(self.X, self.y)
            Xt_2 = element_2.fit(self.X, self.y).transform(self.X, self.y)
            Xt = stack.fit(self.X, self.y).transform(self.X, self.y)

            # output of transform() changes depending on whether it is an
            # estimator stack or a transformer stack
            if isinstance(Xt, tuple):
                Xt = Xt[0]
                Xt_1 = Xt_1[0]
                Xt_2 = Xt_2[0]

            # one-dimensional outputs count as a single column
            if len(Xt_1.shape) == 1:
                Xt_1 = np.reshape(Xt_1, (-1, 1))
                Xt_2 = np.reshape(Xt_2, (-1, 1))

            self.assertEqual(Xt.shape[1], Xt_1.shape[-1] + Xt_2.shape[-1])

    def recursive_assertion(self, element_a, element_b):
        """
        Assert that every entry of ``element_a`` equals its counterpart in ``element_b``.

        Arrays are compared element-wise, nested dicts recursively and all
        other values with ``assertEqual``. Only the keys of ``element_a`` are
        visited.
        """
        for key, value_a in element_a.items():
            if isinstance(value_a, np.ndarray):
                np.testing.assert_array_equal(value_a, element_b[key])
            elif isinstance(value_a, dict):
                self.recursive_assertion(value_a, element_b[key])
            else:
                self.assertEqual(value_a, element_b[key])

    def test_fit(self):
        """Elements fitted inside a stack equal the same elements fitted alone."""
        plain_stacks = [([self.pca, self.scaler], self.transformer_stack),
                        ([self.svc, self.tree], self.estimator_stack)]
        for elements, stack in plain_stacks:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)

            # same seed again so that the stand-alone fits are comparable
            np.random.seed(42)
            for i, element in enumerate(elements):
                element = element.fit(self.X, self.y)
                element_dict = elements_to_dict(element)
                stack_dict = elements_to_dict(stack.elements[i])
                self.recursive_assertion(element_dict, stack_dict)

    def test_transform(self):
        """Stack.transform equals the horizontally stacked element transforms."""
        for elements, stack in self.stacks:
            np.random.seed(42)
            Xt_stack, _, _ = stack.fit(self.X, self.y).transform(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element, _, _ = element.fit(self.X, self.y).transform(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(Xt_stack, Xt_elements)

    def test_predict(self):
        """Stack.predict equals the horizontally stacked element predictions."""
        estimator_stacks = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack)
        ]
        for elements, stack in estimator_stacks:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_predict_proba(self):
        """Stack.predict_proba equals the stacked element probabilities.

        Elements whose predict_proba returns None contribute their predict
        output instead.
        """
        estimator_stacks = [
            ([self.svc, self.tree], self.estimator_stack),
            ([self.estimator_branch_1, self.estimator_branch_2],
             self.estimator_branch_stack)
        ]
        for elements, stack in estimator_stacks:
            np.random.seed(42)
            stack = stack.fit(self.X, self.y)
            yt_stack = stack.predict_proba(self.X)

            np.random.seed(42)
            Xt_elements = None
            for element in elements:
                Xt_element = element.fit(self.X, self.y).predict_proba(self.X)
                if Xt_element is None:
                    # no probabilities available: fall back to predict
                    Xt_element = element.fit(self.X, self.y).predict(self.X)
                Xt_elements = PhotonDataHelper.stack_data_horizontally(
                    Xt_elements, Xt_element)

            np.testing.assert_array_equal(yt_stack, Xt_elements)

    def test_inverse_transform(self):
        """inverse_transform on a fitted stack raises NotImplementedError."""
        with self.assertRaises(NotImplementedError):
            self.stacks[0][1].fit(self.X, self.y).inverse_transform(self.X)

    def test_set_params(self):
        """set_params forwards values to the elements and rejects unknown keys."""
        trans_config = {
            'PCA__n_components': 2,
            'PCA__disabled': True,
            'StandardScaler__with_mean': True
        }
        est_config = {
            'SVC__C': 3,
            'DecisionTreeClassifier__min_samples_leaf': 1
        }

        # transformer stack
        self.transformer_stack.set_params(**trans_config)
        self.assertEqual(
            self.transformer_stack.elements[0].base_element.n_components, 2)
        self.assertEqual(self.transformer_stack.elements[0].disabled, True)
        self.assertEqual(
            self.transformer_stack.elements[1].base_element.with_mean, True)

        # estimator stack
        self.estimator_stack.set_params(**est_config)
        self.assertEqual(self.estimator_stack.elements[0].base_element.C, 3)
        self.assertEqual(
            self.estimator_stack.elements[1].base_element.min_samples_leaf, 1)

        # unknown parameter names are rejected by both kinds of stack
        with self.assertRaises(ValueError):
            self.estimator_stack.set_params(**{'any_weird_param': 1})

        with self.assertRaises(ValueError):
            self.transformer_stack.set_params(**{'any_weird_param': 1})

    def test_add(self):
        """Elements can be added via the constructor or ``+=``; duplicates are handled."""
        # elements passed to the constructor
        stack = Stack('MyStack', [
            PipelineElement('PCA', {'n_components': [5]}),
            PipelineElement('FastICA')
        ])
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        # elements added one by one
        stack = Stack('MyStack')
        stack += PipelineElement('PCA', {'n_components': [5]})
        stack += PipelineElement('FastICA')
        self.assertEqual(len(stack.elements), 2)
        self.assertDictEqual(stack._hyperparameters,
                             {'MyStack__PCA__n_components': [5]})

        def callback(X, y=None):
            pass

        # a mix of different element types
        stack = Stack('MyStack', [
            PipelineElement('PCA'),
            CallbackElement('MyCallback', callback),
            Switch('MySwitch',
                   [PipelineElement('PCA'),
                    PipelineElement('FastICA')]),
            Branch('MyBranch', [PipelineElement('PCA')])
        ])
        self.assertEqual(len(stack.elements), 4)

        # test doubled item: adding the very same object again is an error
        with self.assertRaises(ValueError):
            stack += stack.elements[0]

        # a new element with an already used name gets a numbered name
        stack += PipelineElement('PCA', {'n_components': [10, 20]})
        self.assertEqual(stack.elements[-1].name, 'PCA2')
        self.assertDictEqual(
            stack.hyperparameters, {
                'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
                'MyStack__PCA2__n_components': [10, 20]
            })

    def test_feature_importances(self):
        """Fitted estimator stacks report no feature importances."""
        # single item
        self.estimator_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_stack.feature_importances_)

        self.estimator_branch_stack.fit(self.X, self.y)
        self.assertIsNone(self.estimator_branch_stack.feature_importances_)

    def test_use_probabilities(self):
        """use_probabilities changes the number of columns returned by predict."""
        self.estimator_stack.use_probabilities = True
        self.estimator_stack.fit(self.X, self.y)
        probas = self.estimator_stack.predict(self.X)
        self.assertEqual(probas.shape[1], 3)

        self.estimator_stack.use_probabilities = False
        self.estimator_stack.fit(self.X, self.y)
        preds = self.estimator_stack.predict(self.X)
        self.assertEqual(preds.shape[1], 2)
        probas = self.estimator_stack.predict_proba(self.X)
        self.assertEqual(probas.shape[1], 3)
def test_class_with_data_02(self):
    """
    Round-trip a hyperpipe with a large classifier stack through JSON.

    The pipe is serialized with ``JsonTransformer.create_json`` and rebuilt
    with ``from_json``; copies of the original and the reloaded pipe must
    produce equal ``elements_to_dict`` representations.
    """
    X, y = load_breast_cancer(return_X_y=True)

    # DESIGN YOUR PIPELINE
    # outer and inner loop use identically configured, but separate, splitters
    cv_kwargs = dict(n_splits=2, shuffle=True, random_state=42)
    my_pipe = Hyperpipe(name='Estimator_pipe',
                        optimizer='grid_search',
                        metrics=['balanced_accuracy'],
                        best_config_metric='balanced_accuracy',
                        outer_cv=StratifiedKFold(**cv_kwargs),
                        inner_cv=StratifiedKFold(**cv_kwargs),
                        output_settings=OutputSettings(project_folder='./tmp/'),
                        random_seed=42)

    # ADD ELEMENTS TO YOUR PIPELINE
    # first normalize all features
    my_pipe += PipelineElement('StandardScaler')

    # some feature selection
    lasso_hyperparameters = {
        'percentile_to_keep': FloatRange(start=0.1, step=0.1, stop=0.7,
                                         range_type='range'),
        'alpha': FloatRange(0.5, 1)
    }
    my_pipe += PipelineElement('LassoFeatureSelection',
                               hyperparameters=lasso_hyperparameters,
                               test_disabled=True)

    # add imbalanced group handling
    my_pipe += PipelineElement('ImbalancedDataTransformer',
                               method_name='SMOTE',
                               test_disabled=False)

    # setup estimator stack: one element per estimator name, in this order
    estimator_names = (
        'RandomForestClassifier', 'LinearSVC', 'NuSVC', "SVC",
        "MLPClassifier", "KNeighborsClassifier", "Lasso",
        "PassiveAggressiveClassifier", "LogisticRegression", "Perceptron",
        "RidgeClassifier", "SGDClassifier", "GaussianProcessClassifier",
        "AdaBoostClassifier", "BaggingClassifier",
        "GradientBoostingClassifier",
    )
    est_stack = Stack(name='classifier_stack')
    for estimator_name in estimator_names:
        est_stack += PipelineElement(estimator_name)
    my_pipe += est_stack

    my_pipe += PipelineElement('PhotonVotingClassifier')

    # serialize the pipe and rebuild it from the serialized form
    json_transformer = JsonTransformer()
    my_pipe_reload = json_transformer.from_json(
        json_transformer.create_json(my_pipe))

    original_dict = elements_to_dict(my_pipe.copy_me())
    reloaded_dict = elements_to_dict(my_pipe_reload.copy_me())
    self.assertDictEqual(original_dict, reloaded_dict)