def setUp(self):
    """Load the breast-cancer data and build the elements shared by the tests.

    Creates four base pipeline elements (SVC, decision tree, Gaussian process
    classifier, PCA), one estimator and one transformer branch, and several
    switches. Every container receives copies (``copy_me()``) of the base
    elements rather than the base elements themselves.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn made the parameter
    # keyword-only, so the positional form ``load_breast_cancer(True)`` fails
    # on current releases while the keyword form works on old and new ones.
    self.X, self.y = load_breast_cancer(return_X_y=True)

    # Base elements.
    self.svc = PipelineElement('SVC', {
        'C': [0.1, 1],
        'kernel': ['rbf', 'sigmoid'],
    })
    self.tree = PipelineElement('DecisionTreeClassifier',
                                {'min_samples_split': [2, 3, 4]})
    self.gpc = PipelineElement('GaussianProcessClassifier')
    self.pca = PipelineElement('PCA')

    # Single-element branches.
    self.estimator_branch = Branch('estimator_branch', [self.tree.copy_me()])
    self.transformer_branch = Branch('transformer_branch', [self.pca.copy_me()])

    # Switches over plain elements and over element/branch mixtures.
    self.estimator_switch = Switch(
        'estimator_switch',
        [self.svc.copy_me(), self.tree.copy_me(), self.gpc.copy_me()])
    self.estimator_switch_with_branch = Switch(
        'estimator_switch_with_branch',
        [self.tree.copy_me(), self.estimator_branch.copy_me()])
    self.transformer_switch_with_branch = Switch(
        'transformer_switch_with_branch',
        [self.pca.copy_me(), self.transformer_branch.copy_me()])

    # A switch that itself contains a switch.
    self.switch_in_switch = Switch('Switch_in_switch', [
        self.transformer_branch.copy_me(),
        self.transformer_switch_with_branch.copy_me(),
    ])
def test_classification_9(self):
    """Run each hyperpipe on a scaler -> sample pairing -> two-branch stack -> random forest pipeline.

    Each source branch filters one half of the feature columns and applies an
    optional PCA; the stack concatenates the branch outputs for the final
    estimator.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # crazy everything
        pipe += PipelineElement('StandardScaler')
        pipe += PipelineElement(
            'SamplePairingClassification',
            {'draw_limit': [100],
             'generator': Categorical(['nearest_pair', 'random_pair'])},
            distance_metric='euclidean',
            test_disabled=True)

        # One branch per half of the feature columns. With 13 features (e.g.
        # Boston Housing) the halves are indices 0-5 and 6-12. If both PCAs
        # are disabled the halves are simply concatenated again.
        n_features = self.X_shape[1]
        half = int(np.floor(n_features / 2))
        column_ranges = (
            ('source1_features', 0, half),
            ('source2_features', half, n_features),
        )
        source_branches = []
        for branch_name, first_column, end_column in column_ranges:
            source_branch = Branch(branch_name)
            source_branch += DataFilter(
                indices=np.arange(start=first_column, stop=end_column))
            source_branch += PipelineElement(
                'PCA',
                hyperparameters={'n_components': Categorical([None, 5])},
                test_disabled=True)
            source_branches.append(source_branch)

        # stack the branch outputs (i.e. horizontal concatenation)
        pipe += Stack('source_stack', elements=source_branches)

        # final estimator with stack output as features
        forest_params = {
            'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                            range_type='range')}
        pipe += PipelineElement('RandomForestClassifier',
                                hyperparameters=forest_params)

        self.run_hyperpipe(pipe, self.classification)
def test_branch_in_branch(self):
    """
    Test for deep Pipeline.

    Builds a hyperpipe with a stack of three branches followed by a logistic
    regression, serialises it with ``JsonTransformer``, reloads it from that
    JSON, serialises the reloaded pipe again and checks that both JSON
    documents are equal.
    """
    my_pipe = Hyperpipe(
        "basic_stacking",
        optimizer="grid_search",
        metrics=["accuracy", "precision", "recall"],
        best_config_metric="f1_score",
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=3),
        verbosity=1,
        cache_folder="./cache/",
        output_settings=OutputSettings(project_folder="./tmp/"),
    )

    # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
    tree_qua_branch = Branch("tree_branch")
    tree_qua_branch += PipelineElement("QuantileTransformer")
    tree_qua_branch += PipelineElement(
        "DecisionTreeClassifier",
        {"min_samples_split": IntegerRange(2, 4)},
        criterion="gini",
    )

    # BRANCH WITH MinMaxScaler AND SVC
    svm_mima_branch = Branch("svm_branch")
    svm_mima_branch += PipelineElement("MinMaxScaler")
    svm_mima_branch += PipelineElement(
        "SVC",
        {
            "kernel": ["rbf", "linear"],  # Categorical(['rbf', 'linear']),
            # NOTE(review): float bounds on an IntegerRange look unintended;
            # confirm what values this yields for C.
            "C": IntegerRange(0.01, 2.0),
        },
        gamma="auto",
    )

    # BRANCH WITH StandardScaler AND KNeighborsClassifier
    knn_sta_branch = Branch("neighbour_branch")
    knn_sta_branch += PipelineElement("StandardScaler")
    knn_sta_branch += PipelineElement("KNeighborsClassifier")

    # stack the three branches and put a final estimator on top
    my_pipe += Stack("final_stack",
                     [tree_qua_branch, svm_mima_branch, knn_sta_branch])
    my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # Bind only ``pipe_json_reload`` here. The previous chained assignment
    # (``pipe_json_reload = pipe_json = ...``) also overwrote ``pipe_json``,
    # so the assertion compared the reloaded JSON with itself.
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)

    self.assertEqual(pipe_json, pipe_json_reload)
def test_classification_9(self):
    """Run each hyperpipe on a scaler -> sample pairing -> two-branch stack -> random forest pipeline.

    Each source branch filters one half of the feature columns and applies an
    optional PCA; the stack concatenates the branch outputs for the final
    estimator.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()

        # crazy everything
        pipe += PipelineElement("StandardScaler")
        pairing_params = {
            "draw_limit": [100],
            "generator": Categorical(["nearest_pair", "random_pair"]),
        }
        pipe += PipelineElement(
            "SamplePairingClassification",
            pairing_params,
            distance_metric="euclidean",
            test_disabled=True,
        )

        # One branch per half of the feature columns. With 13 features (e.g.
        # Boston Housing) the halves are indices 0-5 and 6-12. If both PCAs
        # are disabled the halves are simply concatenated again.
        total_columns = self.X_shape[1]
        middle = int(np.floor(total_columns / 2))
        halves = [
            ("source1_features", 0, middle),
            ("source2_features", middle, total_columns),
        ]
        stacked_branches = []
        for label, lower, upper in halves:
            half_branch = Branch(label)
            half_branch += DataFilter(indices=np.arange(start=lower, stop=upper))
            half_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            stacked_branches.append(half_branch)

        # stack the branch outputs (i.e. horizontal concatenation)
        pipe += Stack("source_stack", elements=stacked_branches)

        # final estimator with stack output as features
        split_range = FloatRange(start=0.05, step=0.1, stop=0.26,
                                 range_type="range")
        pipe += PipelineElement(
            "RandomForestClassifier",
            hyperparameters={"min_samples_split": split_range},
        )

        self.run_hyperpipe(pipe, self.classification)
def test_branch_in_branch(self):
    """
    Test for deep Pipeline.

    Builds a hyperpipe with a stack of three branches followed by a logistic
    regression, serialises it with ``JsonTransformer``, reloads it from that
    JSON, serialises the reloaded pipe again and checks that both JSON
    documents are equal.
    """
    my_pipe = Hyperpipe(
        'basic_stacking',
        optimizer='grid_search',
        metrics=['accuracy', 'precision', 'recall'],
        best_config_metric='f1_score',
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=3),
        verbosity=1,
        cache_folder="./cache/",
        output_settings=OutputSettings(project_folder='./tmp/'))

    # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
    tree_qua_branch = Branch('tree_branch')
    tree_qua_branch += PipelineElement('QuantileTransformer')
    tree_qua_branch += PipelineElement(
        'DecisionTreeClassifier',
        {'min_samples_split': IntegerRange(2, 4)},
        criterion='gini')

    # BRANCH WITH MinMaxScaler AND SVC
    svm_mima_branch = Branch('svm_branch')
    svm_mima_branch += PipelineElement('MinMaxScaler')
    svm_mima_branch += PipelineElement(
        'SVC',
        {
            'kernel': ['rbf', 'linear'],  # Categorical(['rbf', 'linear']),
            # NOTE(review): float bounds on an IntegerRange look unintended;
            # confirm what values this yields for C.
            'C': IntegerRange(0.01, 2.0)
        },
        gamma='auto')

    # BRANCH WITH StandardScaler AND KNeighborsClassifier
    knn_sta_branch = Branch('neighbour_branch')
    knn_sta_branch += PipelineElement('StandardScaler')
    knn_sta_branch += PipelineElement('KNeighborsClassifier')

    # stack the three branches and put a final estimator on top
    my_pipe += Stack('final_stack',
                     [tree_qua_branch, svm_mima_branch, knn_sta_branch])
    my_pipe += PipelineElement('LogisticRegression', solver='lbfgs')

    json_transformer = JsonTransformer()
    pipe_json = json_transformer.create_json(my_pipe)
    my_pipe_reload = json_transformer.from_json(pipe_json)
    # Bind only ``pipe_json_reload`` here. The previous chained assignment
    # (``pipe_json_reload = pipe_json = ...``) also overwrote ``pipe_json``,
    # so the assertion compared the reloaded JSON with itself.
    pipe_json_reload = json_transformer.create_json(my_pipe_reload)

    self.assertEqual(pipe_json, pipe_json_reload)
def test_copy_me(self):
    """Check that ``PhotonPipeline.copy_me()`` reproduces a nested pipeline.

    The pipeline holds an imputer, a switch, a stack (containing an SVC and a
    PCA/decision-tree branch), a callback element and a voting classifier.
    The copy must keep the random state, the number and names of steps, the
    ``test_disabled`` flag, the nested hyperparameters and the callback's
    delegate function.
    """
    switch = Switch("my_copy_switch")
    switch += PipelineElement("StandardScaler")
    switch += PipelineElement("RobustScaler", test_disabled=True)

    stack = Stack("RandomStack")
    stack += PipelineElement("SVC")
    branch = Branch('Random_Branch')
    pca_hyperparameters = {'n_components': [5, 10]}
    branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
    branch += PipelineElement("DecisionTreeClassifier")
    stack += branch

    photon_pipe = PhotonPipeline([
        ("SimpleImputer", PipelineElement("SimpleImputer")),
        ("my_copy_switch", switch),
        ('RandomStack', stack),
        ('Callback1', CallbackElement('tmp_callback', np.mean)),
        ("PhotonVotingClassifier", PipelineElement("PhotonVotingClassifier")),
    ])

    copy_of_the_pipe = photon_pipe.copy_me()

    # Dedicated assert methods are used instead of ``assertTrue(a == b)`` so a
    # failure reports both compared values rather than "False is not true".
    self.assertEqual(photon_pipe.random_state, copy_of_the_pipe.random_state)
    self.assertEqual(len(copy_of_the_pipe.elements), 5)
    self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
    self.assertTrue(
        copy_of_the_pipe.named_steps["my_copy_switch"].elements[1].test_disabled)
    self.assertDictEqual(
        copy_of_the_pipe.elements[2][1].elements[1].elements[0].hyperparameters,
        {"PCA__n_components": [5, 10]})
    self.assertIsInstance(copy_of_the_pipe.elements[3][1], CallbackElement)
    # The copied callback is looked up under the element's own name
    # ('tmp_callback'), not under the step key used above ('Callback1').
    self.assertEqual(
        copy_of_the_pipe.named_steps["tmp_callback"].delegate_function, np.mean)
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested structure of parallel branches.

    Five ``ParallelBranch`` objects (named "0".."4", with 3..7 processes) each
    get one PCA element. The first two go into a switch, the remaining three
    are wrapped in branches behind a StandardScaler and collected in a stack.
    The hyperpipe then receives: stack, StandardScaler, switch, SVC.

    Returns:
        The list of the five parallel branches, in creation order.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    nmb_list = []
    for number in range(5):
        parallel_branch = ParallelBranch(name=str(number),
                                         nr_of_processes=number + 3)
        parallel_branch += PipelineElement(
            'PCA', hyperparameters={'n_components': IntegerRange(1, 50)})
        nmb_list.append(parallel_branch)

    # the first two parallel branches become the switch options
    my_switch = Switch('disabling_test_switch')
    for switch_option in nmb_list[:2]:
        my_switch += switch_option

    # the remaining three are each wrapped in a branch and stacked
    my_stack = Stack('stack_of_branches')
    for number, parallel_branch in enumerate(nmb_list[2:5], start=2):
        wrapper = Branch('branch_' + str(number))
        wrapper += PipelineElement('StandardScaler')
        wrapper += parallel_branch
        my_stack += wrapper

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement('StandardScaler'))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement('SVC'))
    return nmb_list
def test_ask_advanced(self):
    """
    Test advanced functionality of .ask()

    Sets ``self.pipeline_elements`` to a list containing plain elements, a
    switch, a branch and a switch holding both, runs ``self.test_ask()`` and
    checks that the expected hyperparameter names are in its result.
    """
    svc_grid = {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']}
    branch = Branch('branch')
    branch += PipelineElement('PCA')
    branch += PipelineElement('SVC', svc_grid)

    scaler_options = [PipelineElement("StandardScaler"),
                      PipelineElement("MaxAbsScaler")]
    pipe_switch = Switch('switch', scaler_options)

    self.pipeline_elements = [
        PipelineElement("StandardScaler"),
        PipelineElement('PCA',
                        hyperparameters={'n_components': IntegerRange(5, 20)},
                        test_disabled=True),
        pipe_switch,
        branch,
        Switch('Switch_in_switch', [branch, pipe_switch]),
    ]
    generated_elements = self.test_ask()

    expected_names = (
        "PCA__n_components",
        "Switch_in_switch__current_element",
        "branch__SVC__C",
        "branch__SVC__kernel",
        "switch__current_element",
    )
    for hyperparameter_name in expected_names:
        self.assertIn(hyperparameter_name, generated_elements)
def test_ask_advanced(self):
    """
    Test advanced functionality of .ask()

    Sets ``self.pipeline_elements`` to a list containing plain elements, a
    switch, a branch and a switch holding both, runs ``self.test_ask()`` and
    checks that the expected hyperparameter names are in its result.
    """
    branch = Branch("branch")
    branch += PipelineElement("PCA")
    branch += PipelineElement(
        "SVC",
        {"C": [0.1, 1], "kernel": ["rbf", "sigmoid"]},
    )

    pipe_switch = Switch(
        "switch",
        [PipelineElement("StandardScaler"), PipelineElement("MaxAbsScaler")],
    )

    optional_pca = PipelineElement(
        "PCA",
        hyperparameters={"n_components": IntegerRange(5, 20)},
        test_disabled=True,
    )
    # A fresh StandardScaler is created before the optional PCA would be
    # evaluated in a list literal; keep that creation order here.
    self.pipeline_elements = [PipelineElement("StandardScaler")]
    self.pipeline_elements.append(optional_pca)
    self.pipeline_elements.extend([
        pipe_switch,
        branch,
        Switch("Switch_in_switch", [branch, pipe_switch]),
    ])

    generated_elements = self.test_ask()

    for expected_key in [
        "PCA__n_components",
        "Switch_in_switch__current_element",
        "branch__SVC__C",
        "branch__SVC__kernel",
        "switch__current_element",
    ]:
        self.assertIn(expected_key, generated_elements)
def setup_crazy_pipe(self):
    """Replace the hyperpipe's elements with a nested structure of neuro branches.

    Five ``NeuroBranch`` objects (named "0".."4", with 3..7 processes) each
    get one SmoothImages element. The first two go into a switch, the
    remaining three are wrapped in branches behind a StandardScaler and
    collected in a stack. The hyperpipe then receives: stack, StandardScaler,
    switch, SVC.

    Returns:
        The list of the five neuro branches, in creation order.
    """
    # erase all, we need a complex and crazy task
    self.hyperpipe.elements = []

    nmb_list = []
    for number in range(5):
        neuro_branch = NeuroBranch(name=str(number), nr_of_processes=number + 3)
        neuro_branch += PipelineElement("SmoothImages")
        nmb_list.append(neuro_branch)

    # the first two neuro branches become the switch options
    my_switch = Switch("disabling_test_switch")
    for switch_option in nmb_list[:2]:
        my_switch += switch_option

    # the remaining three are each wrapped in a branch and stacked
    my_stack = Stack("stack_of_branches")
    for number, neuro_branch in enumerate(nmb_list[2:5], start=2):
        wrapper = Branch("branch_" + str(number))
        wrapper += PipelineElement("StandardScaler")
        wrapper += neuro_branch
        my_stack += wrapper

    self.hyperpipe.add(my_stack)
    self.hyperpipe.add(PipelineElement("StandardScaler"))
    self.hyperpipe.add(my_switch)
    self.hyperpipe.add(PipelineElement("SVC"))
    return nmb_list
def test_estimator_type(self):
    """Check ``Branch._estimator_type`` for four differently composed branches.

    Expected values: ``None`` for a transformer-only branch and for a branch
    ending in a callback, ``'classifier'`` for a branch ending in SVC and
    ``'regressor'`` for a branch ending in SVR.
    """
    def callback(X, y=None):
        pass

    transformer_branch = Branch(
        'TransBranch', [PipelineElement('PCA'), PipelineElement('FastICA')])
    classifier_branch = Branch('ClassBranch', [PipelineElement('SVC')])
    regressor_branch = Branch('RegBranch', [PipelineElement('SVR')])
    callback_branch = Branch(
        'CallBranch',
        [PipelineElement('SVR'), CallbackElement('callback', callback)])

    expectations = (
        (transformer_branch, None),
        (classifier_branch, 'classifier'),
        (regressor_branch, 'regressor'),
        (callback_branch, None),
    )
    for branch, expected_type in expectations:
        self.assertEqual(branch._estimator_type, expected_type)
def setUp(self):
    """Create an SVC element, a decision-tree element, a switch, a branch and a nested switch.

    The same SVC element object is used in both the switch and the branch;
    the nested switch contains that branch and that switch.
    """
    svc_element = PipelineElement(
        'SVC', {'C': [0.1, 1], 'kernel': ['rbf', 'sigmoid']})
    # NOTE: despite its attribute name, this is a decision tree element.
    tree_element = PipelineElement(
        'DecisionTreeClassifier', {'min_samples_split': [2, 3, 4]})
    self.svc_pipe_element = svc_element
    self.lr_pipe_element = tree_element

    self.pipe_switch = Switch('switch', [svc_element, tree_element])

    pca_svc_branch = Branch('branch')
    pca_svc_branch += PipelineElement('PCA')
    pca_svc_branch += svc_element
    self.branch = pca_svc_branch

    self.switch_in_switch = Switch('Switch_in_switch',
                                   [self.branch, self.pipe_switch])
def setUp(self):
    """Load the breast-cancer data and build photonai branches plus sklearn counterparts.

    Creates a scaler, a PCA and a decision tree element, a transformer branch
    (scaler + PCA) and an estimator branch (scaler + PCA + tree), and for each
    branch an sklearn pipeline with the same steps. PCA and tree use
    ``random_state=3`` in both variants.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn made the parameter
    # keyword-only, so the positional form ``load_breast_cancer(True)`` fails
    # on current releases while the keyword form works on old and new ones.
    self.X, self.y = load_breast_cancer(return_X_y=True)

    self.scaler = PipelineElement("StandardScaler", {'with_mean': True})
    self.pca = PipelineElement('PCA', {'n_components': [1, 2]},
                               test_disabled=True, random_state=3)
    self.tree = PipelineElement('DecisionTreeClassifier',
                                {'min_samples_split': [2, 3, 4]},
                                random_state=3)

    # Both branches share the same scaler/PCA element objects and the same
    # branch name.
    self.transformer_branch = Branch('MyBranch', [self.scaler, self.pca])
    self.transformer_branch_sklearn = SKPipeline([
        ("SS", StandardScaler()),
        ("PCA", PCA(random_state=3)),
    ])

    self.estimator_branch = Branch('MyBranch',
                                   [self.scaler, self.pca, self.tree])
    self.estimator_branch_sklearn = SKPipeline([
        ("SS", StandardScaler()),
        ("PCA", PCA(random_state=3)),
        ("Tree", DecisionTreeClassifier(random_state=3)),
    ])
def setUp(self):
    """Load the breast-cancer data and build the stacks used by the tests.

    Creates four base elements, four single-element branches, and four stacks
    (of transformers, of estimators, of transformer branches and of estimator
    branches). ``self.stacks`` pairs each stack with the list of originals
    whose copies it was built from.
    """
    # ``return_X_y`` is passed by keyword: scikit-learn made the parameter
    # keyword-only, so the positional form ``load_breast_cancer(True)`` fails
    # on current releases while the keyword form works on old and new ones.
    self.X, self.y = load_breast_cancer(return_X_y=True)

    # Base elements.
    self.pca = PipelineElement('PCA', {'n_components': [5, 10]})
    self.scaler = PipelineElement('StandardScaler', {'with_mean': [True]})
    self.svc = PipelineElement('SVC', {'C': [1, 2]})
    self.tree = PipelineElement('DecisionTreeClassifier',
                                {'min_samples_leaf': [3, 5]})

    # Single-element branches built from copies of the base elements.
    self.transformer_branch_1 = Branch('TransBranch1', [self.pca.copy_me()])
    self.transformer_branch_2 = Branch('TransBranch2', [self.scaler.copy_me()])
    self.estimator_branch_1 = Branch('EstBranch1', [self.svc.copy_me()])
    self.estimator_branch_2 = Branch('EstBranch2', [self.tree.copy_me()])

    # Stacks of plain elements and of branches, again built from copies.
    self.transformer_stack = Stack(
        'TransformerStack', [self.pca.copy_me(), self.scaler.copy_me()])
    self.estimator_stack = Stack(
        'EstimatorStack', [self.svc.copy_me(), self.tree.copy_me()])
    self.transformer_branch_stack = Stack('TransBranchStack', [
        self.transformer_branch_1.copy_me(),
        self.transformer_branch_2.copy_me(),
    ])
    self.estimator_branch_stack = Stack('EstBranchStack', [
        self.estimator_branch_1.copy_me(),
        self.estimator_branch_2.copy_me(),
    ])

    # (original elements, stack built from their copies)
    self.stacks = [
        ([self.pca, self.scaler], self.transformer_stack),
        ([self.svc, self.tree], self.estimator_stack),
        ([self.transformer_branch_1, self.transformer_branch_2],
         self.transformer_branch_stack),
        ([self.estimator_branch_1, self.estimator_branch_2],
         self.estimator_branch_stack),
    ]
def test_sanity_check_pipe(self):
    """Expect a ``Warning`` when a branch whose last element is a callback is sanity-checked."""
    branch_under_test = Branch('my_test_branch')

    def callback_func(X, y, **kwargs):
        pass

    # NOTE(review): the original layout does not show where the ``with`` block
    # ends; all following statements are assumed to belong to it. If an
    # earlier statement raises the Warning, the later ones never run.
    with self.assertRaises(Warning):
        my_callback = CallbackElement('final_element_callback',
                                      delegate_function=callback_func)
        branch_under_test += my_callback
        no_callback_pipe = branch_under_test.prepare_photon_pipe(
            branch_under_test.elements)
        branch_under_test.sanity_check_pipeline(no_callback_pipe)
        last_step = no_callback_pipe[-1]
        self.assertFalse(last_step is not my_callback)
def test_add(self):
    """Check element count, hyperparameter naming and renaming of a repeated element in a Branch.

    Covers construction from a list, construction via ``+=``, and adding a
    second PCA, which is expected to be named ``PCA2``.
    """
    single_pca_grid = {'MyBranch__PCA__n_components': [5]}

    # elements handed to the constructor
    from_list = Branch('MyBranch', [
        PipelineElement('PCA', {'n_components': [5]}),
        PipelineElement('FastICA'),
    ])
    self.assertEqual(len(from_list.elements), 2)
    self.assertDictEqual(from_list._hyperparameters, single_pca_grid)

    # elements added one by one
    branch = Branch('MyBranch')
    branch += PipelineElement('PCA', {'n_components': [5]})
    branch += PipelineElement('FastICA')
    self.assertEqual(len(branch.elements), 2)
    self.assertDictEqual(branch._hyperparameters, single_pca_grid)

    # add doubled item
    branch += PipelineElement('PCA', {'n_components': [10, 20]})
    newest_element = branch.elements[-1]
    self.assertEqual(newest_element.name, 'PCA2')
    both_pca_grids = {
        'MyBranch__PCA__n_components': [5],
        'MyBranch__PCA2__n_components': [10, 20],
    }
    self.assertDictEqual(branch.hyperparameters, both_pca_grids)
def test_prepare_photon_pipeline(self):
    """Check that ``prepare_photon_pipe`` keeps the branch's element objects, in order.

    The generated pipeline must have four named steps, and each step (looked
    up by name and by position) must be the identical object held by the
    branch.
    """
    source_branch = Branch('my_test_branch')
    source_branch += PipelineElement('SimpleImputer')
    source_branch += Switch('my_crazy_switch_bitch')
    source_branch += Stack('my_stacking_stack')
    source_branch += PipelineElement('SVC')

    generated_pipe = source_branch.prepare_photon_pipe(source_branch.elements)

    step_count = len(generated_pipe.named_steps)
    self.assertEqual(step_count, 4)
    for position, branch_element in enumerate(source_branch.elements):
        by_name = generated_pipe.named_steps[branch_element.name]
        self.assertIs(by_name, branch_element)
        by_position = generated_pipe.elements[position][1]
        self.assertIs(by_position, source_branch.elements[position])
def test_classification_8(self):
    """Run each hyperpipe on a scaler -> two-branch stack -> estimator switch pipeline.

    Each source branch filters one half of the feature columns, then applies
    an optional confounder removal and an optional PCA. The final step is a
    switch between an SVC and a random forest.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()
        pipe += PipelineElement('StandardScaler')

        # One branch per half of the feature columns. With 13 features (e.g.
        # Boston Housing) the halves are indices 0-5 and 6-12. If both PCAs
        # are disabled the halves are simply concatenated again.
        n_features = self.X_shape[1]
        half = int(np.floor(n_features / 2))
        column_ranges = (
            ('source1_features', 0, half),
            ('source2_features', half, n_features),
        )
        source_branches = []
        for branch_name, first_column, end_column in column_ranges:
            source_branch = Branch(branch_name)
            source_branch += DataFilter(
                indices=np.arange(start=first_column, stop=end_column))
            source_branch += PipelineElement(
                'ConfounderRemoval', {},
                standardize_covariates=True,
                test_disabled=True,
                confounder_names=['cov1', 'cov2'])
            source_branch += PipelineElement(
                'PCA',
                hyperparameters={'n_components': Categorical([None, 5])},
                test_disabled=True)
            source_branches.append(source_branch)

        # stack the branch outputs (i.e. horizontal concatenation)
        pipe += Stack('source_stack', elements=source_branches)

        # final estimator with stack output as features:
        # setup estimator switch and add it to the pipe
        switch = Switch('estimator_switch')
        svc_params = {'kernel': Categorical(['linear', 'rbf']),
                      'C': Categorical([.01, 1, 5])}
        switch += PipelineElement('SVC', hyperparameters=svc_params)
        forest_params = {
            'min_samples_split': FloatRange(start=.05, step=.1, stop=.26,
                                            range_type='range')}
        switch += PipelineElement('RandomForestClassifier',
                                  hyperparameters=forest_params)
        pipe += switch

        self.run_hyperpipe(pipe, self.classification)
def setUp(self):
    """Create an SVC element, a decision-tree element, a switch, a branch and a nested switch.

    The same SVC element object is used in both the switch and the branch;
    the nested switch contains that branch and that switch.
    """
    svc_grid = {"C": [0.1, 1], "kernel": ["rbf", "sigmoid"]}
    self.svc_pipe_element = PipelineElement("SVC", svc_grid)

    # NOTE: despite its attribute name, this is a decision tree element.
    tree_grid = {"min_samples_split": [2, 3, 4]}
    self.lr_pipe_element = PipelineElement("DecisionTreeClassifier", tree_grid)

    switch_options = [self.svc_pipe_element, self.lr_pipe_element]
    self.pipe_switch = Switch("switch", switch_options)

    self.branch = Branch("branch")
    for branch_element in (PipelineElement("PCA"), self.svc_pipe_element):
        self.branch += branch_element

    self.switch_in_switch = Switch(
        "Switch_in_switch",
        [self.branch, self.pipe_switch],
    )
def test_copy_me(self):
    """Check that ``Branch.copy_me()`` yields an equal but independent branch.

    A copy must match the original's random state and element dictionary;
    changing the copy's PCA ``n_components`` must not change the original,
    whereas changing it through a plain alias must.
    """
    original = Branch('MyBranch', [self.scaler, self.pca])

    # a fresh copy equals the original
    duplicate = original.copy_me()
    self.assertEqual(original.random_state, duplicate.random_state)
    self.assertDictEqual(elements_to_dict(duplicate),
                         elements_to_dict(original))

    # modifying a copy leaves the original untouched
    duplicate = original.copy_me()
    duplicate.elements[1].base_element.n_components = 3
    copied_value = duplicate.elements[1].base_element.n_components
    original_value = original.elements[1].base_element.n_components
    self.assertNotEqual(copied_value, original_value)

    # an alias is the same object, so the change is visible on the original
    fake_copy = original
    fake_copy.elements[1].base_element.n_components = 3
    alias_value = fake_copy.elements[1].base_element.n_components
    original_value = original.elements[1].base_element.n_components
    self.assertEqual(alias_value, original_value)
def test_add(self):
    """Check element count, hyperparameter naming and duplicate handling in a Stack.

    Covers construction from a list, construction via ``+=``, a stack mixing
    element, callback, switch and branch, rejection (``ValueError``) of an
    element object that is already in the stack, and renaming of a second PCA
    to ``PCA2``.
    """
    single_pca_grid = {'MyStack__PCA__n_components': [5]}

    # elements handed to the constructor
    from_list = Stack('MyStack', [
        PipelineElement('PCA', {'n_components': [5]}),
        PipelineElement('FastICA'),
    ])
    self.assertEqual(len(from_list.elements), 2)
    self.assertDictEqual(from_list._hyperparameters, single_pca_grid)

    # elements added one by one
    incremental = Stack('MyStack')
    incremental += PipelineElement('PCA', {'n_components': [5]})
    incremental += PipelineElement('FastICA')
    self.assertEqual(len(incremental.elements), 2)
    self.assertDictEqual(incremental._hyperparameters, single_pca_grid)

    def callback(X, y=None):
        pass

    # all supported kinds of children in one stack
    stack = Stack('MyStack', [
        PipelineElement('PCA'),
        CallbackElement('MyCallback', callback),
        Switch('MySwitch', [PipelineElement('PCA'), PipelineElement('FastICA')]),
        Branch('MyBranch', [PipelineElement('PCA')]),
    ])
    self.assertEqual(len(stack.elements), 4)

    # test doubled item
    with self.assertRaises(ValueError):
        stack += stack.elements[0]

    # a new element of an already present type gets a numbered name
    stack += PipelineElement('PCA', {'n_components': [10, 20]})
    self.assertEqual(stack.elements[-1].name, 'PCA2')
    expected_hyperparameters = {
        'MyStack__MySwitch__current_element': [(0, 0), (1, 0)],
        'MyStack__PCA2__n_components': [10, 20],
    }
    self.assertDictEqual(stack.hyperparameters, expected_hyperparameters)
def test_set_random_state(self):
    """Check that setting ``random_state`` on a branch reaches nested elements.

    The branch holds a scaler, a switch, a stack and an estimator. After
    assigning the branch's ``random_state``, the second element of the switch
    and of the stack, and their ``base_element``, must carry the same value.
    """
    # we handle all elements in one method that is inherited so we capture
    # them all in this test
    random_state = 53

    my_branch = Branch("random_state_branch")
    my_branch += PipelineElement("StandardScaler")

    my_switch = Switch("transformer_Switch")
    my_switch += PipelineElement("LassoFeatureSelection")
    my_switch += PipelineElement("PCA")
    my_branch += my_switch

    my_stack = Stack("Estimator_Stack")
    my_stack += PipelineElement("SVR")
    my_stack += PipelineElement("Ridge")
    my_branch += my_stack

    my_branch += PipelineElement("ElasticNet")

    my_branch.random_state = random_state

    # second child of the switch (PCA), then second child of the stack (Ridge)
    for container in (my_switch, my_stack):
        second_child = container.elements[1]
        self.assertTrue(second_child.random_state == random_state)
        self.assertTrue(second_child.base_element.random_state == random_state)
neuro_branch += PipelineElement( "BrainAtlas", hyperparameters={}, rois=["Hippocampus_L", "Hippocampus_R", "Amygdala_L", "Amygdala_R"], atlas_name="AAL", extract_mode="vec", batch_size=20, ) # finally, add your neuro branch to your hyperpipe neuro_branch += CallbackElement("NeuroCallback", my_monitor) my_pipe += neuro_branch # my_pipe += CallbackElement('NeuroCallback', my_monitor) # now, add standard ML algorithms to your liking feature_engineering = Branch("FeatureEngineering") feature_engineering += PipelineElement("StandardScaler") my_pipe += feature_engineering my_pipe += CallbackElement("FECallback", my_monitor) my_pipe += PipelineElement( "SVR", hyperparameters={"kernel": Categorical(["rbf", "linear"])}, gamma="scale" ) # NOW TRAIN YOUR PIPELINE start_time = time.time() my_pipe.fit(X, y) elapsed_time = time.time() - start_time print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
from photonai.base import Hyperpipe, PipelineElement, Stack, Branch, OutputSettings
from photonai.optimization import IntegerRange, Categorical

# ``return_X_y`` is passed by keyword: scikit-learn made the parameter
# keyword-only, so the positional form ``load_breast_cancer(True)`` fails on
# current releases while the keyword form works on old and new ones.
X, y = load_breast_cancer(return_X_y=True)

# Grid-search hyperpipe with 3 outer and 10 inner cross-validation folds;
# results are written below ./tmp/.
my_pipe = Hyperpipe('basic_stacking',
                    optimizer='grid_search',
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='f1_score',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=10),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

# BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
tree_qua_branch = Branch('tree_branch')
tree_qua_branch += PipelineElement('QuantileTransformer')
tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                   {'min_samples_split': IntegerRange(2, 4)},
                                   criterion='gini')

# BRANCH WITH MinMaxScaler AND SVC
svm_mima_branch = Branch('svm_branch')
svm_mima_branch += PipelineElement('MinMaxScaler')
# NOTE(review): float bounds on an IntegerRange look unintended; confirm what
# values this yields for C.
svm_mima_branch += PipelineElement('SVC',
                                   {'kernel': Categorical(['rbf', 'linear']),
                                    'C': IntegerRange(0.01, 2.0)},
                                   gamma='auto')

# BRANCH WITH StandardScaler AND KNeighborsClassifier
# Hyperpipe evaluating 2 randomly drawn grid configurations, with 3 outer and
# 3 inner cross-validation folds; results are written below ./tmp/.
my_pipe = Hyperpipe('data_integration',
                    optimizer='random_grid_search',
                    optimizer_params={'n_configurations': 2},
                    metrics=['accuracy', 'precision', 'recall'],
                    best_config_metric='f1_score',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3),
                    verbosity=1,
                    output_settings=OutputSettings(project_folder='./tmp/'))

# Preprocessing applied before the feature-specific branches.
my_pipe += PipelineElement('SimpleImputer')
my_pipe += PipelineElement('StandardScaler', {}, with_mean=True)

# Use only "mean" features: [mean_radius, mean_texture, mean_perimeter, mean_area, mean_smoothness,
# mean_compactness, mean_concavity, mean_concave_points, mean_symmetry, mean_fractal_dimension]
# (column indices 0-9)
mean_branch = Branch('MeanFeature')
mean_branch += DataFilter(indices=np.arange(10))
mean_branch += PipelineElement('SVC', {'C': FloatRange(0.1, 10)}, kernel='linear')

# Use only "error" features (column indices 10-19)
error_branch = Branch('ErrorFeature')
error_branch += DataFilter(indices=np.arange(10, 20))
# NOTE(review): 1000 is listed twice in the Categorical below - possibly a typo
# for a third distinct value; confirm the intended grid.
error_branch += PipelineElement('SVC', {'C': Categorical([100, 1000, 1000])}, kernel='linear')

# use only "worst" features: [worst_radius, worst_texture, ..., worst_fractal_dimension]
# (column indices 20-29)
worst_branch = Branch('WorstFeature')
worst_branch += DataFilter(indices=np.arange(20, 30))
worst_branch += PipelineElement('SVC')
def test_classification_8(self):
    """Run each hyperpipe on a scaler -> two-branch stack -> estimator switch pipeline.

    Each source branch filters one half of the feature columns, then applies
    an optional confounder removal and an optional PCA. The final step is a
    switch between an SVC and a random forest.
    """
    for original_hyperpipe in self.hyperpipes:
        pipe = original_hyperpipe.copy_me()
        pipe += PipelineElement("StandardScaler")

        # One branch per half of the feature columns. With 13 features (e.g.
        # Boston Housing) the halves are indices 0-5 and 6-12. If both PCAs
        # are disabled the halves are simply concatenated again.
        total_columns = self.X_shape[1]
        middle = int(np.floor(total_columns / 2))
        halves = [
            ("source1_features", 0, middle),
            ("source2_features", middle, total_columns),
        ]
        stacked_branches = []
        for label, lower, upper in halves:
            half_branch = Branch(label)
            half_branch += DataFilter(indices=np.arange(start=lower, stop=upper))
            half_branch += PipelineElement(
                "ConfounderRemoval",
                {},
                standardize_covariates=True,
                test_disabled=True,
                confounder_names=["cov1", "cov2"],
            )
            half_branch += PipelineElement(
                "PCA",
                hyperparameters={"n_components": Categorical([None, 5])},
                test_disabled=True,
            )
            stacked_branches.append(half_branch)

        # stack the branch outputs (i.e. horizontal concatenation)
        pipe += Stack("source_stack", elements=stacked_branches)

        # final estimator with stack output as features:
        # setup estimator switch and add it to the pipe
        switch = Switch("estimator_switch")
        switch += PipelineElement(
            "SVC",
            hyperparameters={
                "kernel": Categorical(["linear", "rbf"]),
                "C": Categorical([0.01, 1, 5]),
            },
        )
        split_range = FloatRange(start=0.05, step=0.1, stop=0.26,
                                 range_type="range")
        switch += PipelineElement(
            "RandomForestClassifier",
            hyperparameters={"min_samples_split": split_range},
        )
        pipe += switch

        self.run_hyperpipe(pipe, self.classification)
def test_save_optimum_pipe(self):
    """Fit a stacked hyperpipe, copy the saved best model and reload it from the copy.

    After fitting, the file ``photon_best_model.photon`` must exist in the
    results folder. It is copied to a sub-folder, loaded from there with
    ``Hyperpipe.load_optimum_pipe`` and must carry meta information
    (including ``photon_version``) and predict exactly what the in-memory
    optimum pipe predicts.
    """
    # todo: test .save() of custom model
    tmp_path = os.path.join(self.tmp_folder_path, "optimum_pipypipe")
    settings = OutputSettings(project_folder=tmp_path, overwrite_results=True)

    my_pipe = Hyperpipe(
        "hyperpipe",
        optimizer="random_grid_search",
        optimizer_params={"n_configurations": 3},
        metrics=["accuracy", "precision", "recall"],
        best_config_metric="f1_score",
        outer_cv=KFold(n_splits=2),
        inner_cv=KFold(n_splits=2),
        verbosity=1,
        output_settings=settings,
    )

    preproc = Preprocessing()
    preproc += PipelineElement("StandardScaler")

    # BRANCH WITH QuantileTransformer AND DecisionTreeClassifier
    tree_qua_branch = Branch("tree_branch")
    tree_qua_branch += PipelineElement("QuantileTransformer")
    tree_qua_branch += PipelineElement(
        "DecisionTreeClassifier",
        {"min_samples_split": IntegerRange(2, 4)},
        criterion="gini",
    )

    # BRANCH WITH MinMaxScaler AND SVC
    svm_mima_branch = Branch("svm_branch")
    svm_mima_branch += PipelineElement("MinMaxScaler")
    svm_mima_branch += PipelineElement(
        "SVC", {"kernel": Categorical(["rbf", "linear"]), "C": 2.0}, gamma="auto"
    )

    # BRANCH WITH a custom DummyTransformer AND KNeighborsClassifier
    knn_sta_branch = Branch("neighbour_branch")
    knn_sta_branch += PipelineElement.create("dummy", DummyTransformer(), {})
    knn_sta_branch += PipelineElement("KNeighborsClassifier")

    my_pipe += preproc
    # stack the three branches and put a final estimator on top
    my_pipe += Stack(
        "final_stack", [tree_qua_branch, svm_mima_branch, knn_sta_branch]
    )
    my_pipe += PipelineElement("LogisticRegression", solver="lbfgs")

    my_pipe.fit(self.__X, self.__y)
    model_path = os.path.join(
        my_pipe.output_settings.results_folder, "photon_best_model.photon"
    )
    self.assertTrue(os.path.exists(model_path))

    # now copy the optimum pipe to a new folder
    test_folder = os.path.join(
        my_pipe.output_settings.results_folder, "new_test_folder"
    )
    new_model_path = os.path.join(test_folder, "photon_best_model.photon")
    # exist_ok=True: with overwrite_results=True the results folder may be
    # reused between runs, so the sub-folder can already exist. Without the
    # flag the test would fail with FileExistsError before checking anything.
    os.makedirs(test_folder, exist_ok=True)
    shutil.copyfile(model_path, new_model_path)

    # check if load_optimum_pipe also works
    # check if we have the meta information recovered
    loaded_optimum_pipe = Hyperpipe.load_optimum_pipe(new_model_path)
    self.assertIsNotNone(loaded_optimum_pipe._meta_information)
    self.assertIsNotNone(loaded_optimum_pipe._meta_information["photon_version"])

    # check if predictions stay reliably the same
    y_pred_loaded = loaded_optimum_pipe.predict(self.__X)
    y_pred = my_pipe.optimum_pipe.predict(self.__X)
    np.testing.assert_array_equal(y_pred_loaded, y_pred)
X, y = load_breast_cancer(True) my_pipe = Hyperpipe( "basic_stacking", optimizer="grid_search", metrics=["accuracy", "precision", "recall"], best_config_metric="f1_score", outer_cv=KFold(n_splits=3), inner_cv=KFold(n_splits=10), verbosity=1, output_settings=OutputSettings(project_folder="./tmp/"), ) # BRANCH WITH QUANTILTRANSFORMER AND DECISIONTREECLASSIFIER tree_qua_branch = Branch("tree_branch") tree_qua_branch += PipelineElement("QuantileTransformer") tree_qua_branch += PipelineElement( "DecisionTreeClassifier", {"min_samples_split": IntegerRange(2, 4)}, criterion="gini", ) # BRANCH WITH MinMaxScaler AND DecisionTreeClassifier svm_mima_branch = Branch("svm_branch") svm_mima_branch += PipelineElement("MinMaxScaler") svm_mima_branch += PipelineElement( "SVC", { "kernel": Categorical(["rbf", "linear"]), "C": IntegerRange(0.01, 2.0)
# additionally, you can smooth the entire image neuro_branch += PipelineElement('SmoothImages', {'fwhm': Categorical([6, 8])}, batch_size=20) # now, apply a brain atlas and extract 4 ROIs # set "extract_mode" to "vec" so that all voxels within these ROIs are vectorized and concatenated neuro_branch += PipelineElement('BrainAtlas', hyperparameters={}, rois=['Hippocampus_L', 'Hippocampus_R', 'Amygdala_L', 'Amygdala_R'], atlas_name="AAL", extract_mode='vec', batch_size=20) # finally, add your neuro branch to your hyperpipe neuro_branch += CallbackElement('NeuroCallback', my_monitor) my_pipe += neuro_branch # my_pipe += CallbackElement('NeuroCallback', my_monitor) # now, add standard ML algorithms to your liking feature_engineering = Branch('FeatureEngineering') feature_engineering += PipelineElement('StandardScaler') my_pipe += feature_engineering my_pipe += CallbackElement('FECallback', my_monitor) my_pipe += PipelineElement('SVR', hyperparameters={'kernel': Categorical(['rbf', 'linear'])}, gamma='scale') # NOW TRAIN YOUR PIPELINE start_time = time.time() my_pipe.fit(X, y) elapsed_time = time.time() - start_time print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time))) debug = True
def setUp(self):
    """Load the breast-cancer data and build pipelines with and without callbacks.

    Three local callbacks are defined: one asserting the transformed data has
    shape (569, 30), one asserting predictions have shape (569,), and one
    asserting that the data passed to it equals the loaded ``self.X`` /
    ``self.y``. They are wired into several ``PhotonPipeline`` objects, with
    and without a nested branch.
    """
    def callback(X, y=None, **kwargs):
        self.assertEqual(X.shape, (569, 30))
        print("Shape of transformed data: {}".format(X.shape))

    def predict_callback(X, y=None, **kwargs):
        self.assertEqual(X.shape, (569, ))
        print('Shape of predictions: {}'.format(X.shape))

    def callback_test_equality(X, y=None, **kwargs):
        self.assertTrue(np.array_equal(self.X, X))
        if y is not None:
            self.assertListEqual(self.y.tolist(), y.tolist())

    # ``return_X_y`` is passed by keyword: scikit-learn made the parameter
    # keyword-only, so the positional form ``load_breast_cancer(True)`` fails
    # on current releases while the keyword form works on old and new ones.
    self.X, self.y = load_breast_cancer(return_X_y=True)

    # plain pipeline without callbacks
    self.clean_pipeline = PhotonPipeline(elements=[
        ('PCA', PipelineElement('PCA')),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    # callbacks before and after the PCA
    self.callback_pipeline = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('PCA', PipelineElement('PCA')),
        ('Second', CallbackElement('Second', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    # PCA wrapped in a branch, no callbacks
    self.clean_branch_pipeline = PhotonPipeline(elements=[
        ('MyBranch', Branch('MyBranch', [PipelineElement('PCA')])),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    # callbacks around and inside the branch
    self.callback_branch_pipeline = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('MyBranch', Branch('MyBranch', [
            CallbackElement('Second', callback),
            PipelineElement('PCA'),
        ])),
        ('Fourth', CallbackElement('Fourth', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
    ])

    # as above, but the branch ends with a callback and the pipeline ends
    # with a callback after the estimator
    self.callback_branch_pipeline_error = PhotonPipeline(elements=[
        ('First', CallbackElement('First', callback)),
        ('MyBranch', Branch('MyBranch', [
            CallbackElement('Second', callback),
            PipelineElement('PCA'),
            CallbackElement('Third', callback),
        ])),
        ('Fourth', CallbackElement('Fourth', callback)),
        ('LogisticRegression', PipelineElement('LogisticRegression')),
        ('Fifth', CallbackElement('Fifth', predict_callback)),
    ])

    # test that data is unaffected from pipeline
    # Every step is its own (name, element) pair. Previously the SVR pair was
    # nested inside the scaler's tuple as a third item, so the SVR was never a
    # pipeline step of its own.
    # NOTE(review): the step key 'StandarcScaler' is misspelled; it is kept
    # unchanged in case other code looks the step up by this key.
    self.callback_after_callback_pipeline = PhotonPipeline([
        ('Callback1', CallbackElement('Callback1', callback)),
        ('Callback2', CallbackElement('Callback2', callback_test_equality)),
        ('StandarcScaler', PipelineElement('StandardScaler')),
        ('SVR', PipelineElement('SVR')),
    ])