class BatchingTests(unittest.TestCase):
    """Exercise a ``PipelineElement`` that wraps ``DummyBatchTransformer`` with a batch size.

    The expected values asserted below encode the behaviour of
    ``DummyBatchTransformer``, which is defined outside this class.
    """

    def setUp(self):
        """Build the batched element and 50 samples (10 identical rows per animal)."""
        self.batch_size = 10
        nr_features = 3
        origin_list = ["affe", "tiger", "schwein", "giraffe", "löwe"]

        self.neuro_batch = PipelineElement(
            "dummy_batch",
            batch_size=self.batch_size,
            base_element=DummyBatchTransformer(),
        )

        # One feature row per sample: "<animal>0", "<animal>1", ... repeated
        # batch_size times for every animal, in the order of origin_list.
        feature_rows = [
            [animal + str(idx) for idx in range(0, nr_features)]
            for animal in origin_list
            for _ in range(self.batch_size)
        ]
        # The target of every sample is simply the animal name.
        labels = [
            animal for animal in origin_list for _ in range(self.batch_size)
        ]

        self.data = np.array(feature_rows)
        self.targets = np.array(labels)
        self.kwargs = {"animals": self.targets}

    def test_transform(self):
        """Transform all samples and check the first and last rows and kwargs."""
        X_new, y_new, kwargs_new = self.neuro_batch.transform(
            self.data, self.targets, **self.kwargs)

        first_row = X_new[0, :].tolist()
        last_row = X_new[49, :].tolist()
        self.assertListEqual(first_row, ["affe0affe", "affe1affe", "affe2affe"])
        self.assertListEqual(last_row, ["löwe0löwe", "löwe1löwe", "löwe2löwe"])

        animals = kwargs_new["animals"]
        self.assertEqual(animals[0], "effa")
        self.assertEqual(animals[49], "ewöl")

        # input that is not array-like is expected to raise a Warning
        with self.assertRaises(Warning):
            self.neuro_batch.transform('str', [0])

    def test_predict(self):
        """Predict on all samples and check the first and last returned values."""
        y_predicted = self.neuro_batch.predict(self.data, **self.kwargs)

        # assure that predict is called batch-wise: the last value must equal
        # the number of batches
        expected_last = self.data.shape[0] / self.batch_size
        self.assertEqual(y_predicted[0], 1)
        self.assertEqual(y_predicted[-1], expected_last)

        with self.assertRaises(Warning):
            self.neuro_batch.predict('str')
class SwitchTests(unittest.TestCase):
    """Tests for ``Switch``: config grids, delegation to the selected element,
    copying, estimator type detection and element bookkeeping."""

    def setUp(self):
        """Load the breast cancer data and build the switches shared by the tests."""
        # return_X_y has to be given by keyword; scikit-learn >= 1.0 no longer
        # accepts it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        self.svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        self.tree = PipelineElement('DecisionTreeClassifier',
                                    {'min_samples_split': [2, 3, 4]})
        self.gpc = PipelineElement('GaussianProcessClassifier')
        self.pca = PipelineElement('PCA')

        self.estimator_branch = Branch('estimator_branch',
                                       [self.tree.copy_me()])
        self.transformer_branch = Branch('transformer_branch',
                                         [self.pca.copy_me()])

        self.estimator_switch = Switch(
            'estimator_switch',
            [self.svc.copy_me(), self.tree.copy_me(), self.gpc.copy_me()])
        self.estimator_switch_with_branch = Switch(
            'estimator_switch_with_branch',
            [self.tree.copy_me(), self.estimator_branch.copy_me()])
        self.transformer_switch_with_branch = Switch(
            'transformer_switch_with_branch',
            [self.pca.copy_me(), self.transformer_branch.copy_me()])
        self.switch_in_switch = Switch('Switch_in_switch', [
            self.transformer_branch.copy_me(),
            self.transformer_switch_with_branch.copy_me()
        ])

    def test_init(self):
        """The name given to the constructor is stored on the switch."""
        self.assertEqual(self.estimator_switch.name, 'estimator_switch')

    def test_hyperparams(self):
        """The switch exposes one ``current_element`` choice per element config."""
        # assert number of different configs to test:
        # 4 for the SVC (2 x C, 2 x kernel), 3 for the decision tree and 1 for
        # the GaussianProcessClassifier without hyperparameters = 8 in total
        configurations = self.estimator_switch.pipeline_element_configurations
        self.assertEqual(len(configurations), 3)
        self.assertEqual(len(configurations[0]), 4)
        self.assertEqual(len(configurations[1]), 3)

        expected_choices = [(0, 0), (0, 1), (0, 2), (0, 3),
                            (1, 0), (1, 1), (1, 2),
                            (2, 0)]

        # hyperparameters
        self.assertDictEqual(
            self.estimator_switch.hyperparameters,
            {'estimator_switch__current_element': expected_choices})

        # config grid
        self.assertListEqual(
            self.estimator_switch.generate_config_grid(),
            [{'estimator_switch__current_element': choice}
             for choice in expected_choices])

    def test_set_params(self):
        """``set_params`` accepts grid tuples as well as element-prefixed params."""
        # test for grid search
        false_config = {'current_element': 1}
        with self.assertRaises(ValueError):
            self.estimator_switch.set_params(**false_config)

        correct_config = {'current_element': (0, 1)}
        self.estimator_switch.set_params(**correct_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C,
                         0.1)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'sigmoid')

        # test for other optimizers
        smac_config = {'SVC__C': 2, 'SVC__kernel': 'rbf'}
        self.estimator_switch.set_params(**smac_config)
        self.assertEqual(self.estimator_switch.base_element.base_element.C, 2)
        self.assertEqual(
            self.estimator_switch.base_element.base_element.kernel, 'rbf')

    def test_fit(self):
        """Fitting through the switch matches fitting the tree element directly."""
        np.random.seed(42)
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)

        np.random.seed(42)
        self.tree.set_params(**{'min_samples_split': 2})
        self.tree.fit(self.X, self.y)

        np.testing.assert_array_equal(
            self.tree.base_element.feature_importances_,
            self.estimator_switch.base_element.feature_importances_)

    def test_transform(self):
        """Transforming through the switch matches the stand-alone PCA element."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        switch_Xt, _, _ = self.transformer_switch_with_branch.transform(self.X)
        pca_Xt, _, _ = self.pca.transform(self.X)

        self.assertTrue(np.array_equal(pca_Xt, switch_Xt))

    def test_predict(self):
        """Predicting through the switch matches the stand-alone tree element."""
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        np.random.seed(42)
        self.estimator_switch.fit(self.X, self.y)

        self.tree.set_params(**{'min_samples_split': 2})
        np.random.seed(42)
        self.tree.fit(self.X, self.y)

        switch_preds = self.estimator_switch.predict(self.X)
        tree_preds = self.tree.predict(self.X)

        self.assertTrue(np.array_equal(switch_preds, tree_preds))

    def test_predict_proba(self):
        """``predict_proba`` through the switch matches the stand-alone GPC element."""
        gpc = PipelineElement('GaussianProcessClassifier')
        svc = PipelineElement('SVC')
        switch = Switch('EstimatorSwitch', [gpc, svc])
        switch.set_params(**{'current_element': (0, 0)})

        np.random.seed(42)
        switch_probas = switch.fit(self.X, self.y).predict_proba(self.X)
        np.random.seed(42)
        gpr_probas = self.gpc.fit(self.X, self.y).predict_proba(self.X)

        self.assertTrue(np.array_equal(switch_probas, gpr_probas))

    def test_inverse_transform(self):
        """The inverse transform through the switch matches the PCA element."""
        self.transformer_switch_with_branch.set_params(
            **{'current_element': (0, 0)})
        self.transformer_switch_with_branch.fit(self.X, self.y)
        self.pca.fit(self.X, self.y)

        Xt_pca, _, _ = self.pca.transform(self.X)
        Xt_switch, _, _ = self.transformer_switch_with_branch.transform(self.X)
        X_pca, _, _ = self.pca.inverse_transform(Xt_pca)
        X_switch, _, _ = self.transformer_switch_with_branch.inverse_transform(
            Xt_switch)

        self.assertTrue(np.array_equal(Xt_pca, Xt_switch))
        self.assertTrue(np.array_equal(X_pca, X_switch))
        np.testing.assert_almost_equal(X_switch, self.X)

    def test_base_element(self):
        """``base_element`` is the very element object that was selected."""
        switch = Switch('switch', [self.svc, self.tree])
        switch.set_params(**{'current_element': (1, 1)})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

        # other optimizer
        switch.set_params(**{'DecisionTreeClassifier__min_samples_split': 2})
        self.assertIs(switch.base_element, self.tree)
        self.assertIs(switch.base_element.base_element, self.tree.base_element)

    def test_copy_me(self):
        """A copy holds distinct element objects but the same dict representation."""
        switches = [
            self.estimator_switch, self.estimator_switch_with_branch,
            self.transformer_switch_with_branch, self.switch_in_switch
        ]

        for switch in switches:
            copy = switch.copy_me()

            self.assertEqual(switch.random_state, copy.random_state)

            # zip would silently truncate, so compare the lengths explicitly
            self.assertEqual(len(copy.elements), len(switch.elements))
            for copied_element, original_element in zip(copy.elements,
                                                        switch.elements):
                self.assertNotEqual(copied_element, original_element)

            switch_dict = elements_to_dict(switch)
            copy_dict = elements_to_dict(copy)
            self.assertDictEqual(copy_dict, switch_dict)

    def test_estimator_type(self):
        """``_estimator_type`` is derived from the elements, mixed types raise."""
        pca = PipelineElement('PCA')
        ica = PipelineElement('FastICA')
        svc = PipelineElement('SVC')
        svr = PipelineElement('SVR')
        tree_class = PipelineElement('DecisionTreeClassifier')
        tree_reg = PipelineElement('DecisionTreeRegressor')

        # transformer mixed with an estimator
        switch = Switch('MySwitch', [pca, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        # classifier mixed with a regressor
        switch = Switch('MySwitch', [svc, svr])
        with self.assertRaises(NotImplementedError):
            _ = switch._estimator_type

        switch = Switch('MySwitch', [pca, ica])
        self.assertEqual(switch._estimator_type, None)

        switch = Switch('MySwitch', [tree_class, svc])
        self.assertEqual(switch._estimator_type, 'classifier')

        switch = Switch('MySwitch', [tree_reg, svr])
        self.assertEqual(switch._estimator_type, 'regressor')

        self.assertEqual(self.estimator_switch._estimator_type, 'classifier')
        self.assertEqual(self.estimator_switch_with_branch._estimator_type,
                         'classifier')
        self.assertEqual(self.transformer_switch_with_branch._estimator_type,
                         None)
        self.assertEqual(self.switch_in_switch._estimator_type, None)

    def test_add(self):
        """Elements are registered by name; repeated names get a numeric suffix."""
        self.assertEqual(len(self.estimator_switch.elements), 3)
        self.assertEqual(len(self.switch_in_switch.elements), 2)
        self.assertEqual(len(self.transformer_switch_with_branch.elements), 2)
        self.assertEqual(
            list(self.estimator_switch.elements_dict.keys()),
            ['SVC', 'DecisionTreeClassifier', 'GaussianProcessClassifier'])
        self.assertEqual(
            list(self.switch_in_switch.elements_dict.keys()),
            ['transformer_branch', 'transformer_switch_with_branch'])

        # both ways of filling a switch must work without raising
        switch = Switch('MySwitch',
                        [PipelineElement('PCA'),
                         PipelineElement('FastICA')])
        switch = Switch('MySwitch2')
        switch += PipelineElement('PCA')
        switch += PipelineElement('FastICA')

        # test doubled names: adding the identical element object again fails
        with self.assertRaises(ValueError):
            self.estimator_switch += self.estimator_switch.elements[0]

        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC2")

        self.estimator_switch += PipelineElement(
            "SVC", hyperparameters={'kernel': ['polynomial', 'sigmoid']})
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC3")

        self.estimator_switch += PipelineElement("SVR")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVR")

        self.estimator_switch += PipelineElement("SVC")
        self.assertEqual(self.estimator_switch.elements[-1].name, "SVC4")

        # check that hyperparameters are renamed respectively
        self.assertEqual(
            self.estimator_switch.pipeline_element_configurations[4][0]
            ["SVC3__kernel"], 'polynomial')

    def test_feature_importances(self):
        """``feature_importances_`` is forwarded where the selected element has it."""
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.estimator_switch.fit(self.X, self.y)
        self.assertEqual(len(self.estimator_switch.feature_importances_),
                         self.X.shape[1])

        self.estimator_switch_with_branch.set_params(
            **{'current_element': (1, 0)})
        self.estimator_switch_with_branch.fit(self.X, self.y)
        self.assertEqual(
            len(self.estimator_switch_with_branch.feature_importances_),
            self.X.shape[1])

        self.estimator_switch.set_params(**{'current_element': (2, 0)})
        self.estimator_switch.fit(self.X, self.y)
        # NOTE(review): this asserts on estimator_branch although
        # estimator_switch was just fitted - possibly estimator_switch was
        # intended here; confirm before changing.
        self.assertIsNone(self.estimator_branch.feature_importances_)

        self.switch_in_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)

        # NOTE(review): set_params targets estimator_switch while
        # switch_in_switch is the one being fitted and checked - confirm
        # which switch was meant.
        self.estimator_switch.set_params(**{'current_element': (1, 0)})
        self.switch_in_switch.fit(self.X, self.y)
        self.assertIsNone(self.switch_in_switch.feature_importances_)
class PipelineElementTests(unittest.TestCase):
    """Tests for ``PipelineElement``: creation, delegation to the wrapped base
    element, hyperparameter handling, copying and type checks."""

    def setUp(self):
        """Create a PCA and an SVC element plus original and shifted data."""
        self.pca_pipe_element = PipelineElement('PCA',
                                                {'n_components': [1, 2]},
                                                test_disabled=True)
        self.svc_pipe_element = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        # return_X_y has to be given by keyword; scikit-learn >= 1.0 no longer
        # accepts it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)
        self.kwargs = {'covariates': self.y}
        # Everything shifted by one; the delegate-call tests compare the
        # output of the dummy elements against these values.
        self.Xt = self.X + 1
        self.yt = self.y + 1
        self.kwargst = {'covariates': self.y + 1}

    def test_create_failure(self):
        """An unknown element name raises ``NameError``."""
        with self.assertRaises(NameError):
            PipelineElement('NONSENSEName', {})

    def test_pipeline_element_create(self):
        """Name, ``test_disabled`` and base element are set by the constructor."""
        # test name, set_disabled and base_element
        self.assertIsInstance(self.pca_pipe_element.base_element, PCA)

        # set_disabled is passed correctly
        self.assertTrue(self.pca_pipe_element.test_disabled)

        # correct name
        self.assertEqual(self.pca_pipe_element.name, 'PCA')

    def test_fit(self):
        """Fitting the element fits the wrapped base element."""
        self.pca_pipe_element.fit(self.X, self.y)
        self.assertEqual(self.pca_pipe_element.base_element.components_.shape,
                         (30, 30))
        # fitted values are floats: compare with a tolerance rather than
        # bit-exact so the test does not depend on platform or BLAS details
        self.assertAlmostEqual(
            self.pca_pipe_element.base_element.components_[0, 0],
            0.005086232018734175)

        self.svc_pipe_element.fit(self.X, self.y)
        # assert_allclose handles scalar and array-valued intercepts alike
        np.testing.assert_allclose(
            self.svc_pipe_element.base_element._intercept_,
            -0.3753900173819406)

    def test_transform(self):
        """``transform`` returns a triple whose first entry is the transformed X."""
        self.pca_pipe_element.fit(self.X, self.y)

        Xt, _, _ = self.pca_pipe_element.transform(self.X)
        self.assertEqual(Xt.shape, (569, 30))
        self.assertAlmostEqual(Xt[0, 0], 1160.1425737041347)

    def test_predict(self):
        """``predict`` returns one prediction per sample."""
        self.svc_pipe_element.fit(self.X, self.y)

        yt = self.svc_pipe_element.predict(self.X)
        self.assertEqual(yt.shape, (569, ))
        self.assertEqual(yt[21], 1)

    def test_predict_proba(self):
        """``predict_proba`` is None for the SVC element and an array for the GPC."""
        self.svc_pipe_element.fit(self.X, self.y)
        self.assertIsNone(self.svc_pipe_element.predict_proba(self.X))

        gpc = PipelineElement('GaussianProcessClassifier')
        gpc.fit(self.X, self.y)
        # probabilities are floats: compare with a tolerance, not bit-exact
        np.testing.assert_array_almost_equal(
            gpc.predict_proba(self.X)[0],
            np.asarray([0.5847072926551391, 0.4152927073448609]))

    def test_inverse_transform(self):
        """``inverse_transform`` of the transformed data restores the input."""
        Xt, _, _ = self.pca_pipe_element.fit(self.X, self.y).transform(self.X)
        X, _, _ = self.pca_pipe_element.inverse_transform(Xt)
        np.testing.assert_array_almost_equal(X, self.X)

    def test_one_hyperparameter_setup(self):
        """One hyperparameter plus ``test_disabled`` yields the expected grid."""
        # sklearn attributes are generated
        self.assertDictEqual(self.pca_pipe_element.hyperparameters, {
            'PCA__n_components': [1, 2],
            'PCA__disabled': [False, True]
        })

        # config_grid is created as expected
        self.assertListEqual(self.pca_pipe_element.generate_config_grid(), [{
            'PCA__n_components': 1,
            'PCA__disabled': False
        }, {
            'PCA__n_components': 2,
            'PCA__disabled': False
        }, {
            'PCA__disabled': True
        }])

    def test_more_hyperparameters_setup(self):
        """Two hyperparameters yield the full cartesian product as grid."""
        # sklearn attributes are generated
        self.assertDictEqual(self.svc_pipe_element.hyperparameters, {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        })

        # config_grid is created as expected
        self.assertListEqual(self.svc_pipe_element.generate_config_grid(), [{
            'SVC__C': 0.1,
            'SVC__kernel': 'rbf'
        }, {
            'SVC__C': 0.1,
            'SVC__kernel': 'sigmoid'
        }, {
            'SVC__C': 1,
            'SVC__kernel': 'rbf'
        }, {
            'SVC__C': 1,
            'SVC__kernel': 'sigmoid'
        }])

    def test_no_hyperparameters(self):
        """Without hyperparameters the base element equals a default sklearn PCA."""
        pca_sklearn_element = PCA()
        pca_photon_element = PipelineElement('PCA')

        self.assertDictEqual(pca_sklearn_element.__dict__,
                             pca_photon_element.base_element.__dict__)

    def test_set_params(self):
        """``set_params`` updates element and base element, unknown params raise."""
        config = {'n_components': 3, 'disabled': False}
        self.pca_pipe_element.set_params(**config)
        self.assertFalse(self.pca_pipe_element.disabled)
        self.assertEqual(self.pca_pipe_element.base_element.n_components, 3)

        with self.assertRaises(ValueError):
            self.pca_pipe_element.set_params(**{'any_weird_param': 1})

    def test_set_random_state(self):
        """Setting ``random_state`` on a branch reaches nested elements."""
        # we handle all elements in one method that is inherited so we capture them all in this test
        random_state = 53

        my_branch = Branch("random_state_branch")
        my_branch += PipelineElement("StandardScaler")

        my_switch = Switch("transformer_Switch")
        my_switch += PipelineElement("LassoFeatureSelection")
        my_switch += PipelineElement("PCA")
        my_branch += my_switch

        my_stack = Stack("Estimator_Stack")
        my_stack += PipelineElement("SVR")
        my_stack += PipelineElement("Ridge")
        my_branch += my_stack

        my_branch += PipelineElement("ElasticNet")

        my_branch.random_state = random_state

        self.assertEqual(my_switch.elements[1].random_state, random_state)
        self.assertEqual(my_switch.elements[1].base_element.random_state,
                         random_state)
        self.assertEqual(my_stack.elements[1].random_state, random_state)
        self.assertEqual(my_stack.elements[1].base_element.random_state,
                         random_state)

    def test_adjusted_delegate_call_transformer(self):
        """X, y and kwargs are only changed if the transformer asks for them."""
        # check standard transformer
        trans = PipelineElement.create('Transformer',
                                       base_element=DummyTransformer(),
                                       hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        # only X should be transformed
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(np.array_equal(y, self.y))
        self.assertDictEqual(kwargs, self.kwargs)

        # check transformer needs y
        trans = PipelineElement.create('NeedsYTransformer',
                                       base_element=DummyNeedsYTransformer(),
                                       hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(np.array_equal(y, self.yt))
        self.assertDictEqual(kwargs, self.kwargs)

        trans = PipelineElement.create('NeedsYTransformer',
                                       base_element=DummyNeedsYTransformer(),
                                       hyperparameters={})
        # this time without any kwargs
        X, y, kwargs = trans.transform(self.X, self.y)
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(np.array_equal(y, self.yt))
        self.assertDictEqual(kwargs, {})

        # check transformer needs covariates
        trans = PipelineElement.create(
            'NeedsCovariatesTransformer',
            base_element=DummyNeedsCovariatesTransformer(),
            hyperparameters={})
        X, y, kwargs = trans.transform(self.X, **self.kwargs)
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs['covariates'], self.kwargst['covariates']))
        self.assertIsNone(y)

        # check transformer needs covariates and needs y
        trans = PipelineElement.create(
            'NeedsCovariatesAndYTransformer',
            base_element=DummyNeedsCovariatesAndYTransformer(),
            hyperparameters={})
        X, y, kwargs = trans.transform(self.X, self.y, **self.kwargs)
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(np.array_equal(y, self.yt))
        self.assertTrue(
            np.array_equal(kwargs['covariates'], self.kwargst['covariates']))

    def test_adjusted_delegate_call_estimator(self):
        """``predict`` works for estimators with and without covariates."""
        # check standard estimator
        est = PipelineElement.create('Estimator',
                                     base_element=DummyEstimator(),
                                     hyperparameters={})
        y = est.predict(self.X)
        # DummyEstimator returns X as y predictions
        self.assertTrue(np.array_equal(y, self.Xt))

        # check estimator needs covariates
        est = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        X = est.predict(self.X, **self.kwargs)
        # DummyEstimator returns X as y predictions
        self.assertTrue(np.array_equal(X, self.Xt))

    def test_predict_when_no_transform(self):
        """``transform`` on an estimator element returns its predictions as X."""
        # check standard estimator
        est = PipelineElement.create('Estimator',
                                     base_element=DummyEstimator(),
                                     hyperparameters={})
        X, y, kwargs = est.transform(self.X)
        # DummyEstimator returns X as y predictions
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertIsNone(y)

        # check estimator needs covariates
        est = PipelineElement.create(
            'Estimator',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        X, y, kwargs = est.transform(self.X, **self.kwargs)
        # DummyEstimator returns X as y predictions
        self.assertTrue(np.array_equal(X, self.Xt))
        self.assertTrue(
            np.array_equal(kwargs['covariates'], self.kwargs['covariates']))
        self.assertIsNone(y)

    def test_copy_me(self):
        """Copies are independent objects with the same configuration."""
        svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        svc.set_params(**{'C': 0.1, 'kernel': 'sigmoid'})
        copy = svc.copy_me()

        self.assertEqual(svc.random_state, copy.random_state)
        self.assertNotEqual(copy.base_element, svc.base_element)
        self.assertDictEqual(elements_to_dict(copy), elements_to_dict(svc))
        self.assertEqual(copy.base_element.C, svc.base_element.C)

        # check if copies are still the same, even when making a copy of a fitted PipelineElement
        copy_after_fit = svc.fit(self.X, self.y).copy_me()
        self.assertDictEqual(elements_to_dict(copy),
                             elements_to_dict(copy_after_fit))

        svc = PipelineElement('SVC', {
            'C': [0.1, 1],
            'kernel': ['rbf', 'sigmoid']
        })
        copy = svc.copy_me()
        self.assertDictEqual(copy.hyperparameters, {
            'SVC__C': [0.1, 1],
            'SVC__kernel': ['rbf', 'sigmoid']
        })
        # changing the copy must not leak into the original
        copy.base_element.C = 3
        self.assertNotEqual(svc.base_element.C, copy.base_element.C)

        # test custom element
        custom_element = PipelineElement.create(
            'CustomElement',
            base_element=DummyNeedsCovariatesEstimator(),
            hyperparameters={})
        copy = custom_element.copy_me()
        self.assertDictEqual(elements_to_dict(custom_element),
                             elements_to_dict(copy))

        custom_element2 = PipelineElement.create(
            'MyUnDeepcopyableObject',
            base_element=GridSearchOptimizer(),
            hyperparameters={})
        with self.assertRaises(Exception):
            custom_element2.copy_me()

    def test_estimator_type(self):
        """``_estimator_type`` reflects the base element, odd elements raise."""
        estimator = PipelineElement('SVC')
        self.assertEqual(estimator._estimator_type, 'classifier')

        estimator = PipelineElement('SVR')
        self.assertEqual(estimator._estimator_type, 'regressor')

        estimator = PipelineElement('PCA')
        self.assertEqual(estimator._estimator_type, None)

        # each of these dummy elements is expected to be rejected
        invalid_base_elements = [
            DummyEstimatorWrongType(),
            DummyTransformerWithPredict(),
            DummyEstimatorNoPredict(),
        ]
        for base_element in invalid_base_elements:
            estimator = PipelineElement.create('Dummy', base_element, {})
            with self.assertRaises(NotImplementedError):
                _ = estimator._estimator_type

    def test_sanity_check_item_for_add(self):
        """Only photon elements pass the type check and may be added."""
        valid_type = PipelineElement('StandardScaler')
        valid_type2 = CallbackElement('my_callback', None)
        invalid_type = StandardScaler()
        invalid_type2 = Preprocessing()

        PipelineElement.sanity_check_element_type_for_building_photon_pipes(
            valid_type, PipelineElement)
        PipelineElement.sanity_check_element_type_for_building_photon_pipes(
            valid_type2, PipelineElement)

        with self.assertRaises(TypeError):
            PipelineElement.sanity_check_element_type_for_building_photon_pipes(
                invalid_type, PipelineElement)

        with self.assertRaises(TypeError):
            PipelineElement.sanity_check_element_type_for_building_photon_pipes(
                invalid_type2, PipelineElement)

        classes_to_test = [Stack, Switch, Branch, Preprocessing]
        for photon_class in classes_to_test:
            # Preprocessing is constructed without a name, all other classes
            # get a generic one
            if photon_class is Preprocessing:
                instance = photon_class()
            else:
                instance = photon_class('tmp_instance')

            instance.add(valid_type)
            instance.add(valid_type2)
            with self.assertRaises(TypeError):
                instance.add(invalid_type)
            with self.assertRaises(TypeError):
                instance.add(invalid_type2)
class PipelineTests(PhotonBaseTest):
    """Tests for ``PhotonPipeline``, mostly by comparison with the equivalent
    scikit-learn objects."""

    def setUp(self):
        """Create photon elements and their scikit-learn counterparts."""
        # return_X_y has to be given by keyword; scikit-learn >= 1.0 no longer
        # accepts it as a positional argument.
        self.X, self.y = load_breast_cancer(return_X_y=True)

        # Photon Version
        self.p_pca = PipelineElement("PCA", {}, random_state=3)
        self.p_svm = PipelineElement("SVC", {}, random_state=3)
        self.p_ss = PipelineElement("StandardScaler", {})
        self.p_dt = PipelineElement("DecisionTreeClassifier", random_state=3)

        dummy_element = DummyYAndCovariatesTransformer()
        self.dummy_photon_element = PipelineElement.create(
            "DummyTransformer", dummy_element, {})

        # scikit-learn version
        self.sk_pca = PCA(random_state=3)
        self.sk_svc = SVC(random_state=3)
        self.sk_ss = StandardScaler()
        self.sk_dt = DecisionTreeClassifier(random_state=3)

    def test_regular_use(self):
        """PCA + SVC in a photon pipeline predicts like the sklearn pipeline."""
        photon_pipe = PhotonPipeline([("PCA", self.p_pca),
                                      ("SVC", self.p_svm)])
        photon_pipe.fit(self.X, self.y)

        photon_transformed_X, _, _ = photon_pipe.transform(self.X)
        photon_predicted_y = photon_pipe.predict(self.X)

        # the element is given by reference, so it should be fitted right here
        photon_ref_transformed_X, _, _ = self.p_pca.transform(self.X)
        photon_ref_predicted_y = self.p_svm.predict(photon_ref_transformed_X)

        self.assertTrue(
            np.array_equal(photon_transformed_X, photon_ref_transformed_X))
        self.assertTrue(
            np.array_equal(photon_predicted_y, photon_ref_predicted_y))

        sk_pipe = SKPipeline([("PCA", self.sk_pca), ("SVC", self.sk_svc)])
        sk_pipe.fit(self.X, self.y)

        sk_predicted_y = sk_pipe.predict(self.X)
        self.assertTrue(np.array_equal(photon_predicted_y, sk_predicted_y))

        # sklearn pipeline does not offer a transform function here, so only
        # the predictions are compared

    def test_add_preprocessing(self):
        """A preprocessing object becomes the first step of the pipeline."""
        my_preprocessing = Preprocessing()
        my_preprocessing += PipelineElement("LabelEncoder")
        photon_pipe = PhotonPipeline([("PCA", self.p_pca),
                                      ("SVC", self.p_svm)])
        photon_pipe._add_preprocessing(my_preprocessing)

        self.assertEqual(len(photon_pipe.named_steps), 3)
        first_element = photon_pipe.elements[0][1]
        self.assertEqual(first_element, my_preprocessing)
        self.assertEqual(photon_pipe.named_steps["Preprocessing"],
                         my_preprocessing)

    def test_no_estimator(self):
        """Without a final estimator ``predict`` returns the transformed data."""
        no_estimator_pipe = PhotonPipeline([("StandardScaler", self.p_ss),
                                            ("PCA", self.p_pca)])
        no_estimator_pipe.fit(self.X, self.y)

        photon_no_estimator_transform, _, _ = no_estimator_pipe.transform(
            self.X)
        photon_no_estimator_predict = no_estimator_pipe.predict(self.X)

        self.assertTrue(
            np.array_equal(photon_no_estimator_predict,
                           photon_no_estimator_transform))

        # reproduce the same two steps with plain scikit-learn objects
        self.sk_ss.fit(self.X)
        standardized_data = self.sk_ss.transform(self.X)
        self.sk_pca.fit(standardized_data)
        pca_data = self.sk_pca.transform(standardized_data)

        self.assertTrue(np.array_equal(photon_no_estimator_transform,
                                       pca_data))
        self.assertTrue(np.array_equal(photon_no_estimator_predict, pca_data))

    def test_y_and_covariates_transformation(self):
        """X, y and kwargs are handed to and returned from a transformer."""
        X = np.ones((200, 50))
        y = np.ones((200, )) + 2
        kwargs = {"sample1": np.ones((200, 5))}

        photon_pipe = PhotonPipeline([("DummyTransformer",
                                       self.dummy_photon_element)])

        # if y is none all y transformer should be ignored
        Xt2, yt2, kwargst2 = photon_pipe.transform(X, None, **kwargs)
        self.assertTrue(np.array_equal(Xt2, X))
        self.assertIsNone(yt2)
        # NOTE(review): np.array_equal on two dicts - presumably this relies
        # on the kwargs being passed through untouched; confirm.
        self.assertTrue(np.array_equal(kwargst2, kwargs))

        # if y is given, all y transformers should be working
        Xt, yt, kwargst = photon_pipe.transform(X, y, **kwargs)

        # assure that data is delivered to element correctly
        base_element = self.dummy_photon_element.base_element
        self.assertTrue(np.array_equal(X, base_element.X))
        self.assertTrue(np.array_equal(y, base_element.y))
        self.assertTrue(
            np.array_equal(kwargs["sample1"], base_element.kwargs["sample1"]))

        # assure that data is transformed correctly
        self.assertTrue(np.array_equal(Xt, X - 1))
        self.assertTrue(np.array_equal(yt, y + 1))
        self.assertIn("sample1_edit", kwargst)
        self.assertTrue(
            np.array_equal(kwargst["sample1_edit"], kwargs["sample1"] + 5))

    def test_predict_with_training_flag(self):
        """A label-editing transformer in the pipeline equals editing labels by hand."""
        # manually edit labels
        sk_pipe = SKPipeline([("SS", self.sk_ss), ("SVC", self.sk_svc)])
        y_plus_one = self.y + 1
        sk_pipe.fit(self.X, y_plus_one)
        sk_pred = sk_pipe.predict(self.X)

        # edit labels during pipeline
        p_pipe = PhotonPipeline([("SS", self.p_ss),
                                 ("YT", self.dummy_photon_element),
                                 ("SVC", self.p_svm)])
        p_pipe.fit(self.X, self.y)
        p_pred = p_pipe.predict(self.X)

        sk_standardized_X = self.sk_ss.transform(self.X)
        input_of_y_transformer = self.dummy_photon_element.base_element.X
        self.assertTrue(
            np.array_equal(sk_standardized_X, input_of_y_transformer))

        self.assertTrue(np.array_equal(sk_pred, p_pred))

    def test_inverse_tansform(self):
        """``inverse_transform`` matches sklearn and works with a stack."""
        # simple pipe
        sk_pipe = SKPipeline([("SS", self.sk_ss), ("PCA", self.sk_pca)])
        sk_pipe.fit(self.X, self.y)
        sk_transform = sk_pipe.transform(self.X)
        sk_inverse_transformed = sk_pipe.inverse_transform(sk_transform)

        photon_pipe = PhotonPipeline([("SS", self.p_ss), ("PCA", self.p_pca)])
        photon_pipe.fit(self.X, self.y)
        p_transform, _, _ = photon_pipe.transform(self.X)
        p_inverse_transformed, _, _ = photon_pipe.inverse_transform(
            p_transform)

        self.assertTrue(
            np.array_equal(sk_inverse_transformed, p_inverse_transformed))

        # now including stack
        stack = Stack("stack", [self.p_pca])
        stack_pipeline = PhotonPipeline([
            ("stack", stack),
            ("StandardScaler", PipelineElement("StandardScaler")),
            ("LinearSVC", PipelineElement("LinearSVC")),
        ])
        stack_pipeline.fit(self.X, self.y)
        feature_importances = stack_pipeline.feature_importances_
        inversed_data, _, _ = stack_pipeline.inverse_transform(
            feature_importances)
        self.assertEqual(inversed_data.shape[1], self.X.shape[1])

    # Todo: add tests for kwargs

    def test_predict_proba(self):
        """``predict_proba`` matches the sklearn pipeline with the same steps."""
        # the step is named "SVC" but holds the decision tree classifier
        sk_pipe = SKPipeline([("SS", self.sk_ss), ("SVC", self.sk_dt)])
        sk_pipe.fit(self.X, self.y)
        sk_proba = sk_pipe.predict_proba(self.X)

        photon_pipe = PhotonPipeline([("SS", self.p_ss), ("SVC", self.p_dt)])
        photon_pipe.fit(self.X, self.y)
        photon_proba = photon_pipe.predict_proba(self.X)

        self.assertTrue(np.array_equal(sk_proba, photon_proba))

    def test_copy_me(self):
        """Copying a nested pipeline keeps its structure and configuration."""
        switch = Switch("my_copy_switch")
        switch += PipelineElement("StandardScaler")
        switch += PipelineElement("RobustScaler", test_disabled=True)

        stack = Stack("RandomStack")
        stack += PipelineElement("SVC")
        branch = Branch("Random_Branch")
        pca_hyperparameters = {"n_components": [5, 10]}
        branch += PipelineElement("PCA", hyperparameters=pca_hyperparameters)
        branch += PipelineElement("DecisionTreeClassifier")
        stack += branch

        photon_pipe = PhotonPipeline([
            ("SimpleImputer", PipelineElement("SimpleImputer")),
            ("my_copy_switch", switch),
            ("RandomStack", stack),
            ("Callback1", CallbackElement("tmp_callback", np.mean)),
            ("PhotonVotingClassifier",
             PipelineElement("PhotonVotingClassifier")),
        ])

        copy_of_the_pipe = photon_pipe.copy_me()

        self.assertEqual(photon_pipe.random_state,
                         copy_of_the_pipe.random_state)
        self.assertEqual(len(copy_of_the_pipe.elements), 5)
        self.assertEqual(copy_of_the_pipe.elements[2][1].name, "RandomStack")
        self.assertTrue(
            copy_of_the_pipe.named_steps["my_copy_switch"].elements[1]
            .test_disabled)
        # stack -> branch -> PCA element
        copied_pca = copy_of_the_pipe.elements[2][1].elements[1].elements[0]
        self.assertDictEqual(copied_pca.hyperparameters,
                             {"PCA__n_components": [5, 10]})
        self.assertIsInstance(copy_of_the_pipe.elements[3][1],
                              CallbackElement)
        self.assertEqual(
            copy_of_the_pipe.named_steps["tmp_callback"].delegate_function,
            np.mean)

    def test_random_state(self):
        """The pipeline's ``random_state`` is handed down to its elements."""
        photon_pipe = PhotonPipeline([("SS", self.p_ss),
                                      ("PCA", PipelineElement("PCA")),
                                      ("SVC", self.p_dt)])
        photon_pipe.random_state = 666
        photon_pipe.fit(self.X, self.y)

        self.assertEqual(self.p_dt.random_state, photon_pipe.random_state)
        self.assertEqual(photon_pipe.elements[1][-1].random_state,
                         photon_pipe.random_state)
        self.assertEqual(self.p_dt.random_state, 666)