Пример #1
0
 def test_default_configuration_iterative_fit(self):
     """Each ``iterative_fit`` call must raise ``n_estimators`` by one.

     A pipeline restricted to the random forest classifier without feature
     preprocessing is fitted iteratively ten times on the iris training
     data; after the k-th call the estimator must report k estimators.
     """
     restrictions = {
         'classifier': ['random_forest'],
         'feature_preprocessor': ['no_preprocessing'],
     }
     pipeline = SimpleClassificationPipeline(include=restrictions)
     # Only the training half of the dataset is needed here.
     X_train, Y_train, _, _ = get_dataset(dataset='iris')
     pipeline.fit_transformer(X_train, Y_train)
     for expected_n_estimators in range(1, 11):
         pipeline.iterative_fit(X_train, Y_train)
         # The last pipeline step wraps the chosen classifier component.
         forest = pipeline.steps[-1][-1].choice.estimator
         self.assertEqual(forest.n_estimators, expected_n_estimators)
Пример #2
0
 def test_default_configuration_iterative_fit(self):
     """Each ``iterative_fit`` call must raise ``n_estimators`` by one.

     A pipeline restricted to the random forest classifier without
     preprocessing is fitted iteratively ten times on the iris training
     data; after the k-th call the estimator must report k estimators.
     """
     classifier = SimpleClassificationPipeline(
         include={'classifier': ['random_forest'],
                  'preprocessor': ['no_preprocessing']})
     # Only the training half of the dataset is needed here.
     X_train, Y_train, _, _ = get_dataset(dataset='iris')
     # The return value is not needed: iterative_fit below is handed the
     # raw training data, so the result is deliberately not bound.
     classifier.fit_transformer(X_train, Y_train)
     for expected_n_estimators in range(1, 11):
         classifier.iterative_fit(X_train, Y_train)
         # The last pipeline step wraps the chosen classifier component.
         estimator = classifier.steps[-1][-1].choice.estimator
         self.assertEqual(estimator.n_estimators, expected_n_estimators)
Пример #3
0
    def test_weighting_effect(self):
        """Check F1 scores with and without class weighting.

        An imbalanced two-class problem (class weights 0.8 / 0.2) with 200
        samples is generated; the first 100 samples train the pipeline and
        the remaining 100 evaluate it. Every listed classifier (without
        preprocessing) and every listed preprocessor (combined with the
        'sgd' classifier) is run with 'balancing:strategy' set to 'none' and
        to 'weighting', and the F1 score is compared against a recorded
        reference value to three decimal places.

        Each configuration is fitted twice: once through ``fit`` and once
        through ``fit_transformer`` followed by ``fit_estimator``. Both
        routes are checked against the same reference value.
        """
        data = sklearn.datasets.make_classification(
            n_samples=200, n_features=10, n_redundant=2, n_informative=2,
            n_repeated=2, n_clusters_per_class=2, weights=[0.8, 0.2],
            random_state=1)
        # The split is the same for every run, so it is taken once up front.
        X_train, Y_train = data[0][:100], data[1][:100]
        X_test, Y_test = data[0][100:], data[1][100:]

        def default_with_strategy(include, strategy):
            """Return a fresh pipeline and its default configuration with the
            balancing strategy overridden."""
            pipeline = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = pipeline.get_hyperparameter_search_space()
            config = cs.get_default_configuration()
            # NOTE(review): written through the private ``_values`` mapping;
            # presumably this bypasses validation -- confirm.
            config._values['balancing:strategy'] = strategy
            return pipeline, config

        def assert_f1(predictions, expected, msg):
            """Compare the F1 score on the test half with the reference."""
            # f1_score expects (y_true, y_pred). Binary F1 is symmetric in
            # its two arguments, so the reference values are unaffected.
            self.assertAlmostEqual(
                sklearn.metrics.f1_score(Y_test, predictions), expected,
                places=3, msg=msg)

        def predict_two_step(pipeline):
            """Fit transformer and estimator separately, then predict."""
            Xt, fit_params = pipeline.fit_transformer(X_train, Y_train)
            pipeline.fit_estimator(Xt, Y_train, **fit_params)
            return pipeline.predict(X_test)

        # (name, component class, F1 without weighting, F1 with weighting).
        # The class is listed for reference only; lookup happens by name.
        classifier_cases = [
            ('adaboost', AdaboostClassifier, 0.810, 0.735),
            ('decision_tree', DecisionTree, 0.780, 0.643),
            ('extra_trees', ExtraTreesClassifier, 0.75, 0.800),
            ('gradient_boosting', GradientBoostingClassifier, 0.789, 0.762),
            ('random_forest', RandomForest, 0.75, 0.821),
            ('libsvm_svc', LibSVM_SVC, 0.769, 0.72),
            ('liblinear_svc', LibLinear_SVC, 0.762, 0.735),
            ('sgd', SGD, 0.704, 0.667),
        ]
        for name, _, acc_no_weighting, acc_weighting in classifier_cases:
            for strategy, acc in [('none', acc_no_weighting),
                                  ('weighting', acc_weighting)]:
                include = {'classifier': [name],
                           'preprocessor': ['no_preprocessing']}
                _, default = default_with_strategy(include, strategy)

                # Route 1: configuration given to the constructor, then fit.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                predictor = classifier.fit(X_train, Y_train)
                assert_f1(predictor.predict(X_test), acc, (name, strategy))

                # Route 2: fit_transformer and fit_estimator on a new
                # pipeline that additionally gets the configuration through
                # set_hyperparameters.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                classifier.set_hyperparameters(configuration=default)
                assert_f1(predict_two_step(classifier), acc,
                          (name, strategy, 'fit_transformer/fit_estimator'))

        preprocessor_cases = [
            ('extra_trees_preproc_for_classification',
             ExtraTreesPreprocessorClassification, 0.691, 0.692),
            ('liblinear_svc_preprocessor', LibLinear_Preprocessor,
             0.692, 0.590),
        ]
        for name, _, acc_no_weighting, acc_weighting in preprocessor_cases:
            for strategy, acc in [('none', acc_no_weighting),
                                  ('weighting', acc_weighting)]:
                include = {'classifier': ['sgd'], 'preprocessor': [name]}
                classifier, default = default_with_strategy(include, strategy)

                # Route 1: configuration applied after construction.
                classifier.set_hyperparameters(default)
                predictor = classifier.fit(X_train, Y_train)
                assert_f1(predictor.predict(X_test), acc, (name, strategy))

                # Route 2: fit_transformer and fit_estimator on a new
                # pipeline built directly from the configuration.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                assert_f1(predict_two_step(classifier), acc,
                          (name, strategy, 'fit_transformer/fit_estimator'))
Пример #4
0
    def test_weighting_effect(self):
        """Check F1 scores with and without class weighting.

        An imbalanced two-class problem (class weights 0.8 / 0.2) with 200
        samples is generated; the first 100 samples train the pipeline and
        the remaining 100 evaluate it. Every listed classifier (without
        preprocessing) and every listed preprocessor (combined with the
        'sgd' classifier) is run with 'balancing:strategy' set to 'none' and
        to 'weighting', and the F1 score is compared against a recorded
        reference value. Classifier cases carry their own precision
        (``places``); preprocessor cases are compared to three places.

        Each configuration is fitted twice: once through ``fit`` and once
        through ``fit_transformer`` followed by ``fit_estimator``. Both
        routes are checked against the same reference value.
        """
        data = sklearn.datasets.make_classification(
            n_samples=200, n_features=10, n_redundant=2, n_informative=2,
            n_repeated=2, n_clusters_per_class=2, weights=[0.8, 0.2],
            random_state=1)
        # The split is the same for every run, so it is taken once up front.
        X_train, Y_train = data[0][:100], data[1][:100]
        X_test, Y_test = data[0][100:], data[1][100:]

        def default_with_strategy(include, strategy):
            """Return a fresh pipeline and its default configuration with the
            balancing strategy overridden."""
            pipeline = SimpleClassificationPipeline(
                random_state=1, include=include)
            cs = pipeline.get_hyperparameter_search_space()
            config = cs.get_default_configuration()
            # NOTE(review): written through the private ``_values`` mapping;
            # presumably this bypasses validation -- confirm.
            config._values['balancing:strategy'] = strategy
            return pipeline, config

        def assert_f1(predictions, expected, places, msg):
            """Compare the F1 score on the test half with the reference."""
            # f1_score expects (y_true, y_pred). Binary F1 is symmetric in
            # its two arguments, so the reference values are unaffected.
            self.assertAlmostEqual(
                sklearn.metrics.f1_score(Y_test, predictions), expected,
                places=places, msg=msg)

        def predict_two_step(pipeline):
            """Fit transformer and estimator separately, then predict."""
            Xt, fit_params = pipeline.fit_transformer(X_train, Y_train)
            pipeline.fit_estimator(Xt, Y_train, **fit_params)
            return pipeline.predict(X_test)

        # (name, component class, F1 without weighting, F1 with weighting,
        # decimal places to compare). The class is listed for reference
        # only; lookup happens by name.
        classifier_cases = [
            ('adaboost', AdaboostClassifier, 0.810, 0.735, 3),
            ('decision_tree', DecisionTree, 0.780, 0.643, 3),
            ('extra_trees', ExtraTreesClassifier, 0.780, 0.8, 3),
            ('gradient_boosting', GradientBoostingClassifier,
             0.737, 0.684, 3),
            ('random_forest', RandomForest, 0.780, 0.789, 3),
            ('libsvm_svc', LibSVM_SVC, 0.769, 0.72, 3),
            ('liblinear_svc', LibLinear_SVC, 0.762, 0.735, 3),
            ('passive_aggressive', PassiveAggressive, 0.642, 0.449, 3),
            ('sgd', SGD, 0.818, 0.575, 2),
        ]
        for name, _, acc_no_weighting, acc_weighting, places in \
                classifier_cases:
            for strategy, acc in [('none', acc_no_weighting),
                                  ('weighting', acc_weighting)]:
                include = {'classifier': [name],
                           'preprocessor': ['no_preprocessing']}
                _, default = default_with_strategy(include, strategy)

                # Route 1: configuration given to the constructor, then fit.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                predictor = classifier.fit(X_train, Y_train)
                assert_f1(predictor.predict(X_test), acc, places,
                          (name, strategy))

                # Route 2: fit_transformer and fit_estimator on a new
                # pipeline that additionally gets the configuration through
                # set_hyperparameters.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                classifier.set_hyperparameters(configuration=default)
                assert_f1(predict_two_step(classifier), acc, places,
                          (name, strategy, 'fit_transformer/fit_estimator'))

        preprocessor_cases = [
            ('extra_trees_preproc_for_classification',
             ExtraTreesPreprocessorClassification, 0.810, 0.563),
            ('liblinear_svc_preprocessor', LibLinear_Preprocessor,
             0.837, 0.567),
        ]
        for name, _, acc_no_weighting, acc_weighting in preprocessor_cases:
            for strategy, acc in [('none', acc_no_weighting),
                                  ('weighting', acc_weighting)]:
                include = {'classifier': ['sgd'], 'preprocessor': [name]}
                classifier, default = default_with_strategy(include, strategy)

                # Route 1: configuration applied after construction.
                classifier.set_hyperparameters(default)
                predictor = classifier.fit(X_train, Y_train)
                assert_f1(predictor.predict(X_test), acc, 3,
                          (name, strategy))

                # Route 2: fit_transformer and fit_estimator on a new
                # pipeline built directly from the configuration.
                classifier = SimpleClassificationPipeline(
                    default, random_state=1, include=include)
                assert_f1(predict_two_step(classifier), acc, 3,
                          (name, strategy, 'fit_transformer/fit_estimator'))