Example #1
    def test_fit(self):
        X_train, Y_train, X_test, Y_test = putil.get_dataset("iris")
        automl = autosklearn.automl.AutoML(self.output, self.output, 10, 10)
        automl.fit(X_train, Y_train)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.9)
        self.assertEqual(automl.task_, MULTICLASS_CLASSIFICATION)
Example #2
    def test_binary_score(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """
        if self.travis:
            self.skipTest('This test does currently not run on travis-ci. '
                          'Make sure it runs locally on your machine!')

        output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
        self._setUp(output)

        data = sklearn.datasets.make_classification(n_samples=1000,
                                                    n_features=20,
                                                    n_redundant=5,
                                                    n_informative=5,
                                                    n_repeated=2,
                                                    n_clusters_per_class=2,
                                                    random_state=1)
        X_train = data[0][:700]
        Y_train = data[1][:700]
        X_test = data[0][700:]
        Y_test = data[1][700:]

        automl = autosklearn.automl.AutoML(output, output, 15, 15)
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.5)

        del automl
        self._tearDown(output)
Example #3
    def test_fit(self):
        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(self.output, self.output, 10, 10)
        automl.fit(X_train, Y_train)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.9)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)
Example #4
    def test_binary_score(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """
        if self.travis:
            self.skipTest('This test does currently not run on travis-ci. '
                          'Make sure it runs locally on your machine!')

        output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
        self._setUp(output)

        data = sklearn.datasets.make_classification(
            n_samples=1000, n_features=20, n_redundant=5, n_informative=5,
            n_repeated=2, n_clusters_per_class=2, random_state=1)
        X_train = data[0][:700]
        Y_train = data[1][:700]
        X_test = data[0][700:]
        Y_test = data[1][700:]

        automl = autosklearn.automl.AutoML(output, output, 15, 15)
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.5)

        del automl
        self._tearDown(output)
Example #5
    def test_binary_score_and_include(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """
        backend_api = self._create_backend('test_binary_score_and_include')

        data = sklearn.datasets.make_classification(
            n_samples=400, n_features=10, n_redundant=1, n_informative=3,
            n_repeated=1, n_clusters_per_class=2, random_state=1)
        X_train = data[0][:200]
        Y_train = data[1][:200]
        X_test = data[0][200:]
        Y_test = data[1][200:]

        automl = autosklearn.automl.AutoML(backend_api, 20, 5,
                                           include_estimators=['sgd'],
                                           include_preprocessors=['no_preprocessing'])
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION,
                   metric=accuracy)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        # TODO, the assumption from above is not really tested here
        # Also, the score method should be removed, it only makes little sense
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.4)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #6
def test_binary_score_and_include(backend, dask_client):
    """
    Test fix for binary classification prediction
    taking the index 1 of second dimension in prediction matrix
    """

    data = sklearn.datasets.make_classification(
        n_samples=400, n_features=10, n_redundant=1, n_informative=3,
        n_repeated=1, n_clusters_per_class=2, random_state=1)
    X_train = data[0][:200]
    Y_train = data[1][:200]
    X_test = data[0][200:]
    Y_test = data[1][200:]

    automl = autosklearn.automl.AutoML(
        backend, 20, 5,
        include_estimators=['sgd'],
        include_preprocessors=['no_preprocessing'],
        metric=accuracy,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
    assert automl._task == BINARY_CLASSIFICATION

    # TODO, the assumption from above is not really tested here
    # Also, the score method should be removed, it only makes little sense
    score = automl.score(X_test, Y_test)
    assert score >= 0.4

    del automl
Example #7
    def test_binary_score_and_include(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """
        backend_api = self._create_backend('test_binary_score_and_include')

        data = sklearn.datasets.make_classification(
            n_samples=400, n_features=10, n_redundant=1, n_informative=3,
            n_repeated=1, n_clusters_per_class=2, random_state=1)
        X_train = data[0][:200]
        Y_train = data[1][:200]
        X_test = data[0][200:]
        Y_test = data[1][200:]

        automl = autosklearn.automl.AutoML(
            backend_api, 20, 5,
            include_estimators=['sgd'],
            include_preprocessors=['no_preprocessing'],
            metric=accuracy,
        )
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        # TODO, the assumption from above is not really tested here
        # Also, the score method should be removed, it only makes little sense
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.4)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #8
    def test_fit_roar(self):
        def get_roar_object_callback(scenario_dict, seed, ta, **kwargs):
            """Random online adaptive racing.

            http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
            scenario = Scenario(scenario_dict)
            return ROAR(
                scenario=scenario,
                rng=seed,
                tae_runner=ta,
            )

        backend_api = self._create_backend('test_fit_roar')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(
            backend=backend_api,
            time_left_for_this_task=20,
            per_run_time_limit=5,
            initial_configurations_via_metalearning=0,
            get_smac_object_callback=get_roar_object_callback,
        )
        automl.fit(
            X_train,
            Y_train,
            metric=accuracy,
            task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #9
    def test_binary_score(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """

        output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
        self._setUp(output)

        data = sklearn.datasets.make_classification(
            n_samples=1000, n_features=20, n_redundant=5, n_informative=5,
            n_repeated=2, n_clusters_per_class=2, random_state=1)
        X_train = data[0][:700]
        Y_train = data[1][:700]
        X_test = data[0][700:]
        Y_test = data[1][700:]

        backend_api = backend.create(output, output)
        automl = autosklearn.automl.AutoML(backend_api, 15, 5)
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.5)

        del automl
        self._tearDown(output)
Example #10
def test_load_best_individual_model(metric, backend, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        metric=metric,
        dask_client=dask_client,
    )

    # We cannot easily mock a function sent to dask
    # so for this test we create the whole set of models/ensembles
    # but prevent it to be loaded
    automl.fit(
        X_train,
        Y_train,
        task=MULTICLASS_CLASSIFICATION,
    )
    automl._backend.load_ensemble = unittest.mock.MagicMock(return_value=None)

    # A memory error occurs in the ensemble construction
    assert automl._backend.load_ensemble(automl._seed) is None

    # The load model is robust to this and loads the best model
    automl._load_models()
    assert automl.ensemble_ is not None

    # Just 1 model is there for ensemble and all weight must be on it
    get_models_with_weights = automl.get_models_with_weights()
    assert len(get_models_with_weights) == 1
    assert get_models_with_weights[0][0] == 1.0

    # Match a toy dataset
    if metric.name == 'balanced_accuracy':
        assert automl.score(X_test, Y_test) > 0.9
    elif metric.name == 'log_loss':
        # Seen values in github actions of 0.6978304740364537
        assert automl.score(X_test, Y_test) < 0.7
    else:
        raise ValueError(metric.name)

    del automl
Example #11
    def test_load_best_individual_model(self):

        backend_api = self._create_backend('test_fit')

        for metric in [log_loss, balanced_accuracy]:
            X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
            automl = autosklearn.automl.AutoML(
                backend=backend_api,
                time_left_for_this_task=20,
                per_run_time_limit=5,
                metric=metric,
            )

            with unittest.mock.patch(
                'autosklearn.ensemble_builder.EnsembleBuilder.run'
            ) as mock_ensemble_run:
                mock_ensemble_run.side_effect = MemoryError
                automl.fit(
                    X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
                )

            # A memory error occurs in the ensemble construction
            self.assertIsNone(automl._backend.load_ensemble(automl._seed))

            # The load model is robust to this and loads the best model
            automl._load_models()
            self.assertIsNotNone(automl.ensemble_)

            # Just 1 model is there for ensemble and all weight must be on it
            get_models_with_weights = automl.get_models_with_weights()
            self.assertEqual(len(get_models_with_weights), 1)
            self.assertEqual(get_models_with_weights[0][0], 1.0)

            # Match a toy dataset
            if metric._sign < 0:
                self.assertLessEqual(automl.score(X_test, Y_test), 0.2)
            else:
                self.assertGreaterEqual(automl.score(X_test, Y_test), 0.8)

            del automl
            self._tearDown(backend_api.temporary_directory)
            self._tearDown(backend_api.output_directory)
Example #12
    def test_fit(self):
        output = os.path.join(self.test_dir, '..', '.tmp_test_fit')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(output, output, 12, 12)
        automl.fit(X_train, Y_train)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #13
    def test_fit(self):
        output = os.path.join(self.test_dir, '..', '.tmp_test_fit')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        backend_api = backend.create(output, output)
        automl = autosklearn.automl.AutoML(backend_api, 20, 5)
        automl.fit(X_train, Y_train, metric=accuracy)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #14
    def test_fit(self):
        backend_api = self._create_backend('test_fit')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(backend_api, 20, 5)
        automl.fit(
            X_train, Y_train, metric=accuracy, task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #15
    def test_fit(self):
        output = os.path.join(self.test_dir, '..', '.tmp_test_fit')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        backend_api = backend.create(output, output)
        automl = autosklearn.automl.AutoML(backend_api, 30, 5)
        automl.fit(X_train, Y_train)
        #print(automl.show_models(), flush=True)
        #print(automl.cv_results_, flush=True)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #16
    def test_fit(self):
        if self.travis:
            self.skipTest('This test does currently not run on travis-ci. '
                          'Make sure it runs locally on your machine!')

        output = os.path.join(self.test_dir, '..', '.tmp_test_fit')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(output, output, 15, 15)
        automl.fit(X_train, Y_train)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #17
    def test_fit_roar(self):
        output = os.path.join(self.test_dir, '..', '.tmp_test_fit_roar')
        self._setUp(output)

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        backend_api = backend.create(output, output)
        automl = autosklearn.automl.AutoML(backend_api, 20, 5,
                                           initial_configurations_via_metalearning=0,
                                           configuration_mode='ROAR')
        automl.fit(X_train, Y_train, metric=accuracy)
        # print(automl.show_models(), flush=True)
        # print(automl.cv_results_, flush=True)
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(output)
Example #18
    def test_fit(self):
        backend_api = self._create_backend('test_fit')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(backend_api, 20, 5)
        automl.fit(
            X_train,
            Y_train,
            metric=accuracy,
            task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #19
def test_fit(dask_client, backend):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        metric=accuracy,
        dask_client=dask_client,
    )
    automl.fit(
        X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
    )
    score = automl.score(X_test, Y_test)
    assert score > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert automl._task == MULTICLASS_CLASSIFICATION

    del automl
Example #20
def test_fit_roar(dask_client_single_worker, backend):
    def get_roar_object_callback(
            scenario_dict,
            seed,
            ta,
            ta_kwargs,
            dask_client,
            n_jobs,
            **kwargs
    ):
        """Random online adaptive racing.

        http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
        scenario = Scenario(scenario_dict)
        return ROAR(
            scenario=scenario,
            rng=seed,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            dask_client=dask_client,
            n_jobs=n_jobs,
        )

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        initial_configurations_via_metalearning=0,
        get_smac_object_callback=get_roar_object_callback,
        metric=accuracy,
        dask_client=dask_client_single_worker,
    )
    automl.fit(
        X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
    )
    score = automl.score(X_test, Y_test)
    assert score > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert automl._task == MULTICLASS_CLASSIFICATION

    del automl
Example #21
def test_fit(dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        seed=0,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        metric=accuracy,
        dask_client=dask_client,
    )
    automl.fit(X_train, Y_train, task=MULTICLASS_CLASSIFICATION)
    score = automl.score(X_test, Y_test)
    assert score > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert includes_train_scores(automl.performance_over_time_.columns) is True
    assert performance_over_time_is_plausible(
        automl.performance_over_time_) is True
    assert automl._task == MULTICLASS_CLASSIFICATION

    del automl
Example #22
    def test_fit(self):
        backend_api = self._create_backend('test_fit')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(
            backend=backend_api,
            time_left_for_this_task=20,
            per_run_time_limit=5,
            metric=accuracy,
        )
        automl.fit(
            X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertGreater(self._count_succeses(automl.cv_results_), 0)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #23
    def test_binary_score_and_include(self):
        """
        Test fix for binary classification prediction
        taking the index 1 of second dimension in prediction matrix
        """

        output = os.path.join(self.test_dir, '..', '.tmp_test_binary_score')
        self._setUp(output)

        data = sklearn.datasets.make_classification(n_samples=400,
                                                    n_features=10,
                                                    n_redundant=1,
                                                    n_informative=3,
                                                    n_repeated=1,
                                                    n_clusters_per_class=2,
                                                    random_state=1)
        X_train = data[0][:200]
        Y_train = data[1][:200]
        X_test = data[0][200:]
        Y_test = data[1][200:]

        backend_api = backend.create(output, output)
        automl = autosklearn.automl.AutoML(
            backend_api,
            30,
            5,
            include_estimators=['sgd'],
            include_preprocessors=['no_preprocessing'])
        automl.fit(X_train, Y_train, task=BINARY_CLASSIFICATION)
        #print(automl.show_models(), flush=True)
        #print(automl.cv_results_, flush=True)
        self.assertEqual(automl._task, BINARY_CLASSIFICATION)

        # TODO, the assumption from above is not really tested here
        # Also, the score method should be removed, it only makes little sense
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.4)

        del automl
        self._tearDown(output)
Example #24
    def test_fit_roar(self):
        def get_roar_object_callback(
                scenario_dict,
                seed,
                ta,
                **kwargs
        ):
            """Random online adaptive racing.

            http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
            scenario = Scenario(scenario_dict)
            return ROAR(
                scenario=scenario,
                rng=seed,
                tae_runner=ta,
            )

        backend_api = self._create_backend('test_fit_roar')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(
            backend=backend_api,
            time_left_for_this_task=20,
            per_run_time_limit=5,
            initial_configurations_via_metalearning=0,
            get_smac_object_callback=get_roar_object_callback,
        )
        automl.fit(
            X_train, Y_train, metric=accuracy, task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
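
The snippets above all exercise the internal autosklearn.automl.AutoML class, whose constructor has changed across releases (older tests pass the output directory and time budgets positionally, newer ones pass a backend object, a metric and a dask client). For comparison, here is a minimal sketch of the same fit/score flow through the documented scikit-learn-style wrapper, autosklearn.classification.AutoSklearnClassifier; it assumes auto-sklearn is installed and uses small time budgets comparable to the tests above.

import sklearn.datasets
import sklearn.model_selection

import autosklearn.classification

# Small multiclass dataset, as in the iris-based tests above.
X, y = sklearn.datasets.load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
    X, y, random_state=1)

# Time budgets comparable to the 20-30 second limits used in the tests.
automl = autosklearn.classification.AutoSklearnClassifier(
    time_left_for_this_task=30,
    per_run_time_limit=5,
)
automl.fit(X_train, y_train)

# Same check the tests perform: accuracy on held-out data.
print(automl.score(X_test, y_test))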