Example #1
    def __init__(self,
                 autosklearn_tmp_dir,
                 dataset_name,
                 task_type,
                 metric,
                 limit,
                 output_dir,
                 ensemble_size=None,
                 ensemble_nbest=None,
                 seed=1,
                 shared_mode=False,
                 max_iterations=-1,
                 precision="32",
                 low_precision=True):
        super(EnsembleBuilder, self).__init__()

        self.autosklearn_tmp_dir = autosklearn_tmp_dir
        self.dataset_name = dataset_name
        self.task_type = task_type
        self.metric = metric
        self.limit = limit
        self.output_dir = output_dir
        self.ensemble_size = ensemble_size
        self.ensemble_nbest = ensemble_nbest
        self.seed = seed
        self.shared_mode = shared_mode
        self.max_iterations = max_iterations
        self.precision = precision
        self.low_precision = low_precision

        logger_name = 'EnsembleBuilder(%d):%s' % (self.seed, self.dataset_name)
        setup_logger(
            os.path.join(self.autosklearn_tmp_dir,
                         '%s.log' % str(logger_name)))
        self.logger = get_logger(logger_name)
Example #2
    def __init__(
        self,
        temporary_directory: str,
        output_directory: Optional[str],
        delete_tmp_folder_after_terminate: bool,
        delete_output_folder_after_terminate: bool,
    ):

        # Check that tmp_dir and output_dir are not the same.
        if temporary_directory == output_directory and temporary_directory is not None:
            raise ValueError("The temporary and the output directory "
                             "must be different.")

        self.delete_tmp_folder_after_terminate = delete_tmp_folder_after_terminate
        self.delete_output_folder_after_terminate = delete_output_folder_after_terminate
        # Attributes used to check that the directories were created by auto-sklearn.
        self._tmp_dir_created = False
        self._output_dir_created = False

        self._temporary_directory = get_randomized_directory_name(
            temporary_directory=temporary_directory,
        )
        self._output_directory = output_directory
        self.create_directories()
        # This is the first place the logger gets created.
        # We want to make sure any logging from here on uses the correct
        # directory where all files should be created
        logging.setup_logger(output_dir=self._temporary_directory)
        self._logger = logging.get_logger(__name__)
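
Example #2 above guards against the temporary and the output directory pointing at the same path before anything is created on disk. A minimal sketch of that check pulled out into a standalone helper (the function name is illustrative, not part of auto-sklearn):

from typing import Optional


def ensure_distinct_directories(temporary_directory: Optional[str],
                                output_directory: Optional[str]) -> None:
    # Mirrors the guard in Example #2: identical non-None paths are rejected.
    if temporary_directory is not None and temporary_directory == output_directory:
        raise ValueError("The temporary and the output directory "
                         "must be different.")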
Example #3
 def _get_logger(self, name):
     logger_name = 'AutoML(%d):%s' % (self._seed, name)
     setup_logger(os.path.join(self._backend.temporary_directory,
                               '%s.log' % str(logger_name)),
                  self.logging_config,
                  )
     return get_logger(logger_name)
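
Examples #1 and #3 follow the same pattern: build a per-seed logger name, point setup_logger at a log file inside the temporary directory, then fetch the logger with get_logger. A minimal standalone sketch of that pattern; the import path autosklearn.util.logging_ and the positional output-file argument mirror the calls above but are otherwise assumptions about the installed version:

import os
import tempfile

# Assumption: the same helpers used in the snippets above.
from autosklearn.util.logging_ import setup_logger, get_logger

seed, dataset_name = 1, 'iris'
logger_name = 'AutoML(%d):%s' % (seed, dataset_name)
setup_logger(os.path.join(tempfile.gettempdir(), '%s.log' % logger_name))
logger = get_logger(logger_name)
logger.info('logger configured')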
Example #4
    def test_do_dummy_prediction(self):
        for name in ['401_bac', '31_bac', 'adult', 'cadata']:
            backend_api = self._create_backend('test_do_dummy_prediction')

            dataset = os.path.join(self.test_dir, '..', '.data', name)

            auto = autosklearn.automl.AutoML(
                backend_api, 20, 5, initial_configurations_via_metalearning=25)
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend._make_internals_directory()
            D = load_data(dataset, backend_api)
            auto._backend.save_datamanager(D)
            auto._do_dummy_prediction(D, 1)

            # Ensure that the dummy predictions are not in the current working
            # directory, but in the temporary directory.
            self.assertFalse(
                os.path.exists(os.path.join(os.getcwd(), '.auto-sklearn')))
            self.assertTrue(
                os.path.exists(
                    os.path.join(backend_api.temporary_directory,
                                 '.auto-sklearn', 'predictions_ensemble',
                                 'predictions_ensemble_1_1_0.0.npy')))

            del auto
            self._tearDown(backend_api.temporary_directory)
            self._tearDown(backend_api.output_directory)
Example #5
    def test_do_dummy_prediction(self):
        datasets = {
            'breast_cancer': BINARY_CLASSIFICATION,
            'wine': MULTICLASS_CLASSIFICATION,
            'diabetes': REGRESSION,
        }

        for name, task in datasets.items():
            backend_api = self._create_backend('test_do_dummy_prediction')

            X_train, Y_train, X_test, Y_test = putil.get_dataset(name)
            datamanager = XYDataManager(
                X_train,
                Y_train,
                X_test,
                Y_test,
                task=task,
                dataset_name=name,
                feat_type=None,
            )

            auto = autosklearn.automl.AutoML(
                backend_api,
                20,
                5,
                initial_configurations_via_metalearning=25,
                metric=accuracy,
            )
            setup_logger()
            auto._logger = get_logger('test_do_dummy_predictions')
            auto._backend.save_datamanager(datamanager)
            D = backend_api.load_datamanager()

            # Check that the data manager is correctly loaded
            self.assertEqual(D.info['task'], datamanager.info['task'])

            auto._do_dummy_prediction(D, 1)

            # Ensure that the dummy predictions are not in the current working
            # directory, but in the temporary directory.
            self.assertFalse(
                os.path.exists(os.path.join(os.getcwd(), '.auto-sklearn')))
            self.assertTrue(
                os.path.exists(
                    os.path.join(backend_api.temporary_directory,
                                 '.auto-sklearn', 'predictions_ensemble',
                                 'predictions_ensemble_1_1_0.0.npy')))

            del auto
            self._tearDown(backend_api.temporary_directory)
            self._tearDown(backend_api.output_directory)
Example #6
    def test_exceptions_inside_log_in_smbo(self, smbo_run_mock):

        # Make sure that any exception raised by SMAC during the AutoML fit
        # is properly captured in a log file
        backend_api = self._create_backend('test_exceptions_inside_log')
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)

        automl = autosklearn.automl.AutoML(
            backend_api,
            20,
            5,
            metric=accuracy,
        )

        output_file = 'test_exceptions_inside_log.log'
        setup_logger(output_file=output_file)
        logger = get_logger('test_exceptions_inside_log')

        # Create a custom exception to prevent other errors from slipping in
        class MyException(Exception):
            pass

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        # The first call is on dummy predictor failure
        message = str(np.random.randint(100)) + '_run_smbo'
        smbo_run_mock.side_effect = MyException(message)

        with unittest.mock.patch(
                'autosklearn.automl.AutoML._get_logger') as mock:
            mock.return_value = logger
            with self.assertRaises(MyException):
                automl.fit(
                    X_train,
                    Y_train,
                    task=MULTICLASS_CLASSIFICATION,
                )
            with open(output_file) as f:
                self.assertTrue(message in f.read())

        # Cleanup
        os.unlink(output_file)
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #7
    def test_fit_roar(self):
        def get_roar_object_callback(
                scenario_dict,
                seed,
                ta,
                ta_kwargs,
                **kwargs
        ):
            """Random online adaptive racing.

            http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
            scenario = Scenario(scenario_dict)
            return ROAR(
                scenario=scenario,
                rng=seed,
                tae_runner=ta,
                tae_runner_kwargs=ta_kwargs,
            )

        backend_api = self._create_backend('test_fit_roar')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        automl = autosklearn.automl.AutoML(
            backend=backend_api,
            time_left_for_this_task=20,
            per_run_time_limit=5,
            initial_configurations_via_metalearning=0,
            get_smac_object_callback=get_roar_object_callback,
            metric=accuracy,
        )
        setup_logger()
        automl._logger = get_logger('test_fit_roar')
        automl.fit(
            X_train, Y_train, task=MULTICLASS_CLASSIFICATION,
        )
        score = automl.score(X_test, Y_test)
        self.assertGreaterEqual(score, 0.8)
        self.assertGreater(self._count_succeses(automl.cv_results_), 0)
        self.assertEqual(automl._task, MULTICLASS_CLASSIFICATION)

        del automl
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #8
def test_do_dummy_prediction(backend, dask_client, datasets):

    name, task = datasets

    X_train, Y_train, X_test, Y_test = putil.get_dataset(name)
    datamanager = XYDataManager(
        X_train,
        Y_train,
        X_test,
        Y_test,
        task=task,
        dataset_name=name,
        feat_type=None,
    )

    auto = autosklearn.automl.AutoML(
        backend,
        20,
        5,
        initial_configurations_via_metalearning=25,
        metric=accuracy,
        dask_client=dask_client,
    )
    setup_logger(backend.temporary_directory)
    auto._logger = get_logger('test_do_dummy_predictions')

    auto._backend.save_datamanager(datamanager)
    D = backend.load_datamanager()

    # Check that the data manager is correctly loaded
    assert D.info['task'] == datamanager.info['task']
    auto._do_dummy_prediction(D, 1)

    # Ensure that the dummy predictions are not in the current working
    # directory, but in the temporary directory.
    assert not os.path.exists(os.path.join(os.getcwd(), '.auto-sklearn'))
    assert os.path.exists(
        os.path.join(backend.temporary_directory, '.auto-sklearn', 'runs',
                     '1_1_0.0', 'predictions_ensemble_1_1_0.0.npy'))

    del auto
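
Example #8 receives datasets as a pytest fixture yielding a (name, task) pair. The fixture itself is not shown in the source; a hedged sketch of how it could be parametrized, reusing the name-to-task mapping from Example #5:

import pytest

from autosklearn.constants import (
    BINARY_CLASSIFICATION,
    MULTICLASS_CLASSIFICATION,
    REGRESSION,
)


@pytest.fixture(params=[
    ('breast_cancer', BINARY_CLASSIFICATION),
    ('wine', MULTICLASS_CLASSIFICATION),
    ('diabetes', REGRESSION),
], ids=lambda param: param[0])
def datasets(request):
    # Hands one (dataset_name, task_type) pair to the test per parametrization.
    return request.param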
Example #9
    def test_setup_logger(self):
        # Test that the setup_logger function correctly configures the logger
        # according to the given dictionary, and uses the default
        # logging.yaml file if logging_config is not specified.

        with open(
                os.path.join(os.path.dirname(__file__), 'example_config.yaml'),
                'r') as fh:
            example_config = yaml.safe_load(fh)

        # Configure logger with example_config.yaml.
        logging_.setup_logger(logging_config=example_config,
                              output_dir=tempfile.gettempdir())

        # example_config sets the root logger's level to CRITICAL,
        # which corresponds to 50.
        self.assertEqual(logging.getLogger().getEffectiveLevel(), 50)

        # This time use the default configuration.
        logging_.setup_logger(logging_config=None,
                              output_dir=tempfile.gettempdir())

        # default config sets the root logger's level to DEBUG,
        # which corresponds to 10.
        self.assertEqual(logging.getLogger().getEffectiveLevel(), 10)

        # Make sure we log to the desired directory
        logging_.setup_logger(output_dir=os.path.dirname(__file__),
                              filename='test.log')
        logger = logging.getLogger()
        logger.info('test_setup_logger')

        with open(os.path.join(os.path.dirname(__file__), 'test.log')) as fh:
            self.assertIn('test_setup_logger', ''.join(fh.readlines()))
        os.remove(os.path.join(os.path.dirname(__file__), 'test.log'))
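
The level numbers asserted in Examples #9 and #10 (CRITICAL == 50, DEBUG == 10) come straight from the stdlib logging module. A stdlib-only sketch of the same root-level round trip, without touching auto-sklearn:

import logging
import logging.config

# Set the root logger to CRITICAL via dictConfig, as example_config.yaml does.
logging.config.dictConfig({
    'version': 1,
    'disable_existing_loggers': False,
    'root': {'level': 'CRITICAL'},
})
assert logging.getLogger().getEffectiveLevel() == logging.CRITICAL == 50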
Example #10
    def test_setup_logger(self):
        # Test that the setup_logger function correctly configures the logger
        # according to the given dictionary, and uses the default
        # logging.yaml file if logging_config is not specified.

        with open(os.path.join(os.path.dirname(__file__),
                               'example_config.yaml'), 'r') as fh:
            example_config = yaml.safe_load(fh)

        # Configure logger with example_config.yaml.
        logging_.setup_logger(logging_config=example_config)

        # example_config sets the root logger's level to CRITICAL,
        # which corresponds to 50.
        self.assertEqual(logging.getLogger().getEffectiveLevel(), 50)

        # This time use the default configuration.
        logging_.setup_logger(logging_config=None)

        # default config sets the root logger's level to DEBUG,
        # which corresponds to 10.
        self.assertEqual(logging.getLogger().getEffectiveLevel(), 10)
Example #11
def test_fit_roar(dask_client_single_worker, backend):
    def get_roar_object_callback(scenario_dict, seed, ta, ta_kwargs,
                                 dask_client, n_jobs, **kwargs):
        """Random online adaptive racing.

        http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf"""
        scenario = Scenario(scenario_dict)
        return ROAR(
            scenario=scenario,
            rng=seed,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            dask_client=dask_client,
            n_jobs=n_jobs,
        )

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    automl = autosklearn.automl.AutoML(
        backend=backend,
        time_left_for_this_task=30,
        per_run_time_limit=5,
        initial_configurations_via_metalearning=0,
        get_smac_object_callback=get_roar_object_callback,
        metric=accuracy,
        dask_client=dask_client_single_worker,
    )
    setup_logger()
    automl._logger = get_logger('test_fit_roar')
    automl.fit(
        X_train,
        Y_train,
        task=MULTICLASS_CLASSIFICATION,
    )
    score = automl.score(X_test, Y_test)
    assert score > 0.8
    assert count_succeses(automl.cv_results_) > 0
    assert automl._task == MULTICLASS_CLASSIFICATION

    del automl
Example #12
    def __init__(self, autosklearn_tmp_dir, dataset_name, task_type, metric,
                 limit, output_dir, ensemble_size=None, ensemble_nbest=None,
                 seed=1, shared_mode=False, max_iterations=-1, precision="32",
                 low_precision=True):
        super(EnsembleBuilder, self).__init__()

        self.autosklearn_tmp_dir = autosklearn_tmp_dir
        self.dataset_name = dataset_name
        self.task_type = task_type
        self.metric = metric
        self.limit = limit
        self.output_dir = output_dir
        self.ensemble_size = ensemble_size
        self.ensemble_nbest = ensemble_nbest
        self.seed = seed
        self.shared_mode = shared_mode
        self.max_iterations = max_iterations
        self.precision = precision
        self.low_precision = low_precision

        logger_name = 'EnsembleBuilder(%d):%s' % (self.seed, self.dataset_name)
        setup_logger(os.path.join(self.autosklearn_tmp_dir,
                                  '%s.log' % str(logger_name)))
        self.logger = get_logger(logger_name)
Example #13
def test_exceptions_inside_log_in_smbo(smbo_run_mock, backend, dask_client):

    automl = autosklearn.automl.AutoML(
        backend,
        20,
        5,
        metric=accuracy,
        dask_client=dask_client,
    )

    output_file = 'test_exceptions_inside_log.log'
    setup_logger(output_file=output_file)
    logger = get_logger('test_exceptions_inside_log')

    # Create a custom exception to prevent other errors from slipping in
    class MyException(Exception):
        pass

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    # The first call is on dummy predictor failure
    message = str(np.random.randint(100)) + '_run_smbo'
    smbo_run_mock.side_effect = MyException(message)

    with unittest.mock.patch('autosklearn.automl.AutoML._get_logger') as mock:
        mock.return_value = logger
        with pytest.raises(MyException):
            automl.fit(
                X_train,
                Y_train,
                task=MULTICLASS_CLASSIFICATION,
            )
        with open(output_file) as f:
            assert message in f.read()

    # Cleanup
    os.unlink(output_file)
Example #14
def test_fail_if_dummy_prediction_fails(ta_run_mock, backend, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    datamanager = XYDataManager(
        X_train,
        Y_train,
        X_test,
        Y_test,
        task=2,
        feat_type=['Numerical' for i in range(X_train.shape[1])],
        dataset_name='iris',
    )

    time_for_this_task = 30
    per_run_time = 10
    auto = autosklearn.automl.AutoML(
        backend,
        time_for_this_task,
        per_run_time,
        initial_configurations_via_metalearning=25,
        metric=accuracy,
        dask_client=dask_client,
    )
    setup_logger()
    auto._logger = get_logger('test_fail_if_dummy_prediction_fails')
    auto._backend._make_internals_directory()
    auto._backend.save_datamanager(datamanager)

    # First of all, check that ta.run() is actually called.
    ta_run_mock.return_value = StatusType.SUCCESS, None, None, {}
    auto._do_dummy_prediction(datamanager, 1)
    ta_run_mock.assert_called_once_with(1, cutoff=time_for_this_task)

    # Case 1. Check that the function raises no error when the status type
    # is SUCCESS. ta.run() returns status, cost, runtime, and additional info.
    ta_run_mock.return_value = StatusType.SUCCESS, None, None, {}
    raised = False
    try:
        auto._do_dummy_prediction(datamanager, 1)
    except ValueError:
        raised = True
    assert not raised, 'Exception raised'

    # Case 2. Check that the function raises an error when the status type
    # returned by ta.run() is not SUCCESS.
    ta_run_mock.return_value = StatusType.CRASHED, None, None, {}
    with pytest.raises(
            ValueError,
            match=
            'Dummy prediction failed with run state StatusType.CRASHED and additional output: {}.'  # noqa
    ):
        auto._do_dummy_prediction(datamanager, 1)

    ta_run_mock.return_value = StatusType.ABORT, None, None, {}
    with pytest.raises(
            ValueError,
            match='Dummy prediction failed with run state StatusType.ABORT '
            'and additional output: {}.',
    ):
        auto._do_dummy_prediction(datamanager, 1)
    ta_run_mock.return_value = StatusType.TIMEOUT, None, None, {}
    with pytest.raises(
            ValueError,
            match='Dummy prediction failed with run state StatusType.TIMEOUT '
            'and additional output: {}.'):
        auto._do_dummy_prediction(datamanager, 1)
    ta_run_mock.return_value = StatusType.MEMOUT, None, None, {}
    with pytest.raises(
            ValueError,
            match='Dummy prediction failed with run state StatusType.MEMOUT '
            'and additional output: {}.',
    ):
        auto._do_dummy_prediction(datamanager, 1)
    ta_run_mock.return_value = StatusType.CAPPED, None, None, {}
    with pytest.raises(
            ValueError,
            match='Dummy prediction failed with run state StatusType.CAPPED '
            'and additional output: {}.'):
        auto._do_dummy_prediction(datamanager, 1)

    ta_run_mock.return_value = StatusType.CRASHED, None, None, {'exitcode': -6}
    with pytest.raises(
            ValueError,
            match=
            'The error suggests that the provided memory limits were too tight.',
    ):
        auto._do_dummy_prediction(datamanager, 1)
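
pytest.raises(match=...) above treats the match argument as a regular expression that is searched against the string of the raised exception. A tiny self-contained sketch of the same pattern with a toy function (nothing here is auto-sklearn API):

import re

import pytest


def fail_with_state(state):
    # Toy stand-in for a failing dummy-prediction run.
    raise ValueError('Dummy prediction failed with run state %s '
                     'and additional output: {}.' % state)


def test_fail_with_state():
    with pytest.raises(ValueError,
                       match=re.escape('run state StatusType.CRASHED')):
        fail_with_state('StatusType.CRASHED')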
Example #15
    def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
        backend_api = self._create_backend(
            'test_fail_if_dummy_prediction_fails')

        dataset = os.path.join(self.test_dir, '..', '.data', '401_bac')

        time_for_this_task = 30
        per_run_time = 10
        auto = autosklearn.automl.AutoML(
            backend_api,
            time_for_this_task,
            per_run_time,
            initial_configurations_via_metalearning=25,
        )
        setup_logger()
        auto._logger = get_logger('test_fail_if_dummy_prediction_fails')
        auto._backend._make_internals_directory()
        D = load_data(dataset, backend_api)
        auto._backend.save_datamanager(D)

        # First of all, check that ta.run() is actually called.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        auto._do_dummy_prediction(D, 1)
        ta_run_mock.assert_called_once_with(1, cutoff=time_for_this_task)

        # Case 1. Check that the function raises no error when the status type
        # is SUCCESS. ta.run() returns status, cost, runtime, and additional info.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        raised = False
        try:
            auto._do_dummy_prediction(D, 1)
        except ValueError:
            raised = True
        self.assertFalse(raised, 'Exception raised')

        # Case 2. Check that the function raises an error when the status type
        # returned by ta.run() is not SUCCESS.
        ta_run_mock.return_value = StatusType.CRASHED, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.ABORT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.TIMEOUT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.MEMOUT, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )
        ta_run_mock.return_value = StatusType.CAPPED, None, None, "test"
        self.assertRaisesRegex(
            ValueError,
            'Dummy prediction failed: test',
            auto._do_dummy_prediction,
            D,
            1,
        )

        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #16
    def test_automl_outputs(self):
        backend_api = self._create_backend('test_automl_outputs')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        name = 'iris'
        data_manager_file = os.path.join(
            backend_api.temporary_directory,
            '.auto-sklearn',
            'datamanager.pkl'
        )

        auto = autosklearn.automl.AutoML(
            backend_api, 20, 5,
            initial_configurations_via_metalearning=0,
            seed=100,
            metric=accuracy,
        )
        setup_logger()
        auto._logger = get_logger('test_automl_outputs')
        auto.fit(
            X=X_train,
            y=Y_train,
            X_test=X_test,
            y_test=Y_test,
            dataset_name=name,
            task=MULTICLASS_CLASSIFICATION,
        )

        # pickled data manager (without one hot encoding!)
        with open(data_manager_file, 'rb') as fh:
            D = pickle.load(fh)
            self.assertTrue(np.allclose(D.data['X_train'], X_train))

        # Check that all directories are there
        fixture = ['cv_models', 'true_targets_ensemble.npy',
                   'start_time_100', 'datamanager.pkl',
                   'predictions_ensemble',
                   'ensembles', 'predictions_test', 'models']
        self.assertEqual(sorted(os.listdir(os.path.join(backend_api.temporary_directory,
                                                        '.auto-sklearn'))),
                         sorted(fixture))

        # At least one ensemble, one validation prediction, one test
        # prediction and one model
        fixture = os.listdir(os.path.join(backend_api.temporary_directory,
                                          '.auto-sklearn', 'predictions_ensemble'))
        self.assertGreater(len(fixture), 0)

        fixture = glob.glob(os.path.join(backend_api.temporary_directory, '.auto-sklearn',
                                         'models', '100.*.model'))
        self.assertGreater(len(fixture), 0)

        fixture = os.listdir(os.path.join(backend_api.temporary_directory,
                                          '.auto-sklearn', 'ensembles'))
        self.assertIn('100.0000000001.ensemble', fixture)

        # Start time
        start_time_file_path = os.path.join(backend_api.temporary_directory,
                                            '.auto-sklearn', "start_time_100")
        with open(start_time_file_path, 'r') as fh:
            start_time = float(fh.read())
        self.assertGreaterEqual(time.time() - start_time, 10)

        del auto
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #17
    def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
        backend_api = self._create_backend('test_fail_if_dummy_prediction_fails')

        X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
        datamanager = XYDataManager(
            X_train, Y_train,
            X_test, Y_test,
            task=2,
            feat_type=['Numerical' for i in range(X_train.shape[1])],
            dataset_name='iris',
        )

        time_for_this_task = 30
        per_run_time = 10
        auto = autosklearn.automl.AutoML(backend_api,
                                         time_for_this_task,
                                         per_run_time,
                                         initial_configurations_via_metalearning=25,
                                         metric=accuracy,
                                         )
        setup_logger()
        auto._logger = get_logger('test_fail_if_dummy_prediction_fails')
        auto._backend._make_internals_directory()
        auto._backend.save_datamanager(datamanager)

        # First of all, check that ta.run() is actually called.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        auto._do_dummy_prediction(datamanager, 1)
        ta_run_mock.assert_called_once_with(1, cutoff=time_for_this_task)

        # Case 1. Check that the function raises no error when the status type
        # is SUCCESS. ta.run() returns status, cost, runtime, and additional info.
        ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
        raised = False
        try:
            auto._do_dummy_prediction(datamanager, 1)
        except ValueError:
            raised = True
        self.assertFalse(raised, 'Exception raised')

        # Case 2. Check that the function raises an error when the status type
        # returned by ta.run() is not SUCCESS.
        ta_run_mock.return_value = StatusType.CRASHED, None, None, "test"
        self.assertRaisesRegex(ValueError,
                               'Dummy prediction failed with run state StatusType.CRASHED '
                               'and additional output: test.',
                               auto._do_dummy_prediction,
                               datamanager, 1,
                               )
        ta_run_mock.return_value = StatusType.ABORT, None, None, "test"
        self.assertRaisesRegex(ValueError,
                               'Dummy prediction failed with run state StatusType.ABORT '
                               'and additional output: test.',
                               auto._do_dummy_prediction,
                               datamanager, 1,
                               )
        ta_run_mock.return_value = StatusType.TIMEOUT, None, None, "test"
        self.assertRaisesRegex(ValueError,
                               'Dummy prediction failed with run state StatusType.TIMEOUT '
                               'and additional output: test.',
                               auto._do_dummy_prediction,
                               datamanager, 1,
                               )
        ta_run_mock.return_value = StatusType.MEMOUT, None, None, "test"
        self.assertRaisesRegex(ValueError,
                               'Dummy prediction failed with run state StatusType.MEMOUT '
                               'and additional output: test.',
                               auto._do_dummy_prediction,
                               datamanager, 1,
                               )
        ta_run_mock.return_value = StatusType.CAPPED, None, None, "test"
        self.assertRaisesRegex(ValueError,
                               'Dummy prediction failed with run state StatusType.CAPPED '
                               'and additional output: test.',
                               auto._do_dummy_prediction,
                               datamanager, 1,
                               )

        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
Example #18
def test_automl_outputs(backend, dask_client):

    X_train, Y_train, X_test, Y_test = putil.get_dataset('iris')
    name = 'iris'
    data_manager_file = os.path.join(backend.temporary_directory,
                                     '.auto-sklearn', 'datamanager.pkl')

    auto = autosklearn.automl.AutoML(
        backend,
        30,
        5,
        initial_configurations_via_metalearning=0,
        seed=100,
        metric=accuracy,
        dask_client=dask_client,
    )
    setup_logger()
    auto._logger = get_logger('test_automl_outputs')
    auto.fit(
        X=X_train,
        y=Y_train,
        X_test=X_test,
        y_test=Y_test,
        dataset_name=name,
        task=MULTICLASS_CLASSIFICATION,
    )

    # Log file path
    log_file_path = glob.glob(
        os.path.join(backend.temporary_directory, 'AutoML*.log'))[0]

    # pickled data manager (without one hot encoding!)
    with open(data_manager_file, 'rb') as fh:
        D = pickle.load(fh)
        assert np.allclose(D.data['X_train'], X_train)

    # Check that all directories are there
    fixture = [
        'true_targets_ensemble.npy',
        'start_time_100',
        'datamanager.pkl',
        'ensemble_read_preds.pkl',
        'ensemble_read_scores.pkl',
        'runs',
        'ensembles',
    ]
    assert (sorted(
        os.listdir(os.path.join(backend.temporary_directory,
                                '.auto-sklearn'))) == sorted(fixture))

    # At least one ensemble, one validation prediction, one test
    # prediction and one model
    fixture = glob.glob(
        os.path.join(
            backend.temporary_directory,
            '.auto-sklearn',
            'runs',
            '*',
            'predictions_ensemble*npy',
        ))
    assert len(fixture) > 0

    fixture = glob.glob(
        os.path.join(backend.temporary_directory, '.auto-sklearn', 'runs', '*',
                     '100.*.model'))
    assert len(fixture) > 0

    fixture = os.listdir(
        os.path.join(backend.temporary_directory, '.auto-sklearn',
                     'ensembles'))
    assert '100.0000000000.ensemble' in fixture

    # Start time
    start_time_file_path = os.path.join(backend.temporary_directory,
                                        '.auto-sklearn', "start_time_100")
    with open(start_time_file_path, 'r') as fh:
        start_time = float(fh.read())
    assert time.time() - start_time >= 10, extract_msg_from_log(log_file_path)

    del auto