def test_do_dummy_prediction(self):
    for name in ['401_bac', '31_bac', 'adult', 'cadata']:
        output = os.path.join(self.test_dir, '..',
                              '.tmp_test_do_dummy_prediction')
        self._setUp(output)

        dataset = os.path.join(self.test_dir, '..', '.data', name)

        backend_api = backend.create(output, output)
        auto = autosklearn.automl.AutoML(
            backend_api, 20, 5,
            initial_configurations_via_metalearning=25)
        setup_logger()
        auto._logger = get_logger('test_do_dummy_predictions')
        auto._backend._make_internals_directory()
        D = load_data(dataset, backend_api)
        auto._backend.save_datamanager(D)
        auto._do_dummy_prediction(D, 1)

        # Ensure that the dummy predictions are not in the current working
        # directory, but in the output directory (under output).
        self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                     '.auto-sklearn')))
        self.assertTrue(os.path.exists(os.path.join(
            output, '.auto-sklearn', 'predictions_ensemble',
            'predictions_ensemble_1_00001.npy')))

        del auto
        self._tearDown(output)
def fit(self, X, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    # Derive a dataset name from a hash of the data if none was given.
    if dataset_name is None:
        m = hashlib.md5()
        m.update(X.data)
        dataset_name = m.hexdigest()

    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)

    logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)

    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]

    loaded_data_manager = XYDataManager(X, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)

    return self._fit(loaded_data_manager)
def test_do_dummy_prediction(self):
    for name in ['401_bac', '31_bac', 'adult', 'cadata']:
        output = os.path.join(self.test_dir, '..',
                              '.tmp_test_do_dummy_prediction')
        self._setUp(output)

        dataset = os.path.join(self.test_dir, '..', '.data', name)

        auto = autosklearn.automl.AutoML(
            output, output, 15, 15,
            initial_configurations_via_metalearning=25)
        setup_logger()
        auto._logger = get_logger('test_do_dummy_predictions')
        auto._backend._make_internals_directory()
        D = store_and_or_load_data(dataset, output)
        auto._do_dummy_prediction(D)

        # Ensure that the dummy predictions are not in the current working
        # directory, but in the output directory (under output).
        self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                     '.auto-sklearn')))
        self.assertTrue(os.path.exists(os.path.join(output,
                                                    '.auto-sklearn')))

        del auto
        self._tearDown(output)
def _get_logger(self, name):
    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._backend.temporary_directory,
                              '%s.log' % str(logger_name)),
                 self.logging_config,
                 )
    return get_logger(logger_name)
def fit(self, X, y, task=MULTICLASS_CLASSIFICATION, metric="acc_metric", feat_type=None, dataset_name=None): if dataset_name is None: m = hashlib.md5() m.update(X.data) dataset_name = m.hexdigest() self._backend.save_start_time(self._seed) self._stopwatch = StopWatch() self._dataset_name = dataset_name self._stopwatch.start_task(self._dataset_name) logger_name = "AutoML(%d):%s" % (self._seed, dataset_name) setup_logger(os.path.join(self._tmp_dir, "%s.log" % str(logger_name))) self._logger = get_logger(logger_name) if isinstance(metric, str): metric = STRING_TO_METRIC[metric] if feat_type is not None and len(feat_type) != X.shape[1]: raise ValueError( "Array feat_type does not have same number of " "variables as X has features. %d vs %d." % (len(feat_type), X.shape[1]) ) if feat_type is not None and not all([isinstance(f, bool) for f in feat_type]): raise ValueError("Array feat_type must only contain bools.") loaded_data_manager = XYDataManager( X, y, task=task, metric=metric, feat_type=feat_type, dataset_name=dataset_name, encode_labels=False ) return self._fit(loaded_data_manager)
def test_do_dummy_prediction(self):
    for name in ['401_bac', '31_bac', 'adult', 'cadata']:
        backend_api = self._create_backend('test_do_dummy_prediction')

        dataset = os.path.join(self.test_dir, '..', '.data', name)

        auto = autosklearn.automl.AutoML(
            backend_api, 20, 5,
            initial_configurations_via_metalearning=25)
        setup_logger()
        auto._logger = get_logger('test_do_dummy_predictions')
        auto._backend._make_internals_directory()
        D = load_data(dataset, backend_api)
        auto._backend.save_datamanager(D)
        auto._do_dummy_prediction(D, 1)

        # Ensure that the dummy predictions are not in the current working
        # directory, but in the temporary directory.
        self.assertFalse(os.path.exists(os.path.join(os.getcwd(),
                                                     '.auto-sklearn')))
        self.assertTrue(os.path.exists(os.path.join(
            backend_api.temporary_directory, '.auto-sklearn',
            'predictions_ensemble', 'predictions_ensemble_1_1.npy')))

        del auto
        self._tearDown(backend_api.temporary_directory)
        self._tearDown(backend_api.output_directory)
def start_automl(self, parser):
    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()

    # Build the data manager from the parsed command-line namespace.
    datamanager = get_data_manager(namespace=parser)
    self._stopwatch.start_task(datamanager.name)

    logger_name = 'AutoML(%d):%s' % (self._seed, datamanager.name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)

    self._datamanager = datamanager
    self._dataset_name = datamanager.name
    self.start()
def fit(self, X, y,
        task=MULTICLASS_CLASSIFICATION,
        metric='acc_metric',
        feat_type=None,
        dataset_name=None):
    # Derive a dataset name from a hash of the data if none was given.
    if dataset_name is None:
        m = hashlib.md5()
        m.update(X.data)
        dataset_name = m.hexdigest()

    self._backend.save_start_time(self._seed)
    self._stopwatch = StopWatch()
    self._dataset_name = dataset_name
    self._stopwatch.start_task(self._dataset_name)

    logger_name = 'AutoML(%d):%s' % (self._seed, dataset_name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)

    if isinstance(metric, str):
        metric = STRING_TO_METRIC[metric]

    # Validate feat_type: one entry per column of X, and each entry must be
    # the string 'categorical' or 'numerical' (case-insensitive).
    if feat_type is not None and len(feat_type) != X.shape[1]:
        raise ValueError('Array feat_type does not have same number of '
                         'variables as X has features. %d vs %d.'
                         % (len(feat_type), X.shape[1]))
    if feat_type is not None and not all([isinstance(f, str)
                                          for f in feat_type]):
        raise ValueError('Array feat_type must only contain strings.')
    if feat_type is not None:
        for ft in feat_type:
            if ft.lower() not in ['categorical', 'numerical']:
                raise ValueError('Only `Categorical` and `Numerical` are '
                                 'valid feature types, you passed `%s`' % ft)

    loaded_data_manager = XYDataManager(X, y,
                                        task=task,
                                        metric=metric,
                                        feat_type=feat_type,
                                        dataset_name=dataset_name,
                                        encode_labels=False)

    return self._fit(loaded_data_manager)
def fit_automl_dataset(self, dataset):
    self._stopwatch = StopWatch()
    self._backend.save_start_time(self._seed)

    name = os.path.basename(dataset)
    self._stopwatch.start_task(name)
    self._start_task(self._stopwatch, name)
    self._dataset_name = name

    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)

    self._logger.debug('======== Reading and converting data ==========')
    # Encoding the labels will be done after the metafeature calculation!
    loaded_data_manager = CompetitionDataManager(dataset,
                                                 encode_labels=False)
    loaded_data_manager_str = str(loaded_data_manager).split('\n')
    for part in loaded_data_manager_str:
        self._logger.debug(part)

    return self._fit(loaded_data_manager)
def fit_automl_dataset(self, dataset):
    self._stopwatch = StopWatch()
    self._backend.save_start_time(self._seed)

    name = os.path.basename(dataset)
    self._stopwatch.start_task(name)
    self._start_task(self._stopwatch, name)
    self._dataset_name = name

    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._tmp_dir, '%s.log' % str(logger_name)))
    self._logger = get_logger(logger_name)

    self._logger.debug('======== Reading and converting data ==========')
    # Encoding the labels will be done after the metafeature calculation!
    loaded_data_manager = CompetitionDataManager(
        dataset, encode_labels=False,
        max_memory_in_mb=float(self._ml_memory_limit) / 3)
    loaded_data_manager_str = str(loaded_data_manager).split('\n')
    for part in loaded_data_manager_str:
        self._logger.debug(part)

    return self._fit(loaded_data_manager)
def _get_logger(self, name):
    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._tmp_dir,
                              '%s.log' % str(logger_name)))
    return get_logger(logger_name)
def test_fail_if_dummy_prediction_fails(self, ta_run_mock):
    backend_api = self._create_backend(
        'test_fail_if_dummy_prediction_fails')

    dataset = os.path.join(self.test_dir, '..', '.data', '401_bac')

    time_for_this_task = 30
    per_run_time = 10
    auto = autosklearn.automl.AutoML(
        backend_api,
        time_for_this_task,
        per_run_time,
        initial_configurations_via_metalearning=25,
    )
    setup_logger()
    auto._logger = get_logger('test_fail_if_dummy_prediction_fails')
    auto._backend._make_internals_directory()
    D = load_data(dataset, backend_api)
    auto._backend.save_datamanager(D)

    # First of all, check that ta.run() is actually called.
    ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
    auto._do_dummy_prediction(D, 1)
    ta_run_mock.assert_called_once_with(1, cutoff=time_for_this_task)

    # Case 1. Check that the function raises no error when the status type
    # is SUCCESS. ta.run() returns status, cost, runtime, and additional info.
    ta_run_mock.return_value = StatusType.SUCCESS, None, None, "test"
    raised = False
    try:
        auto._do_dummy_prediction(D, 1)
    except ValueError:
        raised = True
    self.assertFalse(raised, 'Exception raised')

    # Case 2. Check that the function raises an error whenever the status
    # type returned by ta.run() is not SUCCESS.
    ta_run_mock.return_value = StatusType.CRASHED, None, None, "test"
    self.assertRaisesRegex(
        ValueError,
        'Dummy prediction failed: test',
        auto._do_dummy_prediction,
        D, 1,
    )
    ta_run_mock.return_value = StatusType.ABORT, None, None, "test"
    self.assertRaisesRegex(
        ValueError,
        'Dummy prediction failed: test',
        auto._do_dummy_prediction,
        D, 1,
    )
    ta_run_mock.return_value = StatusType.TIMEOUT, None, None, "test"
    self.assertRaisesRegex(
        ValueError,
        'Dummy prediction failed: test',
        auto._do_dummy_prediction,
        D, 1,
    )
    ta_run_mock.return_value = StatusType.MEMOUT, None, None, "test"
    self.assertRaisesRegex(
        ValueError,
        'Dummy prediction failed: test',
        auto._do_dummy_prediction,
        D, 1,
    )
    ta_run_mock.return_value = StatusType.CAPPED, None, None, "test"
    self.assertRaisesRegex(
        ValueError,
        'Dummy prediction failed: test',
        auto._do_dummy_prediction,
        D, 1,
    )

    self._tearDown(backend_api.temporary_directory)
    self._tearDown(backend_api.output_directory)
def _get_logger(self, name):
    logger_name = 'AutoML(%d):%s' % (self._seed, name)
    setup_logger(os.path.join(self._backend.temporary_directory,
                              '%s.log' % str(logger_name)))
    return get_logger(logger_name)