def test_fit_n_jobs(self, _fit_automl_patch, Process_patch, build_automl_patch):
    # Return the process patch itself on a call to __init__
    Process_patch.return_value = Process_patch

    cls = AutoSklearnEstimator()
    cls.fit()
    self.assertEqual(build_automl_patch.call_count, 1)
    self.assertEqual(len(build_automl_patch.call_args[0]), 0)
    self.assertEqual(
        build_automl_patch.call_args[1],
        {
            'seed': 1,
            'shared_mode': False,
            'ensemble_size': 50,
            'initial_configurations_via_metalearning': 25,
            'output_folder': None,
            'tmp_folder': None,
        },
    )
    self.assertEqual(Process_patch.call_count, 0)

    cls = AutoSklearnEstimator(n_jobs=5)
    cls.fit()
    # Plus the one from the first call
    self.assertEqual(build_automl_patch.call_count, 6)
    self.assertEqual(len(cls._automl), 5)
    for i in range(1, 6):
        self.assertEqual(len(build_automl_patch.call_args_list[i][0]), 0)
        self.assertEqual(len(build_automl_patch.call_args_list[i][1]), 7)
        # The seed is a MagicMock, so there is nothing to compare here
        self.assertIn('seed', build_automl_patch.call_args_list[i][1])
        self.assertEqual(
            build_automl_patch.call_args_list[i][1]['shared_mode'],
            True,
        )
        self.assertEqual(
            build_automl_patch.call_args_list[i][1]['ensemble_size'],
            50 if i == 1 else 0,
        )
        self.assertEqual(
            build_automl_patch.call_args_list[i][1]
            ['initial_configurations_via_metalearning'],
            25 if i == 1 else 0,
        )
        if i > 1:
            self.assertEqual(
                build_automl_patch.call_args_list[i][1]
                ['smac_scenario_args']['initial_incumbent'],
                'RANDOM',
            )

    self.assertEqual(Process_patch.start.call_count, 4)
    for i in range(2, 6):
        self.assertEqual(
            len(Process_patch.call_args_list[i - 2][1]['kwargs']),
            3,
        )
        self.assertFalse(
            Process_patch.call_args_list[i - 2][1]['kwargs']['load_models'])
    self.assertEqual(Process_patch.join.call_count, 4)

    self.assertEqual(_fit_automl_patch.call_count, 1)
    self.assertEqual(len(_fit_automl_patch.call_args[0]), 0)
    self.assertEqual(len(_fit_automl_patch.call_args[1]), 3)
    self.assertTrue(_fit_automl_patch.call_args[1]['load_models'])
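# The three mock arguments above are injected by a stack of
# unittest.mock.patch decorators (mocks are passed bottom-up). A minimal
# sketch of the assumed wiring; the exact patch targets are assumptions
# and may not match this test module verbatim:
#
#     import unittest.mock
#
#     @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator.build_automl')
#     @unittest.mock.patch('multiprocessing.Process')
#     @unittest.mock.patch('autosklearn.estimators.AutoSklearnEstimator._fit_automl')
#     def test_fit_n_jobs(self, _fit_automl_patch, Process_patch, build_automl_patch):
#         ...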
def test_fit_n_jobs_negative(build_automl_patch):
    n_cores = cpu_count()
    cls = AutoSklearnEstimator(n_jobs=-1, ensemble_size=0)
    cls.fit()
    assert cls._n_jobs == n_cores
def test_fit_n_jobs_negative(self, build_automl_patch):
    n_cores = cpu_count()
    cls = AutoSklearnEstimator(n_jobs=-1)
    cls.fit()
    self.assertEqual(len(cls._automl), n_cores)
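# Both variants above pin down the same contract from the public API:
# n_jobs=-1 must resolve to the machine's core count. A minimal sketch of
# that resolution; the helper name _resolve_n_jobs is hypothetical, not
# auto-sklearn's actual internal, and the tests only exercise n_jobs=-1.
from multiprocessing import cpu_count


def _resolve_n_jobs(n_jobs):
    """Map an sklearn-style n_jobs value to a concrete worker count."""
    # Any negative value is taken here to mean "use all available cores"
    return cpu_count() if n_jobs is not None and n_jobs < 0 else n_jobs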
def test_leaderboard(tmp_dir: str, estimator_type: Type[AutoSklearnEstimator],
                     dataset_name: str):
    # The comprehensive test takes a substantial amount of time; set this
    # manually if required.
    MAX_COMBO_SIZE_FOR_INCLUDE_PARAM = 3  # in [0, len(valid_columns) + 1]
    column_types = AutoSklearnEstimator._leaderboard_columns()

    # Create a dict of all possible param values for each param,
    # with some invalid ones of the incorrect type.
    # chain.from_iterable flattens the per-size combination iterators so
    # that each 'include' candidate is a single tuple of column names.
    include_combinations = itertools.chain.from_iterable(
        itertools.combinations(column_types['all'], item_count)
        for item_count in range(1, MAX_COMBO_SIZE_FOR_INCLUDE_PARAM))
    valid_params = {
        'detailed': [True, False],
        'ensemble_only': [True, False],
        'top_k': [-10, 0, 1, 10, 'all'],
        'sort_by': [*column_types['all'], 'invalid'],
        'sort_order': ['ascending', 'descending', 'auto', 'invalid', None],
        'include': itertools.chain([None, 'invalid', 'type'],
                                   include_combinations),
    }

    # Create a generator of all possible combinations of valid_params
    params_generator = iter(
        dict(zip(valid_params.keys(), param_values))
        for param_values in itertools.product(*valid_params.values()))

    X_train, Y_train, _, _ = putil.get_dataset(dataset_name)
    model = estimator_type(time_left_for_this_task=30,
                           per_run_time_limit=5,
                           tmp_folder=tmp_dir,
                           seed=1)
    model.fit(X_train, Y_train)

    for params in params_generator:
        # Convert from iterator to solid list
        if (params['include'] is not None
                and not isinstance(params['include'], str)):
            params['include'] = list(params['include'])

        # Invalid top_k should raise an error; it must be a positive int
        # or 'all'
        if not (params['top_k'] == 'all' or params['top_k'] > 0):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Invalid sort_by column
        elif params['sort_by'] not in column_types['all']:
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Shouldn't accept an invalid sort order
        elif params['sort_order'] not in ['ascending', 'descending', 'auto']:
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # include is a single str but not a valid column
        elif (isinstance(params['include'], str)
              and params['include'] not in column_types['all']):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Crash if include is a list but contains an invalid column
        elif (isinstance(params['include'], list)
              and len(set(params['include']) - set(column_types['all'])) != 0):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Can't have just model_id, in both the single str and list case
        elif (params['include'] == 'model_id'
              or params['include'] == ['model_id']):
            with pytest.raises(ValueError):
                model.leaderboard(**params)

        # Else all valid combinations should be validated
        else:
            leaderboard = model.leaderboard(**params)

            # top_k should never be less than the rows given back.
            # It can however be larger.
            if isinstance(params['top_k'], int):
                assert params['top_k'] >= len(leaderboard)

            # Check the right columns are present and in the right order.
            # model_id is set as the index, so it is not included in the
            # pandas columns.
            columns = list(leaderboard.columns)

            def exclude(lst, s):
                return [x for x in lst if x != s]

            if params['include'] is not None:
                # include with only a single str should be the only column
                if isinstance(params['include'], str):
                    assert params['include'] in columns and len(columns) == 1
                # include as a list should have all the columns, minus
                # model_id
                else:
                    assert columns == exclude(params['include'], 'model_id')
            elif params['detailed']:
                assert columns == exclude(column_types['detailed'], 'model_id')
            else:
                assert columns == exclude(column_types['simple'], 'model_id')

            # If it's ensemble_only, we can only check that every model
            # shown has a positive 'ensemble_weight', when that column is
            # present
            if params['ensemble_only'] and 'ensemble_weight' in columns:
                assert all(leaderboard['ensemble_weight'] > 0)
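# For reference, a minimal usage sketch of the API the loop above
# exercises, assuming an already-fitted estimator `model` as constructed
# in test_leaderboard (column names such as 'cost' come from the test's
# column_types and may differ between versions):
#
#     board = model.leaderboard(
#         detailed=False,
#         ensemble_only=True,
#         top_k=10,
#         sort_by='cost',
#         sort_order='ascending',
#         include=None,
#     )
#     print(board)  # pandas DataFrame indexed by model_id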