def test_evaluate_multiclass_classification(self):
    X_train, Y_train, X_test, Y_test = get_dataset('iris')
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
              'is_sparse': False, 'target_num': 3}
    D.data = {'X_train': X_train, 'Y_train': Y_train,
              'X_valid': X_valid, 'X_test': X_test}
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['ridge'],
        include_preprocessors=['select_rates'])

    err = np.zeros([N_TEST_RUNS])
    for i in range(N_TEST_RUNS):
        print "Evaluate configuration: %d; result:" % i,
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = HoldoutEvaluator(D_, configuration)
        if not self._fit(evaluator):
            print
            continue
        err[i] = evaluator.predict()
        print err[i]

        self.assertTrue(np.isfinite(err[i]))
        self.assertGreaterEqual(err[i], 0.0)

    print "Number of times it was worse than random guessing: " + \
        str(np.sum(err > 1))
def test_5000_classes(self):
    weights = ([0.0002] * 4750) + ([0.0001] * 250)
    X, Y = sklearn.datasets.make_classification(n_samples=10000,
                                                n_features=20,
                                                n_classes=5000,
                                                n_clusters_per_class=1,
                                                n_informative=15,
                                                n_redundant=5,
                                                n_repeated=0,
                                                weights=weights,
                                                flip_y=0,
                                                class_sep=1.0,
                                                hypercube=True,
                                                shift=None,
                                                scale=1.0,
                                                shuffle=True,
                                                random_state=1)
    self.assertEqual(250, np.sum(np.bincount(Y) == 1))

    D = Dummy()
    D.info = {'metric': 'r2_metric', 'task': MULTICLASS_CLASSIFICATION,
              'is_sparse': False, 'target_num': 1}
    D.data = {'X_train': X, 'Y_train': Y, 'X_valid': X, 'X_test': X}
    D.feat_type = ['numerical'] * 20  # one entry per feature

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])
    configuration = configuration_space.sample_configuration()
    D_ = copy.deepcopy(D)
    evaluator = HoldoutEvaluator(D_, configuration)
    evaluator.fit()
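# Why the bincount assertion above should hold (a sketch of the
# arithmetic, assuming sklearn allocates roughly weight * n_samples
# samples per class before distributing the remainder):
#   0.0001 * 10000 = 1 sample for each of the 250 low-weight classes,
#   0.0002 * 10000 = 2 samples for each of the 4750 high-weight classes,
# so with flip_y=0 exactly the 250 low-weight classes are singletons.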
def test_with_abalone(self):
    dataset = "abalone"
    dataset_dir = os.path.join(os.path.dirname(__file__), ".datasets")
    D = CompetitionDataManager(dataset, dataset_dir)
    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    errors = []
    for i in range(N_TEST_RUNS):
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = NestedCVEvaluator(D_, configuration,
                                      inner_cv_folds=2, outer_cv_folds=2)
        if not self._fit(evaluator):
            print
            continue
        err = evaluator.predict()
        self.assertLess(err, 0.99)
        self.assertTrue(np.isfinite(err))
        errors.append(err)

    # Every run should have succeeded, and the best error should be
    # within a reasonable bound
    self.assertEqual(N_TEST_RUNS, len(errors))
    self.assertLess(min(errors), 0.77)
def test_evaluate_multiclass_classification(self):
    X_train, Y_train, X_test, Y_test = get_dataset('iris')
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
              'is_sparse': False, 'target_num': 3}
    D.data = {'X_train': X_train, 'Y_train': Y_train,
              'X_valid': X_valid, 'X_test': X_test}
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['ridge'],
        include_preprocessors=['select_rates'])

    err = np.zeros([N_TEST_RUNS])
    num_models_better_than_random = 0
    for i in range(N_TEST_RUNS):
        print "Evaluate configuration: %d; result:" % i,
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = CVEvaluator(D_, configuration, with_predictions=True)
        if not self._fit(evaluator):
            print
            continue
        e_, Y_optimization_pred, Y_valid_pred, Y_test_pred = \
            evaluator.predict()
        err[i] = e_
        print err[i], configuration['classifier']

        num_targets = len(np.unique(Y_train))
        self.assertTrue(np.isfinite(err[i]))
        self.assertGreaterEqual(err[i], 0.0)
        # Test that ten models were trained
        self.assertEqual(len(evaluator.models), 10)
        self.assertEqual(Y_optimization_pred.shape[0], Y_train.shape[0])
        self.assertEqual(Y_optimization_pred.shape[1], num_targets)
        self.assertEqual(Y_valid_pred.shape[0], Y_valid.shape[0])
        self.assertEqual(Y_valid_pred.shape[1], num_targets)
        self.assertEqual(Y_test_pred.shape[0], Y_test.shape[0])
        self.assertEqual(Y_test_pred.shape[1], num_targets)

        # Test some basic statistics of the predictions
        if err[i] < 0.5:
            self.assertTrue(0.3 < Y_valid_pred.mean() < 0.36666)
            self.assertGreaterEqual(Y_valid_pred.std(), 0.01)
            self.assertTrue(0.3 < Y_test_pred.mean() < 0.36666)
            self.assertGreaterEqual(Y_test_pred.std(), 0.01)
            num_models_better_than_random += 1
    self.assertGreater(num_models_better_than_random, 5)
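# Sketch of why Y_optimization_pred above covers every training row: in
# k-fold CV each sample is held out exactly once, so stacking the
# per-fold predictions yields one prediction per training sample. This
# helper is illustrative only -- CVEvaluator may organise its folds
# differently; it uses sklearn's pre-0.18 cross_validation API, which
# matches the era of this codebase.
def _out_of_fold_predictions(X, Y, n_folds=10):
    from sklearn.cross_validation import StratifiedKFold
    from sklearn.linear_model import LogisticRegression
    Y_oof = np.zeros((Y.shape[0], len(np.unique(Y))))
    for train_idx, test_idx in StratifiedKFold(Y, n_folds=n_folds):
        model = LogisticRegression().fit(X[train_idx], Y[train_idx])
        Y_oof[test_idx] = model.predict_proba(X[test_idx])
    # shape: (n_samples, n_classes), like Y_optimization_pred
    return Y_oof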
def test_file_output(self):
    output_dir = os.path.join(os.getcwd(), ".test")
    try:
        shutil.rmtree(output_dir)
    except OSError:
        pass

    X_train, Y_train, X_test, Y_test = get_dataset('iris')
    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    D = Dummy()
    D.info = {'metric': 'bac_metric', 'task': MULTICLASS_CLASSIFICATION,
              'is_sparse': False, 'target_num': 3}
    D.data = {'X_train': X_train, 'Y_train': Y_train,
              'X_valid': X_valid, 'X_test': X_test}
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']
    D.basename = "test"

    configuration_space = get_configuration_space(D.info)

    while True:
        configuration = configuration_space.sample_configuration()
        evaluator = HoldoutEvaluator(D, configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_dir=output_dir,
                                     output_y_test=True)
        if not self._fit(evaluator):
            print
            continue
        evaluator.predict()
        evaluator.file_output()
        self.assertTrue(os.path.exists(os.path.join(
            output_dir, "y_optimization.npy")))
        break
def test_predict_proba_binary_classification(self):
    X_train, Y_train, X_test, Y_test = get_dataset('iris')

    # Turn the three-class iris problem into a binary one
    eliminate_class_two = Y_train != 2
    X_train = X_train[eliminate_class_two]
    Y_train = Y_train[eliminate_class_two]
    eliminate_class_two = Y_test != 2
    X_test = X_test[eliminate_class_two]
    Y_test = Y_test[eliminate_class_two]

    X_valid = X_test[:25, ]
    Y_valid = Y_test[:25, ]
    X_test = X_test[25:, ]
    Y_test = Y_test[25:, ]

    class Dummy2(object):
        def predict_proba(self, y, batch_size=200):
            return np.array([[0.1, 0.9], [0.7, 0.3]])

    model = Dummy2()
    task_type = BINARY_CLASSIFICATION

    D = Dummy()
    D.info = {'metric': 'bac_metric', 'task': task_type,
              'is_sparse': False, 'target_num': 3}
    D.data = {'X_train': X_train, 'Y_train': Y_train,
              'X_valid': X_valid, 'X_test': X_test}
    D.feat_type = ['numerical', 'Numerical', 'numerical', 'numerical']

    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['ridge'],
        include_preprocessors=['select_rates'])
    configuration = configuration_space.sample_configuration()

    evaluator = HoldoutEvaluator(D, configuration)
    pred = evaluator.predict_proba(None, model, task_type)
    # For binary tasks only the positive-class column should be kept
    expected = [[0.9], [0.3]]
    for i in range(len(expected)):
        self.assertEqual(expected[i], pred[i])
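# Minimal sketch of the reduction the assertion above implies, assuming
# HoldoutEvaluator keeps only the positive-class column for binary
# tasks (this helper is illustrative, not part of the evaluator API):
def _positive_class_column(proba):
    # (n_samples, 2) probabilities -> (n_samples, 1), positive class only
    return proba[:, 1:]

# _positive_class_column(np.array([[0.1, 0.9], [0.7, 0.3]]))
# -> array([[0.9], [0.3]]), matching `expected`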
def test_with_abalone(self):
    dataset = "abalone"
    dataset_dir = os.path.join(os.path.dirname(__file__), ".datasets")
    D = CompetitionDataManager(dataset, dataset_dir)
    configuration_space = get_configuration_space(
        D.info,
        include_estimators=['extra_trees'],
        include_preprocessors=['no_preprocessing'])

    errors = []
    for i in range(N_TEST_RUNS):
        configuration = configuration_space.sample_configuration()
        D_ = copy.deepcopy(D)
        evaluator = CVEvaluator(D_, configuration, cv_folds=5)
        if not self._fit(evaluator):
            print
            continue
        err = evaluator.predict()
        self.assertLess(err, 0.99)
        self.assertTrue(np.isfinite(err))
        errors.append(err)

    # Every run should have succeeded, and the best error should be
    # within a reasonable bound
    self.assertEqual(N_TEST_RUNS, len(errors))
    self.assertLess(min(errors), 0.77)
def _fit(self, D):
    # TODO: check that data and task definition fit together!
    self.metric_ = D.info['metric']
    self.task_ = D.info['task']
    self.target_num_ = D.info['target_num']

    # Set environment variable:
    seed = os.environ.get("AUTOSKLEARN_SEED")
    if seed is not None and int(seed) != self.seed:
        raise ValueError("It seems you have already started an instance "
                         "of AutoSklearn in this thread.")
    else:
        os.environ["AUTOSKLEARN_SEED"] = str(self.seed)

    # == Split dataset and store data for the ensemble script
    X_train, X_ensemble, Y_train, Y_ensemble = split_data.split_data(
        D.data['X_train'], D.data['Y_train'])

    true_labels_ensemble_filename = os.path.join(
        self.tmp_dir, "true_labels_ensemble.npy")
    true_labels_ensemble_lock = true_labels_ensemble_filename + ".lock"
    with lockfile.LockFile(true_labels_ensemble_lock):
        if not os.path.exists(true_labels_ensemble_filename):
            np.save(true_labels_ensemble_filename, Y_ensemble)

    del X_train, X_ensemble, Y_train, Y_ensemble

    time_needed_to_load_data = self.stopwatch_.wall_elapsed(self.basename_)
    time_left_after_reading = max(0, self.time_left_for_this_task -
                                  time_needed_to_load_data)
    self.logger.info("Remaining time after reading %s %5.2f sec" %
                     (self.basename_, time_left_after_reading))
    self.stopwatch_.stop_task("LoadData")

    # == Calculate metafeatures
    self.stopwatch_.start_task("CalculateMetafeatures")
    categorical = [True if feat_type.lower() in ["categorical"] else False
                   for feat_type in D.feat_type]

    if self.initial_configurations_via_metalearning <= 0:
        ml = None
    elif D.info["task"] in \
            [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION]:
        ml = metalearning.MetaLearning()
        self.logger.debug("Start calculating metafeatures for %s" %
                          self.basename_)
        ml.calculate_metafeatures_with_labels(D.data["X_train"],
                                              D.data["Y_train"],
                                              categorical=categorical,
                                              dataset_name=self.basename_)
    else:
        ml = None
        self.logger.critical("Metafeatures not calculated")
    self.stopwatch_.stop_task("CalculateMetafeatures")
    self.logger.debug("Calculating metafeatures (categorical attributes) "
                      "took %5.2fsec" %
                      self.stopwatch_.wall_elapsed("CalculateMetafeatures"))

    self.stopwatch_.start_task("OneHot")
    D.perform1HotEncoding()
    self.ohe_ = D.encoder_
    self.stopwatch_.stop_task("OneHot")

    # == Pickle the data manager
    self.stopwatch_.start_task("StoreDatamanager")
    data_manager_path = os.path.join(self.tmp_dir,
                                     self.basename_ + "_Manager.pkl")
    data_manager_lockfile = data_manager_path + ".lock"
    with lockfile.LockFile(data_manager_lockfile):
        if not os.path.exists(data_manager_path):
            # Pickle in binary mode; text mode would corrupt the file
            with open(data_manager_path, 'wb') as fh:
                pickle.dump(D, fh, protocol=-1)
            self.logger.debug("Pickled data manager at %s" %
                              data_manager_path)
        else:
            self.logger.debug("Data manager already present at %s" %
                              data_manager_path)
    self.stopwatch_.stop_task("StoreDatamanager")

    # == Create a searchspace
    self.stopwatch_.start_task("CreateConfigSpace")
    configspace_path = os.path.join(self.tmp_dir, "space.pcs")
    self.configuration_space = paramsklearn.get_configuration_space(D.info)
    self.configuration_space_created_hook()
    sp_string = pcs_parser.write(self.configuration_space)
    configuration_space_lockfile = configspace_path + ".lock"
    with lockfile.LockFile(configuration_space_lockfile):
        if not os.path.exists(configspace_path):
            with open(configspace_path, "w") as fh:
                fh.write(sp_string)
            self.logger.debug("Configuration space written to %s" %
                              configspace_path)
        else:
            self.logger.debug("Configuration space already present at %s" %
                              configspace_path)
    self.stopwatch_.stop_task("CreateConfigSpace")

    if ml is None:
        initial_configurations = []
    elif D.info["task"] in \
            [MULTICLASS_CLASSIFICATION, BINARY_CLASSIFICATION]:
        self.stopwatch_.start_task("CalculateMetafeaturesEncoded")
        ml.calculate_metafeatures_encoded_labels(
            X_train=D.data["X_train"],
            Y_train=D.data["Y_train"],
            # One entry per feature (shape[1]); everything is numerical
            # after 1-hot encoding
            categorical=[False] * D.data["X_train"].shape[1],
            dataset_name=self.basename_)
        self.stopwatch_.stop_task("CalculateMetafeaturesEncoded")
        self.logger.debug(
            "Calculating metafeatures (encoded attributes) took %5.2fsec" %
            self.stopwatch_.wall_elapsed("CalculateMetafeaturesEncoded"))

        self.logger.debug(ml._metafeatures_labels.__repr__(verbosity=2))
        self.logger.debug(
            ml._metafeatures_encoded_labels.__repr__(verbosity=2))

        self.stopwatch_.start_task("InitialConfigurations")
        try:
            initial_configurations = \
                ml.create_metalearning_string_for_smac_call(
                    self.configuration_space, self.basename_, self.metric_,
                    self.task_,
                    True if D.info['is_sparse'] == 1 else False,
                    self.initial_configurations_via_metalearning,
                    self.metadata_directory)
        except Exception as e:
            import traceback
            self.logger.error(str(e))
            self.logger.error(traceback.format_exc())
            initial_configurations = []
        self.stopwatch_.stop_task("InitialConfigurations")

        self.logger.debug("Initial configurations: (%d)",
                          len(initial_configurations))
        for initial_configuration in initial_configurations:
            self.logger.debug(initial_configuration)
        self.logger.debug(
            "Looking for initial configurations took %5.2fsec" %
            self.stopwatch_.wall_elapsed("InitialConfigurations"))
        self.logger.info(
            "Time left for %s after finding initial configurations: "
            "%5.2fsec" % (self.basename_, self.time_left_for_this_task -
                          self.stopwatch_.wall_elapsed(self.basename_)))
    else:
        initial_configurations = []
        self.logger.critical("Metafeatures encoded not calculated")

    # == Set up a directory where all the trained models will be pickled to
    if self.keep_models:
        self.model_directory_ = os.path.join(self.tmp_dir,
                                             "models_%d" % self.seed)
        os.mkdir(self.model_directory_)
    self.ensemble_indices_directory_ = os.path.join(
        self.tmp_dir, "ensemble_indices_%d" % self.seed)
    os.mkdir(self.ensemble_indices_directory_)

    # == RUN SMAC
    self.stopwatch_.start_task("runSmac")

    # = Create an empty instance file
    instance_file = os.path.join(self.tmp_dir, "instances.txt")
    instance_file_lock = instance_file + ".lock"
    with lockfile.LockFile(instance_file_lock):
        # Check for the instance file itself, not for its lock file
        if not os.path.exists(instance_file):
            with open(instance_file, "w") as fh:
                fh.write("holdout")
            self.logger.debug("Created instance file %s" % instance_file)
        else:
            self.logger.debug("Instance file already present at %s" %
                              instance_file)

    # = Start SMAC
    time_left_for_smac = max(0, self.time_left_for_this_task -
                             self.stopwatch_.wall_elapsed(self.basename_))
    self.logger.debug("Start SMAC with %5.2fsec time left" %
                      time_left_for_smac)
    proc_smac, smac_call = \
        submit_process.run_smac(dataset_name=self.basename_,
                                dataset=data_manager_path,
                                tmp_dir=self.tmp_dir,
                                searchspace=configspace_path,
                                instance_file=instance_file,
                                limit=time_left_for_smac,
                                cutoff_time=self.per_run_time_limit,
                                initial_challengers=initial_configurations,
                                memory_limit=self.ml_memory_limit,
                                seed=self.seed)
    self.logger.debug(smac_call)
    self.stopwatch_.stop_task("runSmac")

    # == RUN ensemble builder
    self.stopwatch_.start_task("runEnsemble")
    time_left_for_ensembles = max(0, self.time_left_for_this_task -
                                  self.stopwatch_.wall_elapsed(
                                      self.basename_))
    self.logger.debug("Start ensemble with %5.2fsec time left" %
                      time_left_for_ensembles)
    proc_ensembles = submit_process.run_ensemble_builder(
        tmp_dir=self.tmp_dir,
        dataset_name=self.basename_,
        task_type=self.task_,
        metric=self.metric_,
        limit=time_left_for_ensembles,
        output_dir=self.output_dir,
        ensemble_size=self.ensemble_size,
        ensemble_nbest=self.ensemble_nbest,
        seed=self.seed,
        ensemble_indices_output_dir=self.ensemble_indices_directory_)
    self.stopwatch_.stop_task("runEnsemble")

    del D

    if self.queue is not None:
        self.queue.put([time_needed_to_load_data, data_manager_path,
                        proc_smac, proc_ensembles])
    else:
        proc_smac.wait()
        proc_ensembles.wait()

    # Delete AutoSklearn environment variable
    del os.environ["AUTOSKLEARN_SEED"]
    return self
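# The true-labels file, data manager pickle, configuration space, and
# instance file above all use the same write-once-under-lock idiom. A
# minimal sketch of that pattern, assuming the `lockfile` package used
# above; `path` and `write_fn` are illustrative names:
def _write_once(path, write_fn):
    # Serialize concurrent writers; only the first process that acquires
    # the lock and finds the file missing actually writes it.
    with lockfile.LockFile(path + ".lock"):
        if not os.path.exists(path):
            write_fn(path)

# e.g. _write_once(true_labels_ensemble_filename,
#                  lambda p: np.save(p, Y_ensemble))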
def test_metalearning(self):
    dataset_name = 'digits'

    initial_challengers = {
        'acc_metric': ["--initial-challengers \" "
                       "-adaboost:algorithm 'SAMME.R' "
                       "-adaboost:learning_rate '0.400363929326' "
                       "-adaboost:max_depth '5' "
                       "-adaboost:n_estimators '319' "
                       "-balancing:strategy 'none' "
                       "-classifier 'adaboost' "
                       "-imputation:strategy 'most_frequent' "
                       "-preprocessor 'no_preprocessing' "
                       "-rescaling:strategy 'min/max'\""],
        'auc_metric': ["--initial-challengers \" "
                       "-adaboost:algorithm 'SAMME.R' "
                       "-adaboost:learning_rate '0.966883114819' "
                       "-adaboost:max_depth '5' "
                       "-adaboost:n_estimators '412' "
                       "-balancing:strategy 'weighting' "
                       "-classifier 'adaboost' "
                       "-imputation:strategy 'median' "
                       "-preprocessor 'no_preprocessing' "
                       "-rescaling:strategy 'min/max'\""],
        'bac_metric': ["--initial-challengers \" "
                       "-adaboost:algorithm 'SAMME.R' "
                       "-adaboost:learning_rate '0.400363929326' "
                       "-adaboost:max_depth '5' "
                       "-adaboost:n_estimators '319' "
                       "-balancing:strategy 'none' "
                       "-classifier 'adaboost' "
                       "-imputation:strategy 'most_frequent' "
                       "-preprocessor 'no_preprocessing' "
                       "-rescaling:strategy 'min/max'\""],
        'f1_metric': ["--initial-challengers \" "
                      "-adaboost:algorithm 'SAMME.R' "
                      "-adaboost:learning_rate '0.966883114819' "
                      "-adaboost:max_depth '5' "
                      "-adaboost:n_estimators '412' "
                      "-balancing:strategy 'weighting' "
                      "-classifier 'adaboost' "
                      "-imputation:strategy 'median' "
                      "-preprocessor 'no_preprocessing' "
                      "-rescaling:strategy 'min/max'\""],
        'pac_metric': ["--initial-challengers \" "
                       "-adaboost:algorithm 'SAMME.R' "
                       "-adaboost:learning_rate '0.400363929326' "
                       "-adaboost:max_depth '5' "
                       "-adaboost:n_estimators '319' "
                       "-balancing:strategy 'none' "
                       "-classifier 'adaboost' "
                       "-imputation:strategy 'most_frequent' "
                       "-preprocessor 'no_preprocessing' "
                       "-rescaling:strategy 'min/max'\""]}

    for metric in initial_challengers:
        configuration_space = get_configuration_space(
            {'metric': metric,
             'task': MULTICLASS_CLASSIFICATION,
             'is_sparse': False},
            include_preprocessors=['no_preprocessing'])

        X_train, Y_train, X_test, Y_test = get_dataset(dataset_name)
        categorical = [False] * X_train.shape[1]

        ml = MetaLearning()
        ml.calculate_metafeatures_with_labels(
            X_train, Y_train, categorical, dataset_name)
        ml.calculate_metafeatures_encoded_labels(
            X_train, Y_train, categorical, dataset_name)
        initial_configuration_strings_for_smac = \
            ml.create_metalearning_string_for_smac_call(
                configuration_space, dataset_name, metric,
                MULTICLASS_CLASSIFICATION, False, 1, None)

        print metric
        self.assertEqual(initial_challengers[metric],
                         initial_configuration_strings_for_smac)
def main(dataset_info, mode, seed, params, mode_args=None):
    """This command line interface has three different operation modes:

    * CV: useful for the Tweakathon
    * 1/3 test split: useful to evaluate a configuration
    * cv on 2/3 train split: useful to optimize hyperparameters in a
      training mode before testing a configuration on the 1/3 test split.

    It must by no means be used for the Auto part of the competition!
    """
    if mode != "test":
        num_run = get_new_run_num()

    for key in params:
        try:
            params[key] = int(params[key])
        except ValueError:
            try:
                params[key] = float(params[key])
            except ValueError:
                pass

    if seed is not None:
        seed = int(float(seed))
    else:
        seed = 1

    output_dir = os.getcwd()

    D = store_and_or_load_data(dataset_info=dataset_info,
                               outputdir=output_dir)

    cs = get_configuration_space(D.info)
    configuration = configuration_space.Configuration(cs, params)
    metric = D.info['metric']

    global evaluator

    # Train/test split
    if mode == 'holdout':
        evaluator = HoldoutEvaluator(D, configuration,
                                     with_predictions=True,
                                     all_scoring_functions=True,
                                     output_y_test=True,
                                     seed=seed, num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()

        model_directory = os.path.join(os.getcwd(), "models_%d" % seed)
        if os.path.exists(model_directory):
            model_filename = os.path.join(model_directory,
                                          "%s.model" % num_run)
            # Pickle in binary mode
            with open(model_filename, "wb") as fh:
                pickle.dump(evaluator.model, fh, -1)

    elif mode == 'test':
        evaluator = TestEvaluator(D, configuration,
                                  all_scoring_functions=True,
                                  seed=seed)
        evaluator.fit()
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(["%s: %s" % (m_, value)
                                        for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed,
            additional_run_info)

    # CV on the whole training set
    elif mode == 'cv':
        evaluator = CVEvaluator(D, configuration,
                                with_predictions=True,
                                all_scoring_functions=True,
                                output_y_test=True,
                                cv_folds=mode_args['folds'],
                                seed=seed, num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()

    elif mode == 'partial_cv':
        evaluator = CVEvaluator(D, configuration,
                                all_scoring_functions=True,
                                cv_folds=mode_args['folds'],
                                seed=seed, num_run=num_run)
        evaluator.partial_fit(mode_args['fold'])
        scores = evaluator.predict()
        duration = time.time() - evaluator.starttime

        score = scores[metric]
        additional_run_info = ";".join(["%s: %s" % (m_, value)
                                        for m_, value in scores.items()])
        additional_run_info += ";" + "duration: " + str(duration)

        print "Result for ParamILS: %s, %f, 1, %f, %d, %s" % (
            "SAT", abs(duration), score, evaluator.seed,
            additional_run_info)

    elif mode == 'nested-cv':
        evaluator = NestedCVEvaluator(
            D, configuration,
            with_predictions=True,
            inner_cv_folds=mode_args['inner_folds'],
            outer_cv_folds=mode_args['outer_folds'],
            all_scoring_functions=True,
            output_y_test=True,
            seed=seed, num_run=num_run)
        evaluator.fit()
        signal.signal(15, empty_signal_handler)
        evaluator.finish_up()

    else:
        raise ValueError("Must choose a legal mode.")
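# Hypothetical invocations of main() for the modes dispatched above; the
# dataset path and parameter values are illustrative, not from the
# codebase. Left as comments since main() runs an evaluator immediately:
#
# main("/data/iris", mode="holdout", seed=1,
#      params={"classifier": "ridge"})
# main("/data/iris", mode="cv", seed=1,
#      params={"classifier": "ridge"}, mode_args={"folds": 10})
# main("/data/iris", mode="partial_cv", seed=1,
#      params={"classifier": "ridge"},
#      mode_args={"folds": 10, "fold": 0})
# main("/data/iris", mode="nested-cv", seed=1,
#      params={"classifier": "ridge"},
#      mode_args={"inner_folds": 5, "outer_folds": 5})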