def test_basic(self):
    bandit = self._bandit_cls()
    algo = Random(bandit)
    trials = Trials()
    experiment = Experiment(trials, algo, async=False)
    experiment.max_queue_len = 50
    experiment.run(self._n_steps)
    print
    print self._bandit_cls
    print bandit.loss_target
    print trials.average_best_error(bandit)
    assert trials.average_best_error(bandit) - bandit.loss_target < .2
    print
def test_basic(self):
    bandit = self._bandit_cls()
    print 'bandit params', bandit.params
    algo = Random(bandit)
    print 'algo params', algo.vh.params
    trials = Trials()
    experiment = Experiment(trials, algo, async=False)
    experiment.catch_bandit_exceptions = False
    experiment.max_queue_len = 50
    experiment.run(self._n_steps)
    print
    print self._bandit_cls
    print bandit.loss_target
    print trials.average_best_error(bandit)
    assert trials.average_best_error(bandit) - bandit.loss_target < .2
    print
def opt_method(hsidata, initializers, resdir, max_evals):
    dataset_name = hsidata.dataset_name
    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    configpath = os.path.join(__location__, 'datasets.cfg')
    parser = ConfigParser()
    parser.read(configpath)
    max_iter = parser.getint(dataset_name, 'max_iter')

    def objective_func(hsidata, hyperpars):
        Y = hsidata.data
        ref_endmembers = hsidata.ref_endmembers
        initializer = hyperpars.pop('initializer')
        # `initials` is populated below, before fmin starts calling objective_func
        init_endmembers = initials[initializer][0]
        init_abundances = initials[initializer][1]
        A, S, J, SAD = lhalf(ref_endmembers, init_endmembers, init_abundances, Y,
                             **hyperpars, verbose=True)
        MSE = mse(Y, A, np.transpose(S))
        S = S.reshape(hsidata.n_rows, hsidata.n_cols, hsidata.n_endmembers).transpose((1, 0, 2))
        results = {'endmembers': A, 'abundances': S, 'loss': J, 'SAD': SAD, 'MSE': MSE}
        loss = (SAD[-1] * (1 + np.std(np.sum(S, -1).flatten()))
                * (1 + np.abs(1 - np.mean(np.sum(S, -1).flatten()))))
        return {'loss': loss, 'status': STATUS_OK, 'attachments': results}

    initials = {}
    initial_keys = []
    for key, value in initializers.items():
        initial_keys.append(key)
        initials[key] = hsidata.initialize(value)

    space = {
        'max_iter': max_iter,
        'q': hp.uniform('lhalf_' + dataset_name + '_q', 0, 1),
        'delta': hp.lognormal('lhalf_' + dataset_name + '_delta', 0, 2),
        'initializer': hp.choice('lhalf_' + dataset_name + '_initializer', initializers)
    }
    h = [hp.lognormal('lhalf_' + dataset_name + '_h' + str(i), 0, 1)
         for i in range(hsidata.n_endmembers)]
    space['h'] = h

    trials = Trials()
    pars = fmin(lambda x: objective_func(hsidata, x), space=space, algo=tpe.suggest,
                max_evals=max_evals, trials=trials,
                rstate=np.random.RandomState(random_seed))
    improvements = reduce(improvement_only, trials.losses(), [])
    save_config(resdir, dataset_name, pars, trials.average_best_error())
    print(list(enumerate(initial_keys)))
    return improvements, pars, trials
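# The helper `improvement_only`, used with reduce(...) above, is not shown in this snippet.
# Below is a minimal sketch of one plausible implementation, assuming the intent is to keep
# only the losses that improve on the best seen so far; that semantics is an assumption, not
# taken from the original code. Note that on Python 3 `reduce` must be imported from functools.
from functools import reduce

def improvement_only(acc, loss):
    # hyperopt reports failed trials as None in trials.losses(); skip those
    if loss is None:
        return acc
    # append only when the loss improves on the best recorded so far
    if not acc or loss < acc[-1]:
        acc.append(loss)
    return acc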
def test_basic(self):
    bandit = self._bandit_cls()
    # print 'bandit params', bandit.params, bandit
    # print 'algo params', algo.vh.params
    trials = Trials()
    fmin(lambda x: x, bandit.expr, trials=trials, algo=suggest, max_evals=self._n_steps)
    assert trials.average_best_error(bandit) - bandit.loss_target < .2
def test_basic(self):
    domain = self._domain_cls()
    # print 'domain params', domain.params, domain
    # print 'algo params', algo.vh.params
    trials = Trials()
    fmin(lambda x: x, domain.expr, trials=trials, algo=suggest, max_evals=self._n_steps)
    assert trials.average_best_error(domain) - domain.loss_target < .2
class HyperoptRegressorImpl:
    def __init__(self, estimator=None, max_evals=50, cv=5, handle_cv_failure=False,
                 scoring='r2', best_score=1.0, max_opt_time=None, pgo: Optional[PGO] = None):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = RandomForestRegressor
        else:
            self.estimator = estimator
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
        self.max_opt_time = max_opt_time

    def fit(self, X_train, y_train):
        opt_start_time = time.time()

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, _, execution_time = cross_val_score_track_trials(
                    reg, X_train, y_train, cv=KFold(self.cv), scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the score based on
                # a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    reg_trained = reg.fit(X_train_part, y_train_part)
                    scorer = check_scoring(reg, scoring=self.scoring)
                    cv_score = scorer(reg_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, reg.to_json()))
                    raise e
            return cv_score, execution_time

        def get_final_trained_reg(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(self.estimator, params)
            reg = reg.fit(X_train, y_train)
            return reg

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            return_dict = {}
            try:
                score, execution_time = hyperopt_train_test(params, X_train=X_train, y_train=y_train)
                return_dict = {
                    'loss': self.best_score - score,
                    'time': execution_time,
                    'status': STATUS_OK
                }
            except BaseException as e:
                logger.warning(
                    f'Exception caught in HyperoptRegressor: {type(e)}, {traceback.format_exc()} '
                    f'with hyperparams: {params}, setting status to FAIL')
                return_dict = {'status': STATUS_FAIL}
            return return_dict

        try:
            fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
                 trials=self.trials, rstate=np.random.RandomState(SEED))
        except SystemExit:
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely')
        try:
            best_params = space_eval(self.search_space, self.trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
                .format(-1 * self.trials.average_best_error(), self.max_evals, best_params))
            trained_reg = get_final_trained_reg(best_params, X_train, y_train)
            self.best_estimator = trained_reg
        except BaseException as e:
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            trained_reg = None
        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        reg = self.best_estimator
        try:
            predictions = reg.predict(X_eval)
        except ValueError as e:
            logger.warning("ValueError in predicting using regressor:{}, the error is:{}".format(reg, e))
            predictions = None
        return predictions

    def get_trials(self):
        return self.trials
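# Minimal usage sketch for HyperoptRegressorImpl as defined above. The dataset and the
# argument values here are illustrative assumptions, not taken from the original code.
from sklearn.datasets import load_diabetes

X, y = load_diabetes(return_X_y=True)
opt = HyperoptRegressorImpl(max_evals=10, cv=3, scoring='r2', max_opt_time=600.0)
opt.fit(X, y)              # runs the hyperopt trials and refits the best pipeline
preds = opt.predict(X)     # predictions from the best trained estimator
trials = opt.get_trials()  # hyperopt Trials object with the per-trial results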
class HyperoptClassifierImpl:
    def __init__(self, estimator=None, max_evals=50, cv=5, handle_cv_failure=False,
                 scoring='accuracy', best_score=0.0, max_opt_time=None, pgo: Optional[PGO] = None):
        """ Instantiate the HyperoptClassifier that will use the given estimator and other parameters to select the
        best performing trainable instantiation of the estimator. This optimizer uses negation of accuracy_score
        as the performance metric to be minimized by Hyperopt.

        Parameters
        ----------
        estimator : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline, by default None
        max_evals : int, optional
            Number of trials of Hyperopt search, by default 50
        cv : an integer or an object that has a split function as a generator yielding (train, test) splits as arrays of indices.
            Integer value is used as number of folds in sklearn.model_selection.StratifiedKFold, default is 5.
            Note that any of the iterators from
            https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators can be used here.
            The fit method performs cross validation on the input dataset for each trial,
            and uses the mean cross validation performance for optimization.
            This behavior is also impacted by the handle_cv_failure flag, by default 5
        handle_cv_failure : bool, optional
            A boolean flag indicating how to deal with cross validation failure for a trial.
            If True, the trial is continued by doing an 80-20 percent train-validation split of the dataset input to fit
            and reporting the score on the validation part.
            If False, the trial is terminated by assigning accuracy to zero, by default False
        scoring : string or a scorer object created using
            https://scikit-learn.org/stable/modules/generated/sklearn.metrics.make_scorer.html#sklearn.metrics.make_scorer.
            A string from sklearn.metrics.SCORERS.keys() can be used or a scorer created from one of sklearn.metrics
            (https://scikit-learn.org/stable/modules/classes.html#module-sklearn.metrics).
            A completely custom scorer object can be created from a python function following the example at
            https://scikit-learn.org/stable/modules/model_evaluation.html
            The metric has to return a scalar value, and note that scikit-learn's scorer object always returns values
            such that higher score is better. Since Hyperopt solves a minimization problem,
            we negate the score value to pass to Hyperopt, by default 'accuracy'.
        best_score : float, optional
            The best score for the specified scorer. This allows us to return a loss to hyperopt that is
            greater than or equal to zero, where zero is the best loss.
            By default, this is set to zero to follow current behavior.
        max_opt_time : float, optional
            Maximum amount of time in seconds for the optimization. By default, None, implying no runtime bound.
        pgo : Optional[PGO], optional
            [description], by default None

        Raises
        ------
        e
            [description]

        Examples
        --------
        >>> from sklearn.metrics import make_scorer, f1_score, accuracy_score
        >>> lr = LogisticRegression()
        >>> clf = HyperoptClassifier(estimator=lr, scoring='accuracy', cv=5, max_evals=2)
        >>> from sklearn import datasets
        >>> diabetes = datasets.load_diabetes()
        >>> X = diabetes.data[:150]
        >>> y = diabetes.target[:150]
        >>> trained = clf.fit(X, y)
        >>> predictions = trained.predict(X)

        Other scoring metrics:

        >>> clf = HyperoptClassifier(estimator=lr, scoring=make_scorer(f1_score, average='macro'), cv=3, max_evals=2)
        """
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression
        else:
            self.estimator = estimator
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()
        self.max_opt_time = max_opt_time

    def fit(self, X_train, y_train):
        opt_start_time = time.time()

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv, scoring=self.scoring)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the score based on
                # a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    # predictions = clf_trained.predict(X_validation)
                    scorer = check_scoring(clf, scoring=self.scoring)
                    cv_score = scorer(clf_trained, X_validation, y_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, clf.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def get_final_trained_clf(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(self.estimator, params)
            clf = clf.fit(X_train, y_train)
            return clf

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            params_to_save = copy.deepcopy(params)
            return_dict = {}
            try:
                score, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
                return_dict = {
                    'loss': self.best_score - score,
                    'time': execution_time,
                    'log_loss': logloss,
                    'status': STATUS_OK,
                    'params': params_to_save
                }
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptClassifier:{}, setting status to FAIL".format(e))
                return_dict = {'status': STATUS_FAIL}
            return return_dict

        try:
            fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
                 trials=self.trials, rstate=np.random.RandomState(SEED))
        except SystemExit:
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely')
        try:
            best_params = space_eval(self.search_space, self.trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
                .format(self.best_score - self.trials.average_best_error(), self.max_evals, best_params))
            trained_clf = get_final_trained_clf(best_params, X_train, y_train)
            self.best_estimator = trained_clf
        except BaseException as e:
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            trained_clf = None
        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        clf = self.best_estimator
        try:
            predictions = clf.predict(X_eval)
        except ValueError as e:
            logger.warning("ValueError in predicting using classifier:{}, the error is:{}".format(clf, e))
            predictions = None
        return predictions

    def get_trials(self):
        return self.trials
class HyperoptRegressor():
    def __init__(self, model=None, max_evals=50, cv=5, handle_cv_failure=False,
                 pgo: Optional[PGO] = None):
        self.max_evals = max_evals
        if model is None:
            self.model = RandomForestRegressor
        else:
            self.model = model
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.model, pgo=pgo)])
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()

    def fit(self, X_train, y_train):
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(self.model, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    reg, X_train, y_train, cv=KFold(self.cv), scoring=r2_score)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the score based on
                # a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    reg_trained = reg.fit(X_train_part, y_train_part)
                    predictions = reg_trained.predict(X_validation)
                    execution_time = time.time() - start
                    cv_score = r2_score(y_validation, predictions)
                    logloss = 0  # log loss is not tracked on the fallback train-test split
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, reg.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def get_final_trained_reg(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            reg = create_instance_from_hyperopt_search_space(self.model, params)
            reg = reg.fit(X_train, y_train)
            return reg

        def f(params):
            try:
                r_squared, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptRegressor:{} with hyperparams:{}, setting r_squared to zero"
                    .format(e, params))
                r_squared = 0
                execution_time = 0
                logloss = 0
            return {
                'loss': -r_squared,
                'time': execution_time,
                'log_loss': logloss,
                'status': STATUS_OK
            }

        fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
             trials=self.trials, rstate=np.random.RandomState(SEED))
        best_params = space_eval(self.search_space, self.trials.argmin)
        logger.info(
            'best accuracy: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
            .format(-1 * self.trials.average_best_error(), self.max_evals, best_params))
        trained_reg = get_final_trained_reg(best_params, X_train, y_train)
        return trained_reg

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        reg = self.model
        try:
            predictions = reg.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using regressor:{}, the error is:{}".format(reg, e))
            predictions = None
        return predictions

    def get_trials(self):
        return self.trials
class HyperoptImpl:
    def __init__(self, estimator=None, max_evals=50, cv=5, handle_cv_failure=False,
                 scoring='accuracy', best_score=0.0, max_opt_time=None, max_eval_time=None,
                 pgo: Optional[PGO] = None, show_progressbar=True, args_to_scorer=None,
                 verbose=False):
        self.max_evals = max_evals
        if estimator is None:
            self.estimator = LogisticRegression()
        else:
            self.estimator = estimator
        self.search_space = hp.choice('meta_model', [hyperopt_search_space(self.estimator, pgo=pgo)])
        self.scoring = scoring
        self.best_score = best_score
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self._trials = Trials()
        self.max_opt_time = max_opt_time
        self.max_eval_time = max_eval_time
        self.show_progressbar = show_progressbar
        if args_to_scorer is not None:
            self.args_to_scorer = args_to_scorer
        else:
            self.args_to_scorer = {}
        self.verbose = verbose

    def fit(self, X_train, y_train):
        opt_start_time = time.time()
        self.cv = check_cv(self.cv, y=y_train, classifier=True)  # TODO: Replace the classifier flag value by using tags?

        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    trainable, X_train, y_train, cv=self.cv, scoring=self.scoring,
                    args_to_scorer=self.args_to_scorer)
                logger.debug("Successful trial of hyperopt with hyperparameters:{}".format(params))
            except BaseException as e:
                # If there is any error in cross validation, use the score based on
                # a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    trained = trainable.fit(X_train_part, y_train_part)
                    scorer = check_scoring(trainable, scoring=self.scoring)
                    cv_score = scorer(trained, X_validation, y_validation, **self.args_to_scorer)
                    execution_time = time.time() - start
                    y_pred_proba = trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, trainable.to_json()))
                    raise e
            return cv_score, logloss, execution_time

        def proc_train_test(params, X_train, y_train, return_dict):
            return_dict['params'] = copy.deepcopy(params)
            try:
                score, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
                return_dict['loss'] = self.best_score - score
                return_dict['time'] = execution_time
                return_dict['log_loss'] = logloss
                return_dict['status'] = STATUS_OK
            except BaseException as e:
                logger.warning(
                    f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} "
                    f"with hyperparams: {params}, setting status to FAIL")
                return_dict['status'] = STATUS_FAIL
                return_dict['error_msg'] = (
                    f"Exception caught in Hyperopt:{type(e)}, {traceback.format_exc()} "
                    f"with hyperparams: {params}")
                if self.verbose:
                    print(return_dict['error_msg'])

        def get_final_trained_estimator(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            trainable = create_instance_from_hyperopt_search_space(self.estimator, params)
            trained = trainable.fit(X_train, y_train)
            return trained

        def f(params):
            current_time = time.time()
            if (self.max_opt_time is not None) and ((current_time - opt_start_time) > self.max_opt_time):
                # if max optimization time set, and we have crossed it, exit optimization completely
                sys.exit(0)
            if self.max_eval_time:
                # Run hyperopt in a subprocess that can be interrupted
                manager = multiprocessing.Manager()
                proc_dict = manager.dict()
                p = multiprocessing.Process(
                    target=proc_train_test,
                    args=(params, X_train, y_train, proc_dict))
                p.start()
                p.join(self.max_eval_time)
                if p.is_alive():
                    p.terminate()
                    p.join()
                    logger.warning(
                        f"Maximum allotted evaluation time exceeded with hyperparams: {params}, "
                        f"setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
                if 'status' not in proc_dict:
                    logger.warning("Corrupted results, setting status to FAIL")
                    proc_dict['status'] = STATUS_FAIL
            else:
                proc_dict = {}
                proc_train_test(params, X_train, y_train, proc_dict)
            return proc_dict

        try:
            fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
                 trials=self._trials, rstate=np.random.RandomState(SEED),
                 show_progressbar=self.show_progressbar)
        except SystemExit:
            logger.warning('Maximum allotted optimization time exceeded. Optimization exited prematurely')
        except AllTrialsFailed:
            self._best_estimator = None
            if STATUS_OK not in self._trials.statuses():
                raise ValueError('Error from hyperopt, none of the trials succeeded.')
        try:
            best_params = space_eval(self.search_space, self._trials.argmin)
            logger.info(
                'best score: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'.format(
                    self.best_score - self._trials.average_best_error(), self.max_evals, best_params))
            trained = get_final_trained_estimator(best_params, X_train, y_train)
            self._best_estimator = trained
        except BaseException as e:
            logger.warning('Unable to extract the best parameters from optimization, the error: {}'.format(e))
            self._best_estimator = None
        return self

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        if self._best_estimator is None:
            raise ValueError("Can not predict as the best estimator is None. Either an attempt to call `predict` "
                             "before calling `fit` or all the trials during `fit` failed.")
        trained = self._best_estimator
        try:
            predictions = trained.predict(X_eval)
        except ValueError as e:
            logger.warning("ValueError in predicting using Hyperopt:{}, the error is:{}".format(trained, e))
            predictions = None
        return predictions

    def summary(self):
        """Table summarizing the trial results (ID, loss, time, log_loss, status).

        Returns
        -------
        result : DataFrame"""
        def make_record(trial_dict):
            try:
                loss = trial_dict['result']['loss']
            except BaseException:
                loss = np.nan
            try:
                time = trial_dict['result']['time']
            except BaseException:
                time = '-'
            try:
                log_loss = trial_dict['result']['log_loss']
            except BaseException:
                log_loss = np.nan
            return {
                'name': f'p{trial_dict["tid"]}',
                'tid': trial_dict['tid'],
                'loss': trial_dict['result'].get('loss', float('nan')),
                'time': trial_dict['result'].get('time', float('nan')),
                'log_loss': trial_dict['result'].get('log_loss', float('nan')),
                'status': trial_dict['result']['status']}
        records = [make_record(td) for td in self._trials.trials]
        result = pd.DataFrame.from_records(records, index='name')
        return result

    def get_pipeline(self, pipeline_name=None, astype='lale'):
        """Retrieve one of the trials.

        Parameters
        ----------
        pipeline_name : union type, default None
            - string
                Key for table returned by summary(), return a trainable pipeline.
            - None
                When not specified, return the best trained pipeline found.
        astype : 'lale' or 'sklearn', default 'lale'
            Type of resulting pipeline.

        Returns
        -------
        result : Trained operator if best, trainable operator otherwise.
        """
        if pipeline_name is None:
            result = getattr(self, '_best_estimator', None)
        else:
            tid = int(pipeline_name[1:])
            params = self._trials.trials[tid]['result']['params']
            result = create_instance_from_hyperopt_search_space(self.estimator, params)
        if result is None or astype == 'lale':
            return result
        assert astype == 'sklearn', astype
        return result.export_to_sklearn_pipeline()
class TPEOptimization(BaseOptimization):
    def __init__(self, sorter, recording, gt_sorting, params_to_opt, space=None,
                 run_schedule=[100], metric='accuracy', recdir=None, outfile=None,
                 x0=None, y0=None):
        BaseOptimization.__init__(self, sorter=sorter, recording=recording,
                                  gt_sorting=gt_sorting, params_to_opt=params_to_opt,
                                  space=space, run_schedule=run_schedule, metric=metric,
                                  recdir=recdir, outfile=outfile, x0=x0, y0=y0)
        self.trials = Trials()
        self.space = self.define_space(space)

    def run(self):
        results = self.optimise(self.params_to_opt, self.function_wrapper, self.run_schedule)
        self.results_obj = results
        if self.outfile is not None:
            self.save_results(self.outfile)

    def optimise(self, parameter_definitions, function, run_schedule):
        start_time = time.time()
        best = hyperopt.fmin(function, self.space, algo=tpe.suggest,
                             max_evals=run_schedule[0], trials=self.trials,
                             show_progressbar=False)
        results_obj = self.get_optimization_details()
        results_obj['time_taken'] = time.time() - start_time
        print("--- %s seconds ---" % (time.time() - start_time))
        return results_obj

    def define_space(self, space):
        if space is not None:
            return space
        space = {}
        for key, value in self.params_to_opt.items():
            if type(value) is list:
                space[key] = hp.choice(key, value)
            # note: for numeric bounds the hp.choice above is overridden by a range distribution
            if type(value[0]) is int:
                space[key] = hp.quniform(key, value[0], value[1], 1)
            if type(value[0]) is float:
                space[key] = hp.uniform(key, value[0], value[1])
        return space

    def get_best_params(self):
        best_params = {}
        for key, value in self.params_to_opt.items():
            if type(value[0]) is int:
                best_params[key] = int(self.trials.best_trial['misc']['vals'][key][0])
            else:
                best_params[key] = self.trials.best_trial['misc']['vals'][key][0]
        return best_params

    def get_trials(self):
        return self.trials

    def get_optimization_details(self):
        results_obj = {}
        results_obj['optimal_params'] = self.get_best_params()
        results_obj['best_score'] = -self.trials.best_trial['result']['loss']
        results_obj['params_evaluated'] = self.trials.vals
        results_obj['scores'] = [t['result']['loss'] for t in self.trials.trials]
        results_obj['iter_min_found'] = self.trials.best_trial['tid']
        results_obj['trials'] = self.trials
        results_obj['avg_best_score'] = self.trials.average_best_error()
        results_obj['total_iter'] = self.iteration
        return results_obj

    def plot_convergence(self):
        ys = [t['result']['loss'] for t in self.trials.trials]
        plt.figure(figsize=(15, 3.5))
        ax = plt.gca()
        ax.grid()
        n_calls = len(ys)
        mins = [np.min(ys[:i]) for i in range(1, n_calls + 1)]
        ax.plot(range(1, n_calls + 1), mins, c='b', marker=".", markersize=12, lw=2)
        plt.xlabel('n_calls')
        plt.ylabel('min(-Accuracy)')
        plt.title('Convergence of TPE in sorting optimisation')

    def plot_histograms(self):
        parameters = list(self.trials.trials[0]['misc']['vals'].keys())
        n = len(parameters)
        cmap = plt.cm.jet
        for i, val in enumerate(parameters):
            xs = np.array([t['misc']['vals'][val] for t in self.trials.trials]).ravel()
            ys = [-t['result']['loss'] for t in self.trials.trials]
            ys = np.array(ys)
            plt.figure(figsize=(3, 3))
            plt.hist(xs)
            plt.title(val)
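# Sketch of how define_space above interprets a `params_to_opt` dictionary: every value is
# expected to be a two-element numeric list, where integer bounds map to hp.quniform with a
# step of 1 and float bounds map to hp.uniform. The parameter names below are illustrative
# assumptions, not taken from the original code.
params_to_opt = {
    'detect_threshold': [3.0, 8.0],  # float bounds -> hp.uniform
    'num_workers': [1, 8],           # int bounds   -> hp.quniform(..., 1)
}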
class HyperoptClassifier():
    def __init__(self, model=None, max_evals=50, cv=5, handle_cv_failure=False,
                 pgo: Optional[PGO] = None):
        """ Instantiate the HyperoptClassifier that will use the given model and other parameters to select the
        best performing trainable instantiation of the model. This optimizer uses negation of accuracy_score
        as the performance metric to be minimized by Hyperopt.

        Parameters
        ----------
        model : lale.operators.IndividualOp or lale.operators.Pipeline, optional
            A valid Lale individual operator or pipeline, by default None
        max_evals : int, optional
            Number of trials of Hyperopt search, by default 50
        cv : an integer or an object that has a split function as a generator yielding (train, test) splits as arrays of indices.
            Integer value is used as number of folds in sklearn.model_selection.StratifiedKFold, default is 5.
            Note that any of the iterators from
            https://scikit-learn.org/stable/modules/cross_validation.html#cross-validation-iterators can be used here.
            The fit method performs cross validation on the input dataset for each trial,
            and uses the mean cross validation performance for optimization.
            This behavior is also impacted by the handle_cv_failure flag, by default 5
        handle_cv_failure : bool, optional
            A boolean flag indicating how to deal with cross validation failure for a trial.
            If True, the trial is continued by doing an 80-20 percent train-validation split of the dataset input to fit
            and reporting the accuracy on the validation part.
            If False, the trial is terminated by assigning accuracy to zero, by default False
        pgo : Optional[PGO], optional
            [description], by default None

        Raises
        ------
        e
            [description]
        """
        self.max_evals = max_evals
        if model is None:
            self.model = LogisticRegression
        else:
            self.model = model
        self.search_space = hp.choice(
            'meta_model', [hyperopt_search_space(self.model, pgo=pgo)])
        self.handle_cv_failure = handle_cv_failure
        self.cv = cv
        self.trials = Trials()

    def fit(self, X_train, y_train):
        def hyperopt_train_test(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(self.model, params)
            try:
                cv_score, logloss, execution_time = cross_val_score_track_trials(
                    clf, X_train, y_train, cv=self.cv)
                logger.debug("Successful trial of hyperopt")
            except BaseException as e:
                # If there is any error in cross validation, use the accuracy based on
                # a random train-test split as the evaluation criterion
                if self.handle_cv_failure:
                    X_train_part, X_validation, y_train_part, y_validation = train_test_split(
                        X_train, y_train, test_size=0.20)
                    start = time.time()
                    clf_trained = clf.fit(X_train_part, y_train_part)
                    predictions = clf_trained.predict(X_validation)
                    execution_time = time.time() - start
                    y_pred_proba = clf_trained.predict_proba(X_validation)
                    try:
                        logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
                    except BaseException:
                        logloss = 0
                        logger.debug("Warning, log loss cannot be computed")
                    cv_score = accuracy_score(y_validation, [round(pred) for pred in predictions])
                else:
                    logger.debug(e)
                    logger.debug("Error {} with pipeline:{}".format(e, clf.to_json()))
                    raise e
            # print("TRIALS")
            # print(json.dumps(self.get_trials().trials, default=myconverter, indent=4))
            return cv_score, logloss, execution_time

        def get_final_trained_clf(params, X_train, y_train):
            warnings.filterwarnings("ignore")
            clf = create_instance_from_hyperopt_search_space(self.model, params)
            clf = clf.fit(X_train, y_train)
            return clf

        def f(params):
            params_to_save = copy.deepcopy(params)
            try:
                acc, logloss, execution_time = hyperopt_train_test(
                    params, X_train=X_train, y_train=y_train)
            except BaseException as e:
                logger.warning(
                    "Exception caught in HyperoptClassifier:{}, setting accuracy to zero".format(e))
                acc = 0
                execution_time = 0
                logloss = 0
            return {
                'loss': -acc,
                'time': execution_time,
                'log_loss': logloss,
                'status': STATUS_OK,
                'params': params_to_save
            }

        fmin(f, self.search_space, algo=tpe.suggest, max_evals=self.max_evals,
             trials=self.trials, rstate=np.random.RandomState(SEED))
        best_params = space_eval(self.search_space, self.trials.argmin)
        logger.info(
            'best accuracy: {:.1%}\nbest hyperparams found using {} hyperopt trials: {}'
            .format(-1 * self.trials.average_best_error(), self.max_evals, best_params))
        trained_clf = get_final_trained_clf(best_params, X_train, y_train)
        return trained_clf

    def predict(self, X_eval):
        import warnings
        warnings.filterwarnings("ignore")
        clf = self.model
        try:
            predictions = clf.predict(X_eval)
        except ValueError as e:
            logger.warning(
                "ValueError in predicting using classifier:{}, the error is:{}".format(clf, e))
            predictions = None
        return predictions

    def get_trials(self):
        return self.trials
def opt_method(hsidata, initializers, resdir, max_evals):
    dataset_name = hsidata.dataset_name
    __location__ = os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__)))
    mleng = matlab.engine.start_matlab()
    mleng.addpath(__location__)
    configpath = os.path.join(__location__, 'datasets.cfg')
    parser = ConfigParser()
    parser.read(configpath)
    max_iter = parser.getint(dataset_name, 'max_iter')
    y = hsidata.data
    Y = matlab.double(y.tolist())
    ref_endmembers = matlab.double(hsidata.ref_endmembers.tolist())
    init_endmembers = matlab.double(hsidata.init_endmembers.tolist())
    init_abundances = matlab.double(hsidata.init_abundances.tolist())
    verbose = True

    def objective_func(hyperpars):
        output = mleng.lhalf(ref_endmembers, init_endmembers, init_abundances, Y,
                             hyperpars['q'], hyperpars['delta'], hyperpars['h'],
                             hyperpars['max_iter'], verbose, nargout=5)
        A = np.array(output[0])
        S = np.array(output[1])
        try:
            J = np.array(output[2]).tolist()[0]
        except TypeError:
            J = [output[2]]
        SAD = np.array(output[3]).tolist()[0]
        MSE = mse(y, A, np.transpose(S))
        S = S.reshape(hsidata.n_rows, hsidata.n_cols, hsidata.n_endmembers).transpose((1, 0, 2))
        results = {
            'endmembers': A,
            'abundances': S,
            'loss': J,
            'SAD': SAD,
            'MSE': MSE
        }
        return {'loss': SAD[-1], 'status': STATUS_OK, 'attachments': results}

    space = {
        'max_iter': max_iter,
        'q': scope.matlab_double(hp.uniform('lhalf_' + dataset_name + '_q', 0, 1)),
        'delta': scope.matlab_double(hp.uniform('lhalf_' + dataset_name + '_delta', 0, 1000))
    }
    h = scope.matlab_double([
        hp.uniform('lhalf_' + dataset_name + '_h' + str(i), 0, 1000)
        for i in range(hsidata.n_endmembers)
    ])
    space['h'] = h

    trials = Trials()
    pars = fmin(lambda x: objective_func(x), space=space, algo=tpe.suggest,
                max_evals=max_evals, trials=trials,
                rstate=np.random.RandomState(random_seed))
    mleng.quit()
    improvements = reduce(improvement_only, trials.losses(), [])
    save_config(resdir, dataset_name, pars, trials.average_best_error())
    return improvements, pars, trials
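# `scope.matlab_double` used in the search space above is a custom pyll function that converts
# sampled values into matlab.double arrays for the MATLAB engine. Its definition is not part of
# this snippet; the registration below is a sketch of how such a function is typically added to
# hyperopt's pyll scope, and its body is an assumption.
import matlab
from hyperopt.pyll import scope

@scope.define
def matlab_double(x):
    # wrap a sampled float, or a list of sampled floats, as a matlab.double
    if isinstance(x, (list, tuple)):
        return matlab.double([float(v) for v in x])
    return matlab.double([float(x)])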