def get_pipeline(self, pipeline_name=None, astype='lale'):
    """Retrieve one of the trials.

    Parameters
    ----------
    pipeline_name : union type, default None

        - string
            Key for table returned by summary(), return a trainable pipeline.

        - None
            When not specified, return the best trained pipeline found.

    astype : 'lale' or 'sklearn', default 'lale'
        Type of resulting pipeline.

    Returns
    -------
    result : Trained operator if best, trainable operator otherwise.
    """
    # Name of the best trial ('p<tid>'), so that asking for it by name
    # returns the cached *trained* estimator instead of rebuilding an
    # untrained copy from the trial's hyperparameters.
    best_name = None
    if getattr(self, '_best_estimator', None) is not None:
        best_name = f'p{self._trials.best_trial["tid"]}'
    if pipeline_name is None:
        pipeline_name = best_name
    if pipeline_name == best_name:
        result = getattr(self, '_best_estimator', None)
    else:
        # Keys from summary() look like 'p<tid>'; strip the leading 'p'.
        tid = int(pipeline_name[1:])
        params = self._trials.trials[tid]['result']['params']
        result = create_instance_from_hyperopt_search_space(
            self.estimator, params)
    if result is None or astype == 'lale':
        return result
    assert astype == 'sklearn', astype
    return result.export_to_sklearn_pipeline()
def hyperopt_train_test(params, X_train, y_train):
    """Evaluate one hyperopt trial: cross-validate the candidate regressor.

    Returns a (score, evaluation-time) pair.  If cross-validation raises
    and ``self.handle_cv_failure`` is set, a single 80/20 hold-out split
    is scored instead; otherwise the exception propagates.
    """
    warnings.filterwarnings("ignore")
    candidate = create_instance_from_hyperopt_search_space(
        self.estimator, params)
    try:
        score, _, elapsed = cross_val_score_track_trials(
            candidate, X_train, y_train,
            cv=KFold(self.cv), scoring=self.scoring)
        logger.debug("Successful trial of hyperopt")
    except BaseException as e:
        # Cross-validation failed: either fall back to a hold-out
        # evaluation, or log the failing pipeline and re-raise.
        if self.handle_cv_failure:
            X_fit, X_holdout, y_fit, y_holdout = train_test_split(
                X_train, y_train, test_size=0.20)
            t0 = time.time()
            fitted = candidate.fit(X_fit, y_fit)
            scorer = check_scoring(candidate, scoring=self.scoring)
            score = scorer(fitted, X_holdout, y_holdout)
            elapsed = time.time() - t0
        else:
            logger.debug(e)
            logger.debug("Error {} with pipeline:{}".format(
                e, candidate.to_json()))
            raise e
    return score, elapsed
def get_final_trained_estimator(params, X_train, y_train):
    """Build the pipeline for ``params`` and fit it on the full dataset."""
    warnings.filterwarnings("ignore")
    pipeline = create_instance_from_hyperopt_search_space(
        self.estimator, params)
    return pipeline.fit(X_train, y_train)
def hyperopt_train_test(params, X_train, y_train):
    """Score one hyperopt trial via cross-validation.

    Returns (score, log-loss, evaluation-time).  If cross-validation
    raises and ``self.handle_cv_failure`` is set, a single 80/20
    hold-out split is scored instead; otherwise the exception
    propagates after logging the failing pipeline.
    """
    warnings.filterwarnings("ignore")
    candidate = create_instance_from_hyperopt_search_space(
        self.estimator, params)
    try:
        score, holdout_logloss, elapsed = cross_val_score_track_trials(
            candidate, X_train, y_train, cv=self.cv, scoring=self.scoring)
        logger.debug("Successful trial of hyperopt")
    except BaseException as e:
        if self.handle_cv_failure:
            # Fall back to a hold-out evaluation when CV fails.
            X_fit, X_holdout, y_fit, y_holdout = train_test_split(
                X_train, y_train, test_size=0.20)
            t0 = time.time()
            fitted = candidate.fit(X_fit, y_fit)
            scorer = check_scoring(candidate, scoring=self.scoring)
            score = scorer(fitted, X_holdout, y_holdout)
            elapsed = time.time() - t0
            proba = fitted.predict_proba(X_holdout)
            try:
                holdout_logloss = log_loss(y_true=y_holdout, y_pred=proba)
            except BaseException:
                holdout_logloss = 0
                logger.debug("Warning, log loss cannot be computed")
        else:
            logger.debug(e)
            logger.debug("Error {} with pipeline:{}".format(
                e, candidate.to_json()))
            raise e
    return score, holdout_logloss, elapsed
def proc_train_test(params, X_train, y_train, return_dict):
    """Run one trial and record its outcome in ``return_dict``.

    Mutates ``return_dict`` in place: on success stores loss, time,
    log_loss and STATUS_OK; on any exception stores STATUS_FAIL plus
    an error message describing the failing pipeline (or its raw
    hyperparameters if the pipeline itself cannot be instantiated).
    """
    return_dict["params"] = copy.deepcopy(params)
    try:
        score, logloss, elapsed = hyperopt_train_test(
            params, X_train=X_train, y_train=y_train
        )
        return_dict["loss"] = self.best_score - score
        return_dict["time"] = elapsed
        return_dict["log_loss"] = logloss
        return_dict["status"] = hyperopt.STATUS_OK
    except BaseException as e:
        exc_name = f"{type(e).__module__}.{type(e).__name__}"
        # Best effort: pretty-print the failing pipeline for the log,
        # falling back to the raw hyperparameters if it cannot be built.
        try:
            trainable = create_instance_from_hyperopt_search_space(
                self.estimator, params
            )
            trial_info = (
                f'pipeline: """{trainable.pretty_print(show_imports=False)}"""'
            )
        except BaseException:
            trial_info = f"hyperparams: {params}"
        error_msg = (
            f"Exception caught in Hyperopt: {exc_name}, "
            f"{traceback.format_exc()}with {trial_info}"
        )
        logger.warning(error_msg + ", setting status to FAIL")
        return_dict["status"] = hyperopt.STATUS_FAIL
        return_dict["error_msg"] = error_msg
        if self.verbose:
            print(return_dict["error_msg"])
def hyperopt_train_test(params, X_train, y_train):
    """Score one hyperopt trial via cross-validation (default scoring).

    Returns (score, log-loss, evaluation-time).  If cross-validation
    raises and ``self.handle_cv_failure`` is set, accuracy on a single
    80/20 hold-out split is used instead; otherwise the exception
    propagates after logging the failing pipeline.
    """
    warnings.filterwarnings("ignore")
    candidate = create_instance_from_hyperopt_search_space(
        self.model, params)
    try:
        score, holdout_logloss, elapsed = cross_val_score_track_trials(
            candidate, X_train, y_train, cv=self.cv)
        logger.debug("Successful trial of hyperopt")
    except BaseException as e:
        if self.handle_cv_failure:
            # Fall back to a hold-out accuracy evaluation when CV fails.
            X_fit, X_holdout, y_fit, y_holdout = train_test_split(
                X_train, y_train, test_size=0.20)
            t0 = time.time()
            fitted = candidate.fit(X_fit, y_fit)
            preds = fitted.predict(X_holdout)
            elapsed = time.time() - t0
            proba = fitted.predict_proba(X_holdout)
            try:
                holdout_logloss = log_loss(y_true=y_holdout, y_pred=proba)
            except BaseException:
                holdout_logloss = 0
                logger.debug("Warning, log loss cannot be computed")
            # NOTE(review): rounding predictions assumes numeric labels —
            # confirm against the callers' label encoding.
            score = accuracy_score(
                y_holdout, [round(p) for p in preds])
        else:
            logger.debug(e)
            logger.debug("Error {} with pipeline:{}".format(
                e, candidate.to_json()))
            raise e
    return score, holdout_logloss, elapsed
def get_pipeline(self, pipeline_name=None, astype="lale"):
    """Retrieve one of the trials.

    Parameters
    ----------
    pipeline_name : union type, default None

        - string
            Key for table returned by summary(), return a trainable pipeline.

        - None
            When not specified, return the best trained pipeline found.

    astype : 'lale' or 'sklearn', default 'lale'
        Type of resulting pipeline.

    Returns
    -------
    result : Trained operator if best, trainable operator otherwise."""
    # The best trial is served from the cached trained estimator rather
    # than being rebuilt as an untrained pipeline.
    best_name = (
        f'p{self._trials.best_trial["tid"]}'
        if self._best_estimator is not None
        else None
    )
    requested = best_name if pipeline_name is None else pipeline_name
    if requested == best_name:
        result = getattr(self, "_best_estimator", None)
    else:
        assert requested is not None
        # Keys from summary() look like 'p<tid>'; strip the leading 'p'.
        tid = int(requested[1:])
        params = self._trials.trials[tid]["result"]["params"]
        result = create_instance_from_hyperopt_search_space(
            self.estimator, params
        )
    if result is None or astype == "lale":
        return result
    assert astype == "sklearn", astype
    return result.export_to_sklearn_pipeline()
def hyperopt_train_test(params, X_train, y_train):
    """Evaluate a single hyperopt trial.

    Builds a trainable pipeline from ``params`` and scores it with
    cross-validation, tracking execution time.

    NOTE(review): ``fit_params`` is read from the enclosing scope —
    confirm it is defined by the surrounding method.

    Returns
    -------
    (cv_score, logloss, execution_time) tuple.
    """
    warnings.filterwarnings("ignore")
    trainable = create_instance_from_hyperopt_search_space(
        self.estimator, params
    )
    try:
        cv_score, logloss, execution_time = cross_val_score_track_trials(
            trainable,
            X_train,
            y_train,
            cv=self.cv,
            scoring=self.scoring,
            args_to_scorer=self.args_to_scorer,
        )
        logger.debug(
            "Successful trial of hyperopt with hyperparameters:{}".format(
                params
            )
        )
    except BaseException as e:
        # If there is any error in cross validation, use the score based on a random train-test split as the evaluation criterion
        if self.handle_cv_failure and trainable is not None:
            (
                X_train_part,
                X_validation,
                y_train_part,
                y_validation,
            ) = train_test_split(X_train, y_train, test_size=0.20)
            start = time.time()
            trained = trainable.fit(X_train_part, y_train_part, **fit_params)
            scorer = check_scoring(trainable, scoring=self.scoring)
            cv_score = scorer(
                trained, X_validation, y_validation, **self.args_to_scorer
            )
            execution_time = time.time() - start
            y_pred_proba = trained.predict_proba(X_validation)
            try:
                logloss = log_loss(y_true=y_validation, y_pred=y_pred_proba)
            except BaseException:
                # Log-loss is best-effort; keep the trial alive with 0.
                logloss = 0
                logger.debug("Warning, log loss cannot be computed")
        else:
            # Not handling CV failures (or the pipeline could not even be
            # created): log the context and re-raise for the caller.
            logger.debug(e)
            if trainable is None:
                logger.debug(
                    "Error {} with uncreatable pipeline with parameters:{}".format(
                        e, lale.pretty_print.hyperparams_to_string(params)
                    )
                )
            else:
                logger.debug(
                    "Error {} with pipeline:{}".format(e, trainable.to_json())
                )
            raise e
    return cv_score, logloss, execution_time
def get_final_trained_reg(params, X_train, y_train):
    """Build the regressor pipeline for ``params`` and fit it on all data."""
    warnings.filterwarnings("ignore")
    regressor = create_instance_from_hyperopt_search_space(
        self.estimator, params)
    return regressor.fit(X_train, y_train)
def get_final_trained_clf(params, X_train, y_train):
    """Build the classifier pipeline for ``params`` and fit it on all data."""
    warnings.filterwarnings("ignore")
    classifier = create_instance_from_hyperopt_search_space(
        self.model, params)
    return classifier.fit(X_train, y_train)