def describe_pipeline(self, pipeline_id, return_dict=False): """Describe a pipeline Arguments: pipeline_id (int): pipeline to describe return_dict (bool): If True, return dictionary of information about pipeline. Defaults to False. Returns: Description of specified pipeline. Includes information such as type of pipeline components, problem, training time, cross validation, etc. """ if pipeline_id not in self._results['pipeline_results']: raise PipelineNotFoundError("Pipeline not found") pipeline = self.get_pipeline(pipeline_id) pipeline_results = self._results['pipeline_results'][pipeline_id] pipeline.describe() log_subtitle(logger, "Training") logger.info("Training for {} problems.".format(pipeline.problem_type)) if self.optimize_thresholds and self.objective.is_defined_for_problem_type( ProblemTypes.BINARY) and self.objective.can_optimize_threshold: logger.info( "Objective to optimize binary classification pipeline thresholds for: {}" .format(self.objective)) logger.info("Total training time (including CV): %.1f seconds" % pipeline_results["training_time"]) log_subtitle(logger, "Cross Validation", underline="-") all_objective_scores = [ fold["all_objective_scores"] for fold in pipeline_results["cv_data"] ] all_objective_scores = pd.DataFrame(all_objective_scores) for c in all_objective_scores: if c in ["# Training", "# Validation"]: all_objective_scores[c] = all_objective_scores[c].astype( "object") continue mean = all_objective_scores[c].mean(axis=0) std = all_objective_scores[c].std(axis=0) all_objective_scores.loc["mean", c] = mean all_objective_scores.loc["std", c] = std all_objective_scores.loc[ "coef of var", c] = std / mean if abs(mean) > 0 else np.inf all_objective_scores = all_objective_scores.fillna("-") with pd.option_context('display.float_format', '{:.3f}'.format, 'expand_frame_repr', False): logger.info(all_objective_scores) if return_dict: return pipeline_results
def get_pipeline(self, pipeline_id): """Given the ID of a pipeline training result, returns an untrained instance of the specified pipeline initialized with the parameters used to train that pipeline during automl search. Arguments: pipeline_id (int): pipeline to retrieve Returns: PipelineBase: untrained pipeline instance associated with the provided ID """ pipeline_results = self.results['pipeline_results'].get(pipeline_id) if pipeline_results is None: raise PipelineNotFoundError("Pipeline not found in automl results") pipeline_class = pipeline_results.get('pipeline_class') parameters = pipeline_results.get('parameters') if pipeline_class is None or parameters is None: raise PipelineNotFoundError("Pipeline class or parameters not found in automl results") return pipeline_class(parameters, random_seed=self.random_seed)
def best_pipeline(self): """Returns a trained instance of the best pipeline and parameters found during automl search. If `train_best_pipeline` is set to False, returns an untrained pipeline instance. Returns: PipelineBase: A trained instance of the best pipeline and parameters found during automl search. If `train_best_pipeline` is set to False, returns an untrained pipeline instance. """ if not self._best_pipeline: raise PipelineNotFoundError("automl search must be run before selecting `best_pipeline`.") return self._best_pipeline
def add_result(self, score_to_minimize, pipeline, trained_pipeline_results): """Register results from evaluating a pipeline Arguments: score_to_minimize (float): The score obtained by this pipeline on the primary objective, converted so that lower values indicate better pipelines. pipeline (PipelineBase): The trained pipeline object which was used to compute the score. trained_pipeline_results (dict): Results from training a pipeline. """ if pipeline.name not in self._tuners: raise PipelineNotFoundError(f"No such pipeline allowed in this AutoML search: {pipeline.name}") self._tuners[pipeline.name].add(pipeline.parameters, score_to_minimize)