def test_can_run_basic(self):
    """
    The flexga tuner should be able to complete without erroring.
    """
    pipeline = Pipeline()
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()
    logger.info(
        f"baseline score: {evaluate(pipeline, self.X, self.y, f1macro)}")
    ga_tune(pipeline, self.X, self.y, evaluate, f1macro, iters=2)
def test_can_tune_multiple_primitives(self):
    """
    The flexga tuner should be able to tune the hyperparameters of all
    primitives in a pipeline at once.
    """
    pipeline = Pipeline()
    pipeline.add_step(transformers["PCAPrimitive"])
    pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
    evaluate = make_train_test_evaluator()
    logger.info(
        f"baseline score: {evaluate(pipeline, self.X, self.y, f1macro)}")
    ga_tune(pipeline, self.X, self.y, evaluate, f1macro, iters=2)
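
# Illustrative sketch (an assumption, not the library's implementation):
# the callable returned by `make_train_test_evaluator` is invoked
# throughout as `evaluate(pipeline, X, y, metric)` and returns a float
# score. A minimal train/test evaluator matching that calling convention
# might look like the following; the split ratio and the way `metric`
# scores predictions are both hypothetical.
from sklearn.model_selection import train_test_split

def example_train_test_evaluator(pipeline, X, y, metric) -> float:
    # Hold out a validation set, fit on the remainder, and score the
    # held-out predictions.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
    pipeline.fit(X_train, y_train)
    predictions = pipeline.predict(X_test)
    return metric(y_test, predictions)  # scoring call convention assumed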
def _tuner_callback(self, tuner_state: dict) -> bool:
    now = time()
    logger.info(
        f"candidate pipeline in generation {tuner_state['nit']} finished. "
        f"{self.state.endbytime - now:.2f} seconds left in budget."
    )
    logger.info(f"best score found so far: {tuner_state['fun']}")
    logger.info(
        f"best hyperparameter config found so far: {tuner_state['kwargs_opt']}"
    )
    # We need to quit early if our time budget is used up.
    return time() > self.state.endbytime
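
# Illustrative sketch (not part of the library): any callable with this
# signature can serve as a tuner callback. flexga supplies a state dict
# whose `nit` (generation count), `fun` (best objective value), and
# `kwargs_opt` (best config) keys are read above; returning True stops
# the tuner. `MAX_GENERATIONS` is a hypothetical cap.
MAX_GENERATIONS = 10

def stop_after_max_generations(tuner_state: dict) -> bool:
    # Halt tuning once the generation counter reaches the cap.
    return tuner_state["nit"] >= MAX_GENERATIONS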
def _sampler_cb(self, sampler_state: SamplerState) -> bool:
    # Decide how much time is left available to us in
    # the sampling phase.
    if self.tune:
        # We want to leave enough time in the budget to be able
        # to complete at least one generation of hyperparameter tuning.
        if self.metric.is_better_than(
            sampler_state.score, self.state.best_score
        ):
            self.state.best_score = sampler_state.score
            # An estimate of how long it will take to complete one
            # generation of hyperparameter tuning on this current
            # best pipeline.
            self.state.best_pipeline_min_tune_time = (
                sampler_state.train_time
                * sampler_state.pipeline.num_params
                * self.tuning_mult_factor
            )
        sampling_endtime = (
            self.state.endbytime - self.state.best_pipeline_min_tune_time
        )
    else:
        sampling_endtime = self.state.endbytime

    now = time()
    logger.info(f"{sampling_endtime - now:.2f} seconds left in sampling budget")

    # Logic for tracking sampler progress and exiting when the cost
    # of finding a new best score is too great.
    self.progress.observe(sampler_state.score)
    exit_early = False
    if now > sampling_endtime:
        exit_early = True
    elif self.progress.can_report:
        logger.info(
            f"estimated time to new best: {self.progress.return_time:.2f} seconds"
        )
        if now + self.progress.return_time > sampling_endtime:  # type: ignore
            exit_early = True

    if exit_early:
        logger.info(
            "not enough time is left in the budget to find a new "
            "best score, so no more sampling will be done"
        )
    return exit_early
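
# A worked sketch of the reservation arithmetic above, using
# hypothetical numbers: one tuning generation evaluates roughly
# `population_size = num_params * tuning_mult_factor` pipelines (see
# `fit` below), each costing about one training run, so:
train_time = 2.0         # seconds to train the current best pipeline
num_params = 8           # tunable hyperparameters in that pipeline
tuning_mult_factor = 5   # population-size multiplier
min_tune_time = train_time * num_params * tuning_mult_factor  # 80.0 s
# Sampling must therefore stop 80 seconds before `endbytime` to leave
# room for at least one generation of tuning.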
def run(
    self,
    X: pd.DataFrame,
    y: pd.Series,
    *,
    models: t.List[t.Type[Primitive]],
    transformers: t.List[t.Type[Primitive]],
    problem_type: ProblemType,
    metric: Metric,
    evaluator: t.Callable,
    pipeline_timeout: t.Optional[int],
    num_samples: t.Optional[int] = None,
    callback: t.Union[None, t.Callable, t.List[t.Callable]] = None,
    exit_on_pipeline_error: bool = True,
) -> t.Tuple[Pipeline, float, int]:
    """Samples `num_samples` pipelines, returning the best one found
    along the way.

    Returns
    -------
    best_pipeline : Pipeline
        The fitted best pipeline trained on the problem.
    best_score : float
        The score of the best pipeline that was trained.
    n_iters : int
        The total number of iterations the sampler completed.
    """
    # Validate inputs
    if num_samples is None and callback is None:
        raise ValueError("either num_samples or callback must be"
                         " passed so the sampler knows when to stop")
    if num_samples is not None and num_samples < 1:
        raise ValueError(f"num_samples must be >= 1, got {num_samples}")

    if callback is None:
        callbacks: t.List[t.Callable] = []
    elif callable(callback):
        callbacks = [callback]
    elif isinstance(callback, list):
        callbacks = callback
    else:
        raise ValueError(
            f"unsupported type '{type(callback)}' for callback arg")

    # Initialize
    should_timeout = pipeline_timeout is not None
    best_score = metric.worst_value
    best_pipeline = None

    # Conduct the sampling
    i = 0
    while True:
        i += 1
        logger.info(
            f"sampling pipeline {i}"
            f"{'/' + str(num_samples) if num_samples else ''}")
        pipeline = self.sample_pipeline(problem_type, models, transformers)
        try:
            with conditional_timeout(pipeline_timeout, should_timeout):
                # Train the pipeline and check its performance.
                start_time = time()
                test_score = evaluator(pipeline, X, y, metric)
                logger.info(f"achieved test score: {test_score}")

                if (metric.is_better_than(test_score, best_score)
                        or best_pipeline is None):
                    best_score = test_score
                    best_pipeline = pipeline

                # Check to see if it's time to stop sampling.
                if callback is not None:
                    # We stop if any callback returns True.
                    train_time = time() - start_time
                    exit_early = any(
                        cb(SamplerState(test_score, pipeline, train_time, i))
                        for cb in callbacks)
                    if exit_early:
                        break

                if best_score == metric.best_value:
                    logger.info(
                        f"found best possible score {metric.best_value} early, "
                        "stopping the search")
                    break

                if num_samples and i >= num_samples:
                    break
        except EvaluationTimeoutError:
            logger.info("pipeline took too long to evaluate, skipping")
            logger.debug(pipeline)
        except PipelineRunError as e:
            logger.exception(e)
            if exit_on_pipeline_error:
                raise e

    return best_pipeline, best_score, i
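
# Illustrative usage sketch (the sampler class name is hypothetical;
# only the `run` signature above is taken from the source): sample ten
# pipelines on an existing X/y dataset and keep the best.
sampler = PipelineSampler()  # assumed concrete sampler class
best_pipeline, best_score, n_iters = sampler.run(
    X,
    y,
    models=list(classifiers.values()),
    transformers=list(transformers.values()),
    problem_type=ProblemType.CLASSIFICATION,  # enum member assumed
    metric=f1macro,
    evaluator=make_train_test_evaluator(),
    pipeline_timeout=60,  # give each pipeline at most 60 seconds
    num_samples=10,       # stop after ten sampled pipelines
)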
def fit(self, X: pd.DataFrame, y: pd.Series) -> SearchResult:
    """
    The main runtime method of the package. Given a dataset, problem
    type, and sampling strategy, it tries to find, in a limited amount
    of time, the best performing pipeline it can.

    Parameters
    ----------
    X : pandas.DataFrame
        The features of your dataset.
    y : pandas.Series
        The target vector of your dataset. The indices of `X` and `y`
        should match up.

    Returns
    -------
    result : SearchResult
        A named tuple containing data about how the fit process went.
    """
    # Initialize
    if len(X.index) != y.size:
        raise ValueError("X and y must have the same number of instances")

    # A little encapsulation to make this `fit` method's code less huge.
    self.state = SKPlumberFitState(self.budget, self.metric)

    # Run
    self.progress.start()
    best_pipeline, best_score, n_sample_iters = self.sampler.run(
        X,
        y,
        models=self._models_map[self.problem_type],
        transformers=list(transformers.values()),
        problem_type=self.problem_type,
        metric=self.metric,
        evaluator=self.evaluator,
        pipeline_timeout=self.pipeline_timeout,
        callback=self.sampler_cbs,
        exit_on_pipeline_error=self.exit_on_pipeline_error,
    )
    self.best_pipeline = best_pipeline
    self.state.best_score = best_score

    logger.info(f"found best validation score of {best_score}")
    logger.info("best pipeline:")
    logger.info(self.best_pipeline)

    if self.tune:
        logger.info(
            "now performing hyperparameter tuning on best found pipeline..."
        )
        tune_result = ga_tune(
            self.best_pipeline,
            X,
            y,
            self.evaluator,
            self.metric,
            self.exit_on_pipeline_error,
            population_size=(
                self.best_pipeline.num_params * self.tuning_mult_factor
            ),
            callback=self._tuner_callback,
        )
        self.state.best_score = tune_result.best_score
        n_tune_iters = tune_result.n_evals
    else:
        n_tune_iters = 0

    # Now that we have the "best" model, train it on the full dataset
    # so it can see as much of the dataset's distribution as possible,
    # in an effort to be more ready for the wild.
    self.best_pipeline.fit(X, y)

    logger.info(
        "finished. total execution time: "
        f"{time() - self.state.starttime:.2f} seconds."
    )
    logger.info(f"final best score found: {self.state.best_score}")

    result = SearchResult(
        time() - self.state.starttime,
        n_sample_iters,
        n_tune_iters,
        self.state.best_score,
    )

    # Fitting completed successfully
    self.is_fitted = True
    return result
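
# Illustrative usage sketch (constructor arguments and result field
# names are assumptions; only the `fit` signature above comes from the
# source): run the full search on a pandas dataset.
import pandas as pd

df = pd.read_csv("train.csv")  # hypothetical dataset
X, y = df.drop(columns=["target"]), df["target"]

plumber = SKPlumber(problem="classification", budget=60)  # args assumed
result = plumber.fit(X, y)
print(f"best score: {result.best_score}")
print(plumber.best_pipeline)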
def ga_tune(
    pipeline: Pipeline,
    X: pd.DataFrame,
    y: pd.Series,
    evaluator: t.Callable,
    metric: Metric,
    exit_on_pipeline_error: bool = True,
    **flexgakwargs,
) -> TuneResult:
    """
    Performs genetic-algorithm hyperparameter tuning on `pipeline`,
    returning the best score it could find and the number of
    evaluations it completed. Essentially performs a `.fit` operation
    on the pipeline, where the pipeline is fit with the best performing
    hyperparameter configuration it could find.

    Returns
    -------
    result : TuneResult
        A named tuple containing data about how the tuning process went.
    """
    # See what score the model gets without any tuning.
    starting_params = pipeline.get_params()
    starting_score = evaluator(pipeline, X, y, metric)

    # Keep track of how many evaluations were completed.
    n_evals = 1  # we already completed one

    def objective(*args, **flexga_params) -> float:
        """
        The objective function the genetic algorithm will try to
        maximize.
        """
        params = _get_params_from_flexga(flexga_params)
        nonlocal n_evals

        try:
            pipeline.set_params(params)
            score = evaluator(pipeline, X, y, metric)
        except PipelineRunError as e:
            logger.exception(e)
            if exit_on_pipeline_error:
                raise e
            # Pipelines that make errors are bad.
            # TODO: make this `None` or `np.nan` instead.
            score = metric.worst_value

        n_evals += 1
        # The genetic algorithm tries to maximize.
        return (
            -score
            if metric.opt_dir == OptimizationDirection.MINIMIZE
            else score
        )

    # Use flexga to find the best hyperparameter configuration it can.
    optimal_score, _, optimal_flexga_params = flexga(
        objective, kwargsmeta=_get_flexga_metas(pipeline, X), **flexgakwargs
    )

    if metric.is_better_than(optimal_score, starting_score):
        optimal_params = _get_params_from_flexga(optimal_flexga_params)
        did_improve = True
    else:
        # The tuner couldn't find anything better than the params the
        # pipeline started with under the conditions given.
        optimal_score = starting_score
        optimal_params = starting_params
        did_improve = False

    pipeline.set_params(optimal_params)
    pipeline.fit(X, y)

    logger.info("tuning complete.")
    logger.info(f"found best pipeline configuration: {pipeline}")
    logger.info(f"found best validation score of {optimal_score}")
    return TuneResult(optimal_score, n_evals, did_improve)
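
# Illustrative usage sketch: tune a hand-built pipeline directly with
# `ga_tune`, assuming X and y are already loaded. Extra keyword
# arguments flow through `**flexgakwargs` to flexga; `iters` is the one
# exercised by the tests above. The `did_improve` field name is
# inferred from the TuneResult construction.
pipeline = Pipeline()
pipeline.add_step(classifiers["DecisionTreeClassifierPrimitive"])
result = ga_tune(
    pipeline, X, y, make_train_test_evaluator(), f1macro, iters=5
)
print(result.best_score, result.n_evals, result.did_improve)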