def test_report_bin():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    # By default every report is recorded; the best is the one with the lowest sort_metric
    b = _ReportBin()
    assert 0 == len(b.records)
    assert b.best is None
    assert b.on_report(r1)
    assert b.on_report(r3)
    assert r3 is b.best
    assert 2 == len(b.records)

    # A non-improving report returns False but is still recorded
    b = _ReportBin()
    assert b.on_report(r3)
    assert not b.on_report(r1)
    assert r3 is b.best
    assert 2 == len(b.records)

    # With new_best_only=True, non-improving reports are not recorded at all
    b = _ReportBin(new_best_only=True)
    assert b.on_report(r3)
    assert not b.on_report(r1)
    assert r3 is b.best
    assert 1 == len(b.records)
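# Illustration of the sort convention exercised above: _ReportBin treats a lower
# sort_metric as better, so negating a higher-is-better metric turns the search into
# minimization. This companion test reuses only names already used in this file.
def test_sort_metric_convention():
    ta = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    tc = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    worse = TrialReport(ta, 0.8, sort_metric=-0.8)
    better = TrialReport(tc, 0.9, sort_metric=-0.9)
    assert better.sort_metric < worse.sort_metric  # -0.9 < -0.8, so `better` ranks first

    rbin = _ReportBin()
    assert rbin.on_report(worse)
    assert rbin.on_report(better)  # improves the current best, so it returns True
    assert better is rbin.best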
def test_print_best():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    b = PrintBest()
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)
def run(self, trial: Trial) -> TrialReport:
    params = dict(trial.params)
    if trial.trial_id != self._last_id:
        # New trial id: resolve the model class and reload the train/validation data
        self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
        self._model_expr = to_sk_model_expr(self._model_type)
        self._train_x, self._train_y = self._reset_xy(
            trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
        )
        self._test_x, self._test_y = self._reset_xy(
            trial.dfs[TUNE_DATASET_VALIDATION_DF_DEFAULT_NAME]
        )
        self._last_id = trial.trial_id
    else:
        params.pop(SPACE_MODEL_NAME)

    model = self._model_type(**params).fit(self._train_x, self._train_y)
    metric = get_scorer(self._scoring)(model, self._test_x, self._test_y)
    metadata = dict(model=self._model_expr)
    if self._checkpoint_path is not None:
        # Persist the fitted model and record where it was written
        fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["checkpoint_path"] = fp
    return TrialReport(
        trial,
        metric=metric,
        metadata=metadata,
        sort_metric=self.generate_sort_metric(metric),
    )
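# Hedged note on the checkpoint written above: FileSystem is assumed here to be the same
# fs-style wrapper used for writing, so an analogous openbin call can read the pickle back.
# A minimal reload sketch (not part of the class), given a report produced by this objective:
#
# with FileSystem().openbin(report.metadata["checkpoint_path"], mode="rb") as f:
#     model = pickle.load(f)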
def run(self, trial: Trial) -> TrialReport:
    params = dict(trial.params)
    if trial.trial_id != self._last_id:
        # New trial id: resolve the model class and reload the training data
        self._model_type = to_sk_model(params.pop(SPACE_MODEL_NAME))
        self._model_expr = to_sk_model_expr(self._model_type)
        self._train_x, self._train_y = self._reset_xy(
            trial.dfs[TUNE_DATASET_DF_DEFAULT_NAME]
        )
        self._last_id = trial.trial_id
    else:
        params.pop(SPACE_MODEL_NAME)

    model = self._model_type(**params)
    s = cross_val_score(
        model, self._train_x, self._train_y, cv=self._cv, scoring=self._scoring
    )
    metadata = dict(model=self._model_expr, cv_scores=[float(x) for x in s])
    if self._checkpoint_path is not None:
        # Refit on the full training data before checkpointing the model
        model.fit(self._train_x, self._train_y)
        fp = os.path.join(self._checkpoint_path, str(uuid4()) + ".pkl")
        with FileSystem().openbin(fp, mode="wb") as f:
            pickle.dump(model, f)
        metadata["checkpoint_path"] = fp
    metric = float(np.mean(s))
    return TrialReport(
        trial,
        metric=metric,
        metadata=metadata,
        sort_metric=self.generate_sort_metric(metric),
    )
def run_single_iteration(self) -> TrialReport:
    self.step += 1
    trial = self.current_trial
    return TrialReport(
        trial=trial,
        metric=f(
            self.step,
            trial.params.simple_value["a"],
            trial.params.simple_value["b"],
        ),
    )
def _test_charts():
    t1 = Trial("a", dict(a=1, b=2), keys=["x", "y"])
    r1 = TrialReport(t1, 0.8, sort_metric=-0.8)
    t2 = Trial("b", dict(a=11, b=12), keys=["xx", "y"])
    r2 = TrialReport(t2, 0.7, sort_metric=-0.7)
    t3 = Trial("c", dict(a=10, b=20), keys=["x", "y"])
    r3 = TrialReport(t3, 0.9, sort_metric=-0.9)

    b = NotebookSimpleTimeSeries("1sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)

    b = NotebookSimpleHist("2sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)

    b = NotebookSimpleRungs("3sec")
    b.on_report(r3)
    b.on_report(r2)
    b.on_report(r1)
def run_single_rung(self, budget: float) -> TrialReport:
    trial = self.current_trial
    fit_args, fit_kwargs = self.spec.get_fit_params()
    fit_kwargs = dict(fit_kwargs)
    # Continue training from the epochs already spent; the rung budget buys extra epochs
    fit_kwargs.update(
        dict(epochs=self._epochs + int(budget), initial_epoch=self._epochs)
    )
    h = self.model.fit(*fit_args, **fit_kwargs)
    metric = self.spec.get_fit_metric(h)
    self._epochs += int(budget)
    return TrialReport(trial=trial, metric=metric, cost=budget, rung=self.rung)
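# Budget-to-epoch bookkeeping used above: each rung resumes from the epochs already spent,
# so successive budgets accumulate. A minimal trace, assuming a fresh runner with
# self._epochs == 0:
#
# run_single_rung(budget=3)  # fit(initial_epoch=0, epochs=3); self._epochs becomes 3
# run_single_rung(budget=2)  # fit(initial_epoch=3, epochs=5); self._epochs becomes 5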
def suggest_sk_models_by_cv(
    space: Space,
    train_df: Any,
    scoring: str,
    cv: int = 5,
    temp_path: str = "",
    feature_prefix: str = "",
    label_col: str = "label",
    save_model: bool = False,
    partition_keys: Optional[List[str]] = None,
    top_n: int = 1,
    objective_runner: Optional[NonIterativeObjectiveRunner] = None,
    monitor: Any = None,
    distributed: Optional[bool] = None,
    execution_engine: Any = None,
    execution_engine_conf: Any = None,
) -> List[TrialReport]:
    dag = FugueWorkflow()
    dataset = TUNE_OBJECT_FACTORY.make_dataset(
        dag,
        space,
        df=train_df,
        partition_keys=partition_keys,
        temp_path=temp_path,
    )
    objective = SKCVObjective(
        scoring=scoring,
        cv=cv,
        feature_prefix=feature_prefix,
        label_col=label_col,
        checkpoint_path=temp_path if save_model else None,
    )
    study = optimize_noniterative(
        objective=objective,
        dataset=dataset,
        runner=objective_runner,
        distributed=distributed,
        monitor=monitor,
    )
    study.result(top_n).yield_dataframe_as("result")

    rows = list(
        dag.run(
            execution_engine,
            conf=execution_engine_conf,
        )["result"].as_dict_iterable()
    )
    return [
        TrialReport.from_jsondict(json.loads(r[TUNE_REPORT]))
        for r in sorted(rows, key=lambda r: r[TUNE_REPORT_METRIC])
    ]
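# Hedged usage sketch for suggest_sk_models_by_cv. The sk_space helper and Grid are
# assumptions about the surrounding tune / tune_sklearn API (they attach the model
# expression under SPACE_MODEL_NAME); the dataframe, column names, and scorer are
# illustrative only. The returned reports are sorted ascending by the sort metric,
# so the first element is the best candidate.
#
# import pandas as pd
# from sklearn.linear_model import Ridge
# from tune import Grid
# from tune_sklearn import sk_space
#
# train = pd.DataFrame(dict(f_a=[0, 1, 2, 3], f_b=[1, 3, 5, 7], label=[1, 4, 7, 10]))
# space = sk_space(Ridge, alpha=Grid(0.1, 1.0))
# reports = suggest_sk_models_by_cv(
#     space, train, scoring="neg_mean_absolute_error", cv=2, feature_prefix="f_", top_n=1
# )
# best = reports[0]
# print(best.metric)                 # mean cross validation score from SKCVObjective
# print(best.metadata["cv_scores"])  # per-fold scores recorded in the report metadata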