def on_epoch_end(self, epoch, logs=None):
    """
    Log Keras metrics with MLflow. If the model improved on the validation data, evaluate it
    on the test set and store it as the best model.
    """
    if not logs:
        return
    self._next_step = epoch + 1
    train_loss = logs["loss"]
    val_loss = logs["val_loss"]
    kiwi.log_metrics({
        self.train_loss: train_loss,
        self.val_loss: val_loss
    }, step=epoch)

    if val_loss < self._best_val_loss:
        # The result improved on the validation set.
        # Log the model with MLflow and also evaluate and log on the test set.
        self._best_train_loss = train_loss
        self._best_val_loss = val_loss
        self._best_model = keras.models.clone_model(self.model)
        self._best_model.set_weights([x.copy() for x in self.model.get_weights()])
        preds = self._best_model.predict(self._test_x)
        eval_and_log_metrics("test", self._test_y, preds, epoch)
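# The callback above calls an `eval_and_log_metrics` helper that is not part of this excerpt.
# A minimal sketch of what such a helper might look like (an assumption, using RMSE to match
# the train_rmse/val_rmse/test_rmse metric keys used by the search code below) is:

import numpy as np
import kiwi


def eval_and_log_metrics(prefix, actual, pred, epoch):
    # Compute RMSE on the given split and log it (e.g. as "test_rmse") at the given step.
    pred = np.asarray(pred).ravel()
    actual = np.asarray(actual).ravel()
    rmse = float(np.sqrt(np.mean((pred - actual) ** 2)))
    kiwi.log_metrics({"{}_rmse".format(prefix): rmse}, step=epoch)
    return rmse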
def test_log_metrics_uses_common_timestamp_and_step_per_invocation(step_kwarg):
    expected_metrics = {"name_1": 30, "name_2": -3, "nested/nested/name": 40}
    with start_run() as active_run:
        run_id = active_run.info.run_id
        kiwi.log_metrics(expected_metrics, step=step_kwarg)

    finished_run = tracking.MlflowClient().get_run(run_id)
    # Validate metric keys/values match what we expect, and that all metrics have the same
    # timestamp and step.
    assert len(finished_run.data.metrics) == len(expected_metrics)
    for key, value in finished_run.data.metrics.items():
        assert expected_metrics[key] == value
    common_timestamp = finished_run.data._metric_objs[0].timestamp
    expected_step = step_kwarg if step_kwarg is not None else 0
    for metric_obj in finished_run.data._metric_objs:
        assert metric_obj.timestamp == common_timestamp
        assert metric_obj.step == expected_step
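# The step_kwarg argument above is presumably supplied by a pytest parametrization that is not
# shown in this excerpt; a plausible (assumed) decorator on the test would look something like:
#
#   @pytest.mark.parametrize("step_kwarg", [None, -10, 3])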
def main():
    # parse command-line arguments
    args = parse_args()

    # prepare train and test data
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    train_set = lgb.Dataset(X_train, label=y_train)

    # enable auto logging
    kiwi.lightgbm.autolog()

    with kiwi.start_run():

        # train model
        params = {
            'objective': 'multiclass',
            'num_class': 3,
            'learning_rate': args.learning_rate,
            'metric': 'multi_logloss',
            'colsample_bytree': args.colsample_bytree,
            'subsample': args.subsample,
            'seed': 42,
        }
        model = lgb.train(params, train_set, num_boost_round=10,
                          valid_sets=[train_set], valid_names=['train'])

        # evaluate model
        y_proba = model.predict(X_test)
        y_pred = y_proba.argmax(axis=1)
        loss = log_loss(y_test, y_proba)
        acc = accuracy_score(y_test, y_pred)

        # log metrics
        kiwi.log_metrics({'log_loss': loss, 'accuracy': acc})
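# Both this script and the XGBoost variant below call a `parse_args` helper that is not included
# in this excerpt. A minimal argparse sketch consistent with the attributes used (learning_rate,
# colsample_bytree, subsample) could look like the following; the flag names and default values
# are illustrative assumptions:

import argparse


def parse_args():
    parser = argparse.ArgumentParser(description="Train a model with autologging enabled")
    parser.add_argument("--learning-rate", type=float, default=0.1,
                        help="step size shrinkage applied at each boosting round")
    parser.add_argument("--colsample-bytree", type=float, default=1.0,
                        help="subsample ratio of columns when constructing each tree")
    parser.add_argument("--subsample", type=float, default=1.0,
                        help="subsample ratio of the training instances")
    return parser.parse_args()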
def test_log_metrics_uses_millisecond_timestamp_resolution_fluent():
    with start_run() as active_run, mock.patch("time.time") as time_mock:
        time_mock.side_effect = lambda: 123
        kiwi.log_metrics({
            "name_1": 25,
            "name_2": -3,
        })
        kiwi.log_metrics({
            "name_1": 30,
        })
        kiwi.log_metrics({
            "name_1": 40,
        })
        run_id = active_run.info.run_id

    client = tracking.MlflowClient()
    metric_history_name1 = client.get_metric_history(run_id, "name_1")
    assert set([(m.value, m.timestamp) for m in metric_history_name1]) == set([
        (25, 123 * 1000),
        (30, 123 * 1000),
        (40, 123 * 1000),
    ])
    metric_history_name2 = client.get_metric_history(run_id, "name_2")
    assert set([(m.value, m.timestamp) for m in metric_history_name2]) == set([
        (-3, 123 * 1000),
    ])
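# The assertions above encode the expectation that metric timestamps are recorded with
# millisecond resolution: with time.time() mocked to return 123 (seconds), every logged metric
# is expected to carry timestamp 123 * 1000. In other words, the tracking layer presumably
# converts along the lines of:
#
#   timestamp = int(time.time() * 1000)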
def main():
    # parse command-line arguments
    args = parse_args()

    # prepare train and test data
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dtest = xgb.DMatrix(X_test, label=y_test)

    # enable auto logging
    kiwi.xgboost.autolog()

    with kiwi.start_run():

        # train model
        params = {
            'objective': 'multi:softprob',
            'num_class': 3,
            'learning_rate': args.learning_rate,
            'eval_metric': 'mlogloss',
            'colsample_bytree': args.colsample_bytree,
            'subsample': args.subsample,
            'seed': 42,
        }
        model = xgb.train(params, dtrain, evals=[(dtrain, 'train')])

        # evaluate model
        y_proba = model.predict(dtest)
        y_pred = y_proba.argmax(axis=1)
        loss = log_loss(y_test, y_proba)
        acc = accuracy_score(y_test, y_pred)

        # log metrics
        kiwi.log_metrics({'log_loss': loss, 'accuracy': acc})
def run(training_data, max_runs, max_p, epochs, metric, seed):
    train_metric = "train_{}".format(metric)
    val_metric = "val_{}".format(metric)
    test_metric = "test_{}".format(metric)
    np.random.seed(seed)
    tracking_client = kiwi.tracking.MlflowClient()

    def new_eval(nepochs, experiment_id, null_train_loss=_inf, null_val_loss=_inf,
                 null_test_loss=_inf):
        def eval(parms):
            lr, momentum = parms
            with kiwi.start_run(nested=True) as child_run:
                p = kiwi.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": str(seed)
                    },
                    experiment_id=experiment_id,
                    synchronous=False)
                succeeded = p.wait()
                if succeeded:
                    training_run = tracking_client.get_run(p.run_id)
                    metrics = training_run.data.metrics
                    # cap the loss at the loss of the null model
                    train_loss = min(null_train_loss, metrics[train_metric])
                    val_loss = min(null_val_loss, metrics[val_metric])
                    test_loss = min(null_test_loss, metrics[test_metric])
                else:
                    # run failed => return null loss
                    tracking_client.set_terminated(p.run_id, "FAILED")
                    train_loss = null_train_loss
                    val_loss = null_val_loss
                    test_loss = null_test_loss
                kiwi.log_metrics({
                    "train_{}".format(metric): train_loss,
                    "val_{}".format(metric): val_loss,
                    "test_{}".format(metric): test_loss
                })
            return p.run_id, train_loss, val_loss, test_loss
        return eval

    with kiwi.start_run() as run:
        experiment_id = run.info.experiment_id
        # evaluate the null model (zero learning rate and momentum) to get an upper bound on loss
        _, null_train_loss, null_val_loss, null_test_loss = new_eval(0, experiment_id)((0, 0))
        runs = [(np.random.uniform(1e-5, 1e-1), np.random.uniform(0, 1.0))
                for _ in range(max_runs)]
        with ThreadPoolExecutor(max_workers=max_p) as executor:
            _ = executor.map(
                new_eval(epochs, experiment_id, null_train_loss, null_val_loss, null_test_loss),
                runs)

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id],
            "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics[val_metric] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics[train_metric]
                best_val_valid = r.data.metrics[val_metric]
                best_val_test = r.data.metrics[test_metric]
        kiwi.set_tag("best_run", best_run.info.run_id)
        kiwi.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test
        })
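# `run` above (and the GPyOpt variant below) reference a module-level `_inf` constant that is
# not part of this excerpt. Given the comment in the GPyOpt variant that GPyOpt cannot handle
# Infs, a plausible (assumed) definition is a large finite sentinel rather than float("inf"):
#
#   _inf = np.finfo(np.float64).max
#
# A hypothetical direct invocation, with purely illustrative argument values:
#
#   run(training_data="data.parquet", max_runs=8, max_p=2, epochs=32, metric="rmse", seed=97)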
def run(training_data, max_runs, batch_size, max_p, epochs, metric, gpy_model, gpy_acquisition,
        initial_design, seed):
    bounds = [
        {'name': 'lr', 'type': 'continuous', 'domain': (1e-5, 1e-1)},
        {'name': 'momentum', 'type': 'continuous', 'domain': (0.0, 1.0)},
    ]
    # tracking client used to read back the metrics of the training runs
    tracking_client = kiwi.tracking.MlflowClient()

    def new_eval(nepochs, experiment_id, null_train_loss, null_valid_loss, null_test_loss,
                 return_all=False):
        """
        Create a new eval function.

        :param nepochs: Number of epochs to train the model.
        :param experiment_id: Experiment id for the training run.
        :param null_train_loss: Loss of a null model on the training dataset.
        :param null_valid_loss: Loss of a null model on the validation dataset.
        :param null_test_loss: Loss of a null model on the test dataset.
        :param return_all: Return train, validation and test loss if set; otherwise return only
                           the validation loss.
        :return: new eval function.
        """
        def eval(params):
            """
            Train Keras model with given parameters by invoking an MLflow run.

            Notice we store the runUuid and resulting metric in a file. We will later use these
            to pick the best run and to log the runUuids of the child runs as an artifact. This
            is a temporary workaround until MLflow offers a better mechanism of linking runs
            together.

            :param params: Parameters to the train_keras script we optimize over:
                           learning_rate, momentum
            :return: The metric value evaluated on the validation data.
            """
            lr, momentum = params[0]
            with kiwi.start_run(nested=True) as child_run:
                p = kiwi.projects.run(
                    run_id=child_run.info.run_id,
                    uri=".",
                    entry_point="train",
                    parameters={
                        "training_data": training_data,
                        "epochs": str(nepochs),
                        "learning_rate": str(lr),
                        "momentum": str(momentum),
                        "seed": str(seed)
                    },
                    experiment_id=experiment_id,
                    synchronous=False)
                succeeded = p.wait()
                if succeeded:
                    training_run = tracking_client.get_run(p.run_id)
                    metrics = training_run.data.metrics
                    # cap the loss at the loss of the null model
                    train_loss = min(null_train_loss, metrics["train_{}".format(metric)])
                    valid_loss = min(null_valid_loss, metrics["val_{}".format(metric)])
                    test_loss = min(null_test_loss, metrics["test_{}".format(metric)])
                else:
                    # run failed => return null loss
                    tracking_client.set_terminated(p.run_id, "FAILED")
                    train_loss = null_train_loss
                    valid_loss = null_valid_loss
                    test_loss = null_test_loss
                kiwi.log_metrics({
                    "train_{}".format(metric): train_loss,
                    "val_{}".format(metric): valid_loss,
                    "test_{}".format(metric): test_loss
                })
            if return_all:
                return train_loss, valid_loss, test_loss
            else:
                return valid_loss
        return eval

    with kiwi.start_run() as run:
        experiment_id = run.info.experiment_id
        # Evaluate the null model first.
        # We use the null model (predict everything as the mean) as a reasonable upper bound on
        # loss. We need an upper bound to handle failed runs (which would otherwise return NaNs)
        # because GPyOpt cannot handle Infs.
        # Always including a null model in our results is also good ML practice.
        train_null_loss, valid_null_loss, test_null_loss = new_eval(
            0, experiment_id, _inf, _inf, _inf, True)(params=[[0, 0]])
        myProblem = GPyOpt.methods.BayesianOptimization(
            new_eval(epochs, experiment_id, train_null_loss, valid_null_loss, test_null_loss),
            bounds,
            evaluator_type="local_penalization" if min(batch_size, max_p) > 1 else "sequential",
            batch_size=batch_size,
            num_cores=max_p,
            model_type=gpy_model,
            acquisition_type=gpy_acquisition,
            initial_design_type=initial_design,
            initial_design_numdata=max_runs >> 2,
            exact_feval=False)
        myProblem.run_optimization(max_runs)
        matplotlib.use('agg')
        plt.switch_backend('agg')
        with TempDir() as tmp:
            acquisition_plot = tmp.path("acquisition_plot.png")
            convergence_plot = tmp.path("convergence_plot.png")
            myProblem.plot_acquisition(filename=acquisition_plot)
            myProblem.plot_convergence(filename=convergence_plot)
            if os.path.exists(convergence_plot):
                kiwi.log_artifact(convergence_plot, "convergence_plot")
            if os.path.exists(acquisition_plot):
                kiwi.log_artifact(acquisition_plot, "acquisition_plot")

        # find the best run, log its metrics as the final metrics of this run.
        client = MlflowClient()
        runs = client.search_runs(
            [experiment_id],
            "tags.mlflow.parentRunId = '{run_id}' ".format(run_id=run.info.run_id))
        best_val_train = _inf
        best_val_valid = _inf
        best_val_test = _inf
        best_run = None
        for r in runs:
            if r.data.metrics["val_{}".format(metric)] < best_val_valid:
                best_run = r
                best_val_train = r.data.metrics["train_{}".format(metric)]
                best_val_valid = r.data.metrics["val_{}".format(metric)]
                best_val_test = r.data.metrics["test_{}".format(metric)]
        kiwi.set_tag("best_run", best_run.info.run_id)
        kiwi.log_metrics({
            "train_{}".format(metric): best_val_train,
            "val_{}".format(metric): best_val_valid,
            "test_{}".format(metric): best_val_test
        })
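# The two search functions above also assume a number of module-level imports that are not
# shown in this excerpt. A plausible (assumed) prelude, with hypothetical import paths for
# MlflowClient and TempDir inside the kiwi package, would be:
#
#   import os
#   from concurrent.futures import ThreadPoolExecutor
#
#   import numpy as np
#   import matplotlib
#   import matplotlib.pyplot as plt
#   import GPyOpt
#
#   import kiwi
#   import kiwi.projects
#   import kiwi.tracking
#   from kiwi.tracking.client import MlflowClient   # hypothetical import path
#   from kiwi.utils.file_utils import TempDir       # hypothetical import path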