def test_log_fn_args_as_params(args, kwargs, expected, start_run):  # pylint: disable=W0613
    """Check that ``log_fn_args_as_params`` records ``arg1``..``arg3`` of ``dummy_fn``.

    ``args``, ``kwargs`` and ``expected`` are injected by the test harness
    (presumably a pytest parametrization defined outside this block -- confirm).
    ``start_run`` is not referenced in the body, hence the pylint suppression;
    it is presumably a fixture that opens the active run queried below.
    """
    log_fn_args_as_params(dummy_fn, args, kwargs)

    client = kiwi.tracking.MlflowClient()
    active_run_id = kiwi.active_run().info.run_id
    logged_params = client.get_run(active_run_id).data.params

    # ``expected`` is paired positionally with the three parameter names.
    param_names = ['arg1', 'arg2', 'arg3']
    for name, expected_value in zip(param_names, expected):
        assert name in logged_params
        assert logged_params[name] == expected_value
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    """Call ``original`` on ``self`` while logging its arguments and callback settings.

    A run is started when none is active, and ended again afterwards only if it
    was started here. The call arguments are logged through
    ``log_fn_args_as_params`` and an ``__MLflowFastaiCallback`` is appended to
    the callbacks handed to ``original``.

    :param self: Object the patched method is bound to; its ``callback_fns`` and
                 ``callbacks`` attributes are read.
    :param original: The unpatched function to invoke.
    :param args: Positional arguments of the call (without ``self``).
    :param kwargs: Keyword arguments of the call; the ``'callbacks'`` entry is
                   (re)assigned.
    :param unlogged_params: Parameter names forwarded to ``log_fn_args_as_params``.
    :param callback_arg_index: Index in ``args`` at which the callbacks argument
                               sits when it is passed positionally.
    :return: Whatever ``original`` returns.
    """
    if not kiwi.active_run():
        try_mlflow_log(kiwi.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    log_fn_args_as_params(original, [self] + list(args), kwargs, unlogged_params)

    callbacks = [cb(self) for cb in self.callback_fns] + (self.callbacks or [])

    # Checking if the 'callback' argument of the function is set.
    # The user's callbacks are copied into a new list instead of being extended
    # with ``+=``: extending in place would leak the MLflow callback into the
    # caller's own list (accumulating on every call) and would fail outright
    # for ``None`` or a tuple.
    if len(args) > callback_arg_index:
        positional = list(args)
        supplied = positional[callback_arg_index]
        user_callbacks = [] if supplied is None else list(supplied)
        callbacks += user_callbacks
        positional[callback_arg_index] = user_callbacks + [__MLflowFastaiCallback(self)]
        args = tuple(positional)
    elif 'callbacks' in kwargs:
        supplied = kwargs['callbacks']
        user_callbacks = [] if supplied is None else list(supplied)
        callbacks += user_callbacks
        kwargs['callbacks'] = user_callbacks + [__MLflowFastaiCallback(self)]
    else:
        kwargs['callbacks'] = [__MLflowFastaiCallback(self)]

    early_stop_callback = _find_callback_of_type(EarlyStoppingCallback, callbacks)
    one_cycle_callback = _find_callback_of_type(OneCycleScheduler, callbacks)

    _log_early_stop_callback_params(early_stop_callback)
    _log_one_cycle_callback_params(one_cycle_callback)

    result = original(self, *args, **kwargs)

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)

    return result
def _run_and_log_function(self, original, args, kwargs, unlogged_params, callback_arg_index):
    """Call ``original`` on ``self`` while logging its arguments and early-stopping data.

    A run is started when none is active, and ended again afterwards only if it
    was started here. The call arguments are logged through
    ``log_fn_args_as_params`` and an ``__MLflowKerasCallback`` is appended to
    the callbacks handed to ``original``.

    :param self: Object the patched method is bound to.
    :param original: The unpatched function to invoke.
    :param args: Positional arguments of the call (without ``self``).
    :param kwargs: Keyword arguments of the call; the ``'callbacks'`` entry is
                   (re)assigned.
    :param unlogged_params: Parameter names forwarded to ``log_fn_args_as_params``.
    :param callback_arg_index: Index in ``args`` at which the callbacks argument
                               sits when it is passed positionally.
    :return: The value returned by ``original`` (named ``history`` here).
    """
    if not kiwi.active_run():
        try_mlflow_log(kiwi.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    log_fn_args_as_params(original, args, kwargs, unlogged_params)
    early_stop_callback = None

    # Checking if the 'callback' argument of the function is set.
    # The user's callbacks are copied into a new list instead of being extended
    # with ``+=``: extending in place would leak the MLflow callback into the
    # caller's own list (accumulating on every call) and would fail outright
    # for ``None`` or a tuple.
    if len(args) > callback_arg_index:
        positional = list(args)
        supplied = positional[callback_arg_index]
        user_callbacks = [] if supplied is None else list(supplied)
        early_stop_callback = _early_stop_check(user_callbacks)
        positional[callback_arg_index] = user_callbacks + [__MLflowKerasCallback()]
        args = tuple(positional)
    elif 'callbacks' in kwargs:
        supplied = kwargs['callbacks']
        user_callbacks = [] if supplied is None else list(supplied)
        early_stop_callback = _early_stop_check(user_callbacks)
        kwargs['callbacks'] = user_callbacks + [__MLflowKerasCallback()]
    else:
        kwargs['callbacks'] = [__MLflowKerasCallback()]

    _log_early_stop_callback_params(early_stop_callback)

    history = original(self, *args, **kwargs)

    _log_early_stop_callback_metrics(early_stop_callback, history)

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)

    return history
def fit_generator(self, *args, **kwargs):
    """Patched ``tensorflow.keras.Model.fit_generator`` that logs the call.

    Inside ``_manage_active_run()`` it logs the call arguments (except the names
    in ``unlogged_params``), routes the callbacks argument through
    ``_setup_callbacks``, invokes the original method, flushes the queue and
    logs the directory at ``log_dir.location`` under ``'tensorboard_logs'``.
    When ``log_dir.is_temp`` is set, the directory is removed afterwards -- also
    when training or logging raises, so it is not left behind on failure.

    :return: Whatever the original ``fit_generator`` returns.
    """
    with _manage_active_run():
        original = gorilla.get_original_attribute(tensorflow.keras.Model, 'fit_generator')

        unlogged_params = ['self', 'generator', 'callbacks', 'validation_data', 'verbose']

        log_fn_args_as_params(original, args, kwargs, unlogged_params)

        # Checking if the 'callback' argument of fit_generator() is set
        # (fifth positional argument, or the 'callbacks' keyword).
        if len(args) >= 5:
            positional = list(args)
            positional[4], log_dir = _setup_callbacks(positional[4])
            args = tuple(positional)
        elif 'callbacks' in kwargs:
            kwargs['callbacks'], log_dir = _setup_callbacks(kwargs['callbacks'])
        else:
            kwargs['callbacks'], log_dir = _setup_callbacks([])

        try:
            result = original(self, *args, **kwargs)
            _flush_queue()
            _log_artifacts_with_warning(local_dir=log_dir.location,
                                        artifact_path='tensorboard_logs')
        except BaseException:
            # Best-effort cleanup so a failed run does not leak the directory;
            # errors are ignored here so the original exception is the one
            # that propagates.
            if log_dir.is_temp:
                shutil.rmtree(log_dir.location, ignore_errors=True)
            raise

        if log_dir.is_temp:
            shutil.rmtree(log_dir.location)

        return result
def test_log_fn_args_as_params_ignores_unwanted_parameters(start_run):  # pylint: disable=W0613
    """Check that parameters listed as unlogged are not recorded on the run.

    All three names ``arg1``..``arg3`` are passed as unlogged, so the run is
    expected to end up with no params at all. ``start_run`` is not referenced
    in the body, hence the pylint suppression; it is presumably a fixture that
    opens the active run queried below.
    """
    # Positional arguments are given as a one-element tuple. A bare string here
    # would be a sequence of its characters rather than one positional value.
    args = ('arg1',)
    kwargs = {'arg2': 'value'}
    unlogged = ['arg1', 'arg2', 'arg3']

    log_fn_args_as_params(dummy_fn, args, kwargs, unlogged)

    client = kiwi.tracking.MlflowClient()
    params = client.get_run(kiwi.active_run().info.run_id).data.params
    assert len(params.keys()) == 0
def fit(self, *args, **kwargs):
    """Patched ``tensorflow.keras.Model.fit`` that logs the call.

    Inside ``_manage_active_run()`` it logs the call arguments (except the names
    in ``unlogged_params``), looks for an early-stopping callback among the
    user's callbacks, routes the callbacks argument through ``_setup_callbacks``,
    invokes the original method, logs early-stopping params/metrics, flushes the
    queue and logs the directory at ``log_dir.location`` under
    ``'tensorboard_logs'``. When ``log_dir.is_temp`` is set, the directory is
    removed afterwards -- also when training or logging raises, so it is not
    left behind on failure.

    :return: The value returned by the original ``fit`` (named ``history`` here).
    """
    with _manage_active_run():
        original = gorilla.get_original_attribute(tensorflow.keras.Model, 'fit')

        unlogged_params = ['self', 'x', 'y', 'callbacks', 'validation_data', 'verbose']

        log_fn_args_as_params(original, args, kwargs, unlogged_params)
        early_stop_callback = None

        # Checking if the 'callback' argument of fit() is set
        # (sixth positional argument, or the 'callbacks' keyword).
        if len(args) >= 6:
            positional = list(args)
            early_stop_callback = _early_stop_check(positional[5])
            positional[5], log_dir = _setup_callbacks(positional[5])
            args = tuple(positional)
        elif 'callbacks' in kwargs:
            early_stop_callback = _early_stop_check(kwargs['callbacks'])
            kwargs['callbacks'], log_dir = _setup_callbacks(kwargs['callbacks'])
        else:
            kwargs['callbacks'], log_dir = _setup_callbacks([])

        try:
            _log_early_stop_callback_params(early_stop_callback)

            history = original(self, *args, **kwargs)

            _log_early_stop_callback_metrics(early_stop_callback, history)

            _flush_queue()
            _log_artifacts_with_warning(local_dir=log_dir.location,
                                        artifact_path='tensorboard_logs')
        except BaseException:
            # Best-effort cleanup so a failed run does not leak the directory;
            # errors are ignored here so the original exception is the one
            # that propagates.
            if log_dir.is_temp:
                shutil.rmtree(log_dir.location, ignore_errors=True)
            raise

        if log_dir.is_temp:
            shutil.rmtree(log_dir.location)

        return history
def train(*args, **kwargs):
    """Patched ``xgboost.train`` that logs params, metrics, importances and the model.

    Steps, in order:

    1. Start a run when none is active (and remember to end it at the end).
    2. Log the booster ``params`` (first positional argument or ``params``
       keyword) and the remaining call arguments not in ``unlogged_params``.
    3. Append a callback that records ``env.evaluation_result_list`` per
       iteration, then call the original ``train``.
    4. Log the recorded metrics per step; if ``early_stopping_rounds`` was
       passed, also log ``stopped_iteration``, ``best_iteration`` and the
       metrics at the best iteration with step ``len(eval_results)``.
    5. For each entry of ``importance_types`` (not defined in this block; taken
       from the enclosing scope), log a bar plot and a JSON file of
       ``model.get_score``.
    6. Log the model under artifact path ``'model'``.

    :return: The model returned by the original ``train``.
    """

    def record_eval_results(eval_results):
        """
        Create a callback function that records evaluation results.
        """
        def callback(env):
            eval_results.append(dict(env.evaluation_result_list))
        return callback

    def log_feature_importance_plot(features, importance, importance_type):
        """
        Log feature importance plot.
        """
        import matplotlib.pyplot as plt

        features = np.array(features)
        importance = np.array(importance)
        indices = np.argsort(importance)
        features = features[indices]
        importance = importance[indices]
        num_features = len(features)

        # If num_features > 10, increase the figure height to prevent the plot
        # from being too dense.
        w, h = [6.4, 4.8]  # matplotlib's default figure size
        h = h + 0.1 * num_features if num_features > 10 else h
        fig, ax = plt.subplots(figsize=(w, h))

        yloc = np.arange(num_features)
        ax.barh(yloc, importance, align='center', height=0.5)
        ax.set_yticks(yloc)
        ax.set_yticklabels(features)
        ax.set_xlabel('Importance')
        ax.set_title('Feature Importance ({})'.format(importance_type))
        fig.tight_layout()

        tmpdir = tempfile.mkdtemp()
        try:
            # Name the file from this function's own parameter rather than the
            # caller's loop variable, so the helper does not depend on the
            # enclosing loop.
            filepath = os.path.join(tmpdir,
                                    'feature_importance_{}.png'.format(importance_type))
            fig.savefig(filepath)
            try_mlflow_log(kiwi.log_artifact, filepath)
        finally:
            plt.close(fig)
            shutil.rmtree(tmpdir)

    if not kiwi.active_run():
        try_mlflow_log(kiwi.start_run)
        auto_end_run = True
    else:
        auto_end_run = False

    original = gorilla.get_original_attribute(xgboost, 'train')

    # logging booster params separately via mlflow.log_params to extract key/value pairs
    # and make it easier to compare them across runs.
    params = args[0] if len(args) > 0 else kwargs['params']
    try_mlflow_log(kiwi.log_params, params)

    unlogged_params = ['params', 'dtrain', 'evals', 'obj', 'feval', 'evals_result',
                       'xgb_model', 'callbacks', 'learning_rates']
    log_fn_args_as_params(original, args, kwargs, unlogged_params)

    all_arg_names = inspect.getargspec(original)[0]  # pylint: disable=W1505
    num_pos_args = len(args)

    # adding a callback that records evaluation results.
    eval_results = []
    callbacks_index = all_arg_names.index('callbacks')
    callback = record_eval_results(eval_results)
    # The user's callbacks are copied into a new list rather than extended with
    # ``+=``, so the caller's list is left untouched, and ``None`` is accepted
    # for a positional callbacks argument just as it is for the keyword.
    if num_pos_args >= callbacks_index + 1:
        positional = list(args)
        supplied = positional[callbacks_index]
        positional[callbacks_index] = ([] if supplied is None else list(supplied)) + [callback]
        args = tuple(positional)
    elif 'callbacks' in kwargs and kwargs['callbacks'] is not None:
        kwargs['callbacks'] = list(kwargs['callbacks']) + [callback]
    else:
        kwargs['callbacks'] = [callback]

    # training model
    model = original(*args, **kwargs)

    # logging metrics on each iteration.
    for idx, metrics in enumerate(eval_results):
        try_mlflow_log(kiwi.log_metrics, metrics, step=idx)

    # If early_stopping_rounds is present, logging metrics at the best iteration
    # as extra metrics with the max step + 1.
    early_stopping_index = all_arg_names.index('early_stopping_rounds')
    early_stopping = (num_pos_args >= early_stopping_index + 1 or
                      'early_stopping_rounds' in kwargs)
    if early_stopping:
        extra_step = len(eval_results)
        try_mlflow_log(kiwi.log_metric, 'stopped_iteration', len(eval_results) - 1)
        try_mlflow_log(kiwi.log_metric, 'best_iteration', model.best_iteration)
        try_mlflow_log(kiwi.log_metrics, eval_results[model.best_iteration],
                       step=extra_step)

    # logging feature importance as artifacts.
    for imp_type in importance_types:
        imp = model.get_score(importance_type=imp_type)
        try:
            # Unpacking lives inside the try: an empty importance dict makes
            # ``zip(*...)`` yield nothing to unpack, which must not abort
            # training results that already exist.
            features, importance = zip(*imp.items())
            log_feature_importance_plot(features, importance, imp_type)
        except Exception:  # pylint: disable=broad-except
            _logger.exception('Failed to log feature importance plot. XGBoost autologging '
                              'will ignore the failure and continue. Exception: ')

        tmpdir = tempfile.mkdtemp()
        try:
            filepath = os.path.join(tmpdir, 'feature_importance_{}.json'.format(imp_type))
            with open(filepath, 'w') as f:
                json.dump(imp, f)
            try_mlflow_log(kiwi.log_artifact, filepath)
        finally:
            shutil.rmtree(tmpdir)

    try_mlflow_log(log_model, model, artifact_path='model')

    if auto_end_run:
        try_mlflow_log(kiwi.end_run)
    return model