Example #1
    def __init__(self, arguments, model_name, base_model):
        self.__cnvrg_env = True
        self.__arguments = cast_input_types(arguments)
        self.__shape = (arguments.image_height, arguments.image_width)
        self.__classes = parse_classes(arguments.data)
        self.__channels = TensorflowTrainer.RGB_CHANNELS if arguments.image_color == 'rgb' \
            else TensorflowTrainer.GRAYSCALE_CHANNELS
        self.__model = ModelGenerator(
            base_model=base_model,
            num_of_classes=len(self.__classes),
            fully_connected_layers=TensorflowTrainer.fully_connected_layers,
            loss_function=arguments.loss,
            dropout=arguments.dropout,
            activation_hidden_layers=arguments.hidden_layer_activation,
            activation_output_layers=arguments.output_layer_activation,
            optimizer=arguments.optimizer).get_model()
        try:
            self.__experiment = Experiment()
        except cnvrg.modules.UserError:
            self.__cnvrg_env = False
        self.__metrics = {
            'tensorflow local version': tf.__version__,
            'GPUs found':
            len(tf.config.experimental.list_physical_devices('GPU')),
            'Model': model_name,
            'Classes list': self.__classes
        }
Example #2
    def log_trial_result(self, iteration, trial, result):
        e = CNVRGExperiment(self._cnvrg_experiments[trial.trial_id])
        e.log(str(result))
        if not self._cnvrg_metrics:
            self._cnvrg_metrics = list(result)

        training_iteration = result['training_iteration']
        for key in self._cnvrg_metrics:
            try:
                value = float(result[key])
            except (ValueError, TypeError):
                continue
            e.log_metric(key, value, training_iteration)
Example #3
	def __init__(self, model, train_set, test_set, output_model_name, testing_mode, folds=None):
		self.__model = model
		self.__x_train, self.__y_train = train_set
		self.__x_test, self.__y_test = test_set
		self.__all_data_concatenated = pd.concat([pd.concat([self.__x_train, self.__x_test], axis=0),
												  pd.concat([self.__y_train, self.__y_test], axis=0)], axis=1)
		self.__testing_mode = testing_mode
		self.__cross_val_folds = folds
		self.__is_cross_val = (folds is not None)
		self.__features = list(self.__x_train.columns)
		self.__labels = [str(l) for l in list(set(self.__y_train).union(set(self.__y_test)))]
		self.__metrics = {'model': output_model_name, 'train set size': len(self.__y_train), 'test set size': len(self.__y_test)}
		self.__experiment = Experiment()
Example #4
    def __init__(self,
                 path_to_csv,
                 target_column=None,
                 missing_dict=None,
                 scale_dict=None,
                 normalize_list=None,
                 one_hot_list=None,
                 output_name=None,
                 plot_vis=False):
        """
		:param path_to_csv: string
		:param target_column: string
		:param missing_dict: dict
		:param scale_dict: dict
		:param normalize_list: list
		:param one_hot_list: list
		:param output_name: string
		"""
        self.__cnvrg_env = True  ### When testing locally, it is turned False.
        self.__data = pd.read_csv(path_to_csv, index_col=0)
        self.__target_column = (
            target_column,
            self.__data[target_column]) if target_column is not None else (
                self.__data.columns[-1], self.__data[self.__data.columns[-1]])
        self.__features = [
            f for f in list(self.__data.columns)
            if f != self.__target_column[0]
        ]
        self.__data = self.__data[
            self.__features]  #  removes the target column.
        try:
            self.__experiment = Experiment()
        except cnvrg.modules.errors.UserError:
            self.__cnvrg_env = False

        self.__normalize_list = CSVProcessor.__parse_list(
            normalize_list) if isinstance(normalize_list,
                                          str) else normalize_list
        self.__one_hot_list = CSVProcessor.__parse_list(
            one_hot_list) if isinstance(one_hot_list, str) else one_hot_list
        self.__output_name = output_name if output_name is not None else path_to_csv.split(
            '.csv')[0] + '_processed.csv'
        self.__plot_vis = plot_vis

        ### changed to list of lists instead of dictionary:
        self.__scale_dict = CSVProcessor.__parse_2d_list(
            scale_dict) if isinstance(scale_dict, str) else scale_dict
        self.__missing_dict = CSVProcessor.__parse_2d_list(
            missing_dict) if isinstance(missing_dict, str) else missing_dict
Example #5
    def __init__(self, input, to, template, inplace, allow_errors):
        self.__cnvrg_env = True  # When testing locally, it is turned False.
        self.input = input
        self.to = to
        self.template = template
        self.inplace = inplace
        self.allow_errors = allow_errors

        try:
            self.__experiment = Experiment()
        except Exception:  # Experiment() fails outside a cnvrg environment.
            self.__cnvrg_env = False

        if self.__cnvrg_env:
            self.__experiment.log_param("template", template)
Example #6
def lgbm_reg_cnvrg_api(experiment, artifacts_path, metrics):
    global experiment_file_path

    # type handling when saving json (numpy types)
    def default(o):
        # np.integer / np.floating cover every numpy int and float width; the
        # np.int / np.float aliases used originally were removed in NumPy 1.24.
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        raise TypeError

    experiment_ix = experiment.get('ix')
    hyperparams_dumped = json.dumps(experiment.get('hyperparams'),
                                    default=default)
    metrics_dumped = json.dumps(metrics, default=default)

    cmd = "python3 {}".format(experiment_file_path)

    # os.system(cmd)
    e = Experiment.run(cmd,
                       title='lgbm_reg_experiment-{}'.format(
                           experiment.get('ix')),
                       arguments={
                           'experiment_ix': experiment_ix,
                           'hyperparams': "'{}'".format(hyperparams_dumped),
                           'artifacts_path': artifacts_path,
                           'metrics': "'{}'".format(metrics_dumped)
                       },
                       compute='medium',
                       output_dir='research/artifacts',
                       sync_before=False)
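
A minimal, self-contained sketch of the numpy-aware JSON serializer pattern used above, assuming the `default` helper is lifted to module scope (the payload values are illustrative, not from the source):

import json
import numpy as np

def default(o):
    # Convert numpy scalar types to plain Python so json.dumps can handle them.
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    raise TypeError

print(json.dumps({'num_leaves': np.int64(31), 'learning_rate': np.float64(0.1)},
                 default=default))  # -> {"num_leaves": 31, "learning_rate": 0.1}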
Example #7
    def __init__(self,
                 model,
                 train_set,
                 test_set,
                 output_model_name,
                 testing_mode,
                 folds=None,
                 regression_type=0):
        self.__model = model
        self.__x_train, self.__y_train = train_set
        self.__train_set_size = len(self.__y_train)
        self.__x_test, self.__y_test = test_set
        self.__test_set_size = len(self.__y_test)
        self.__testing_mode = testing_mode
        self.__cross_val_folds = folds
        self.__is_cross_val = (folds is not None)
        self.__features = list(self.__x_train.columns)
        self.__labels = [
            str(l)
            for l in list(set(self.__y_train).union(set(self.__y_test)))
        ]
        self.__metrics = {'model': output_model_name}
        self.__y_pred = None
        self.__experiment = Experiment.init(
            'test_charts')  # replace with: self.__experiment = Experiment()
        self.__regression_type = SKTrainerRegression.REGRESSION_TYPE[
            regression_type]

        self.__coef, self.__intercept = None, None
Example #8
	def __init__(self, model, train_set, test_set, output_model_name, testing_mode, folds=None):
		self.__model = model
		self.__x_train, self.__y_train = train_set
		self.__x_test, self.__y_test = test_set
		self.__output_model_name = output_model_name
		self.__testing_mode = testing_mode
		self.__cross_val_folds = folds
		self.__is_cross_val = (folds is not None)
		self.__features = list(self.__x_train.columns)
		self.__labels = [str(l) for l in list(set(self.__y_train).union(set(self.__y_test)))]

		self.__model.fit(self.__x_train, self.__y_train)
		self.__importance = self.__model.feature_importances_

		self.__experiment = Experiment()

		self.__metrics = {'model': self.__output_model_name}
		if self.__is_cross_val:
			self.__metrics['folds'] = self.__cross_val_folds
Example #9
	def __init__(self, model, train_set, test_set, output_model_name, testing_mode):
		self.__model = model
		self.__x_train, _ = (train_set, None) if len(train_set) == 1 else train_set
		self.__train_set_size = len(self.__x_train)
		self.__x_test, self.__y_test = (test_set, None) if len(test_set) == 1 else test_set
		self.__test_set_size = len(self.__x_test)
		self.__testing_mode = testing_mode
		self.__features = list(self.__x_train.columns)
		self.__metrics = {'model': output_model_name}
		self.__labeled = len(train_set) == 2 or len(test_set) == 2  # if any of the sets includes target column.
		# self.__experiment = Experiment()
		self.__experiment = Experiment.init("test_charts")
Example #10
    def log_trial_start(self, trial):
        e = CNVRGExperiment.init()
        self._cnvrg_experiments[trial.trial_id] = e['slug']
        config = trial.config.copy()
        config.pop("callbacks", None)
        e.log_param("trial_id", trial.trial_id)
        e.log_param("run_id", trial.trial_id.split("_")[0])
        e.log(str(config))
        for item in config:
            e.log_param(item, config.get(item))
        e.log("======")
        e.log(str(trial))
Example #11
def main(args):
    args = cast_types(args)

    df = pd.read_csv(args.data)
    X, y = df.iloc[:, :-1], df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    model = init_model(X.shape)  # <--- Doesn't work with the shape.

    train_metrics = model.fit(X_train,
                              y_train,
                              epochs=args.epochs,
                              batch_size=args.batch_size,
                              validation_split=0.2)

    test_metrics = model.evaluate(X_test, y_test)

    # train_loss = list(np.round(train_metrics.history['loss'], 3))
    # train_acc = list(np.round(train_metrics.history['accuracy'], 3))
    # val_loss = list(np.round(train_metrics.history['val_loss'], 3))
    # val_acc = list(np.round(train_metrics.history['val_accuracy'], 3))
    test_loss = float(test_metrics[0])
    test_acc = float(test_metrics[1])

    exp = Experiment()
    exp.log_param("test_loss", test_loss)
    exp.log_param("test_acc", test_acc)

    model.save("model.h5")
Example #12
    def log_trial_end(self, trial, failed):
        e = CNVRGExperiment(self._cnvrg_experiments[trial.trial_id])

        e.log("===== Logging Artifacts =====")
        import os  # os.path.join and os.listdir are used below
        files_list = [
            os.path.join(trial.logdir, p) for p in os.listdir(trial.logdir)
        ]

        e.log_artifacts(files_list)
        e.finish(exit_status=int(failed))
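
The log_trial_start / log_trial_result / log_trial_end hooks in Examples #2, #10 and #12 match the interface of Ray Tune's LoggerCallback; a minimal wiring sketch, assuming the three methods above are collected on one subclass (the class and trainable names here are hypothetical):

from ray import tune
from ray.tune.logger import LoggerCallback

class CnvrgLoggerCallback(LoggerCallback):
    def __init__(self):
        self._cnvrg_experiments = {}  # trial_id -> cnvrg experiment slug
        self._cnvrg_metrics = []
    # log_trial_start / log_trial_result / log_trial_end as defined in
    # Examples #2, #10 and #12.

analysis = tune.run(my_trainable, callbacks=[CnvrgLoggerCallback()])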
Example #13
def lgbm_reg_cnvrg_api(experiment, artifacts_path, metrics):
    # type handling when saving json (numpy types)
    def default(o):
        # np.integer / np.floating cover every numpy int and float width; the
        # np.int / np.float aliases used originally were removed in NumPy 1.24.
        if isinstance(o, np.integer):
            return int(o)
        if isinstance(o, np.floating):
            return float(o)
        raise TypeError

    experiment_ix = experiment.get('ix')
    hyperparams_dumped = json.dumps(experiment.get('hyperparams'),
                                    default=default)
    metrics_dumped = json.dumps(metrics, default=default)

    # cmd = "python3 research/lgbm_reg/train.py --experiment '{}' --artifacts_path '{}' --metrics '{}'".format(experiment_dumped,
    #                                                                                                          artifacts_path,
    #                                                                                                          metrics_dumped)

    cmd = "python3 research/lgbm_reg/train.py"

    # os.system(cmd)
    e = Experiment.run(cmd,
                       title='lgbm_reg_experiment-{}'.format(
                           experiment.get('ix')),
                       arguments={
                           'experiment_ix': experiment_ix,
                           'hyperparams': "'{}'".format(hyperparams_dumped),
                           'artifacts_path': artifacts_path,
                           'metrics': "'{}'".format(metrics_dumped)
                       },
                       compute='medium',
                       output_dir='research/artifacts',
                       sync_before=False)

    e.pull_artifacts(wait_until_success=True)
Example #14
def train_with_cross_validation(model, train_set, test_set, folds, project_dir,
                                output_model_name):
    """
	This method enables sklearn algorithms to perform KFold-cross-validation.
	The method also initiates the cnvrg.io experiment with all its metrics.
	:param model: SKlearn model object (initiated).
	:param train_set: tuple. (X_train, y_train). This is going to be used as a training set.
	:param test_set: tuple. (X_test, y_test). This is going to be used as a test set.
	:param folds: number of splits in the cross validation.
	:param project_dir: the path to the directory which indicates where to save the model.
	:param output_model_name: the name of the output model saved on the disk.
	:return: nothing.
	"""
    train_acc, train_loss = [], []
    kf = KFold(n_splits=folds)
    X, y = train_set

    # --- Training.
    for train_index, val_index in kf.split(X):
        X_train, X_val = X.iloc[train_index, :], X.iloc[val_index, :]
        y_train, y_val = y.iloc[train_index], y.iloc[val_index]
        model.fit(X_train, y_train)
        model.n_estimators += 1

        y_hat = model.predict(X_val)  # y_hat is a.k.a y_pred

        acc = accuracy_score(y_val, y_hat)
        loss = mean_squared_error(y_val, y_hat)

        train_acc.append(acc)
        train_loss.append(loss)

    # --- Testing.
    X_test, y_test = test_set
    y_pred = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    test_loss = mean_squared_error(y_test, y_pred)

    exp = Experiment()
    exp.log_param("model", output_model_name)
    exp.log_param("folds", folds)
    exp.log_metric("train_acc", train_acc)
    exp.log_metric("train_loss", train_loss)
    exp.log_param("test_acc", test_acc)
    exp.log_param("test_loss", test_loss)

    # Save model.
    output_file_name = project_dir + "/" + output_model_name if project_dir is not None else output_model_name
    pickle.dump(model, open(output_file_name, 'wb'))
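
A hypothetical call of train_with_cross_validation (the data objects are illustrative; warm_start is an assumption that makes the `model.n_estimators += 1` pattern above grow the ensemble per fold instead of refitting from scratch):

from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(n_estimators=10, warm_start=True)
train_with_cross_validation(model,
                            train_set=(X_train, y_train),  # pandas DataFrame / Series
                            test_set=(X_test, y_test),
                            folds=5,
                            project_dir=None,
                            output_model_name='model.sav')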
Example #15
def train_without_cross_validation(model, train_set, test_set, project_dir,
                                   output_model_name):
    """
	The method also initiates the cnvrg.io experiment with all its metrics.
	:param model: SKlearn model object (initiated).
	:param train_set: tuple. (X_train, y_train). This is going to be used as a training set.
	:param test_set: tuple. (X_test, y_test). This is going to be used as a test set.
	:param project_dir: the path to the directory which indicates where to save the model.
	:param output_model_name: the name of the output model saved on the disk.
	:return: nothing.
	"""
    X_train, y_train = train_set

    # --- Training.
    model.fit(X_train, y_train)

    y_hat = model.predict(X_train)  # y_hat is a.k.a y_pred

    train_acc = accuracy_score(y_train, y_hat)
    train_loss = mean_squared_error(y_train, y_hat)

    # --- Testing.
    X_test, y_test = test_set
    y_pred = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    test_loss = mean_squared_error(y_test, y_pred)

    exp = Experiment()
    exp.log_param("model", output_model_name)
    exp.log_param("train_acc", train_acc)
    exp.log_param("train_loss", train_loss)
    exp.log_param("test_acc", test_acc)
    exp.log_param("test_loss", test_loss)

    # Save model.
    output_file_name = project_dir + "/" + output_model_name if project_dir is not None else output_model_name
    pickle.dump(model, open(output_file_name, 'wb'))
Example #16
import os
import time
from cnvrg import Experiment
os.system("mkdir -p testfiles")
e = Experiment()
for commit in range(5):
    file_list = []
    for i in range(25):
        with open(f"testfiles/filet{i}.txt", 'w+') as file:
            file.write('hello')
        with open(f"testfiles/filet{i}.txt_tags.yml", 'w+') as yml:
            yml.write(f"---\nid: \"{i}\"\nsource: \"yann lecun\"")
        file_list.append(f"testfiles/filet{i}.txt")
        file_list.append(f"testfiles/filet{i}.txt_tags.yml")
    e.log_artifacts(file_list)
    time.sleep(5)
    print(f"commited: {commit}")
Example #17
def lgbm_reg(experiment, artifacts_path, metrics):
    e = Experiment()
    for param, val in experiment.get('hyperparams').items():
        e.log_param(param, val)
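    # Note: data_path, cv_config, features_names and target_name are not defined
    # in this snippet; they are assumed to be module-level globals in the original script.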

    # init
    data = load_data(data_path)
    cv_data = create_cv_data(data['X_train'], data['y_train'], cv_config=cv_config)

    # hyperparams
    model = create_model(experiment['hyperparams'])

    # scores dict
    scores = {'raw_cv_scores': {}, 'cv_scores': {}, 'test_scores': {}}

    # cv
    for task in cv_data:
        X_train, y_train, X_test, y_test = task['X_train'], task['y_train'], task['X_test'], task['y_test']
        X_train, y_train, X_test, y_test = X_train[features_names], y_train[target_name], \
                                           X_test[features_names], y_test[target_name]

        model.fit(X_train, y_train)

        predictions_test = model.predict(X_test)
        predictions_train = model.predict(X_train)

        test_data_to_evaluate = (predictions_test, y_test)
        train_data_to_evaluate = (predictions_train, y_train)

        scores_train = evaluate(*train_data_to_evaluate, metrics=metrics, data_set_name='train_')
        scores_test = evaluate(*test_data_to_evaluate, metrics=metrics)

        task_scores = {**scores_test, **scores_train}

        for score in task_scores.keys():
            if scores['raw_cv_scores'].get(score) is None:
                scores['raw_cv_scores'][score] = []

            scores['raw_cv_scores'][score].append(task_scores[score])

    # process cv scores
    summarized_cv_scores = summarize_scores(scores['raw_cv_scores'])
    scores['cv_scores'].update(summarized_cv_scores)
    scores.update(summarized_cv_scores)

    # final model
    X_train, y_train, X_test, y_test = data['X_train'], data['y_train'], \
                                       data['X_test'], data['y_test']

    final_model = model.fit(X_train[features_names], y_train[target_name])

    predictions_test = model.predict(X_test[features_names])
    predictions_train = model.predict(X_train[features_names])

    test_data_to_evaluate = (predictions_test, y_test[target_name])
    train_data_to_evaluate = (predictions_train, y_train[target_name])

    scores_train = evaluate(*train_data_to_evaluate, metrics=metrics, data_set_name='train_')
    scores_test = evaluate(*test_data_to_evaluate, metrics=metrics)

    scores['test_scores'] = {**scores_test, **scores_train}

    experiment['scores'] = scores

    save_model(final_model, artifacts_path)
    save_model_scores(experiment, artifacts_path)
Example #18
class SKTrainerRegression:
	DIGITS_TO_ROUND = 3

	REGRESSION_TYPE = ['linear', 'logistic']

	def __init__(self, model, train_set, test_set, output_model_name, testing_mode, folds=None, regression_type=0):
		self.__model = model
		self.__x_train, self.__y_train = train_set
		self.__train_set_size = len(self.__y_train)
		self.__x_test, self.__y_test = test_set
		self.__test_set_size = len(self.__y_test)
		self.__testing_mode = testing_mode
		self.__cross_val_folds = folds
		self.__is_cross_val = (folds is not None)
		self.__features = list(self.__x_train.columns)
		self.__labels = [str(l) for l in list(set(self.__y_train).union(set(self.__y_test)))]
		self.__metrics = {'model': output_model_name}
		self.__y_pred = None
		self.__experiment = Experiment()
		self.__regression_type = SKTrainerRegression.REGRESSION_TYPE[regression_type]

		self.__coef, self.__intercept = None, None

	def run(self):
		self.__model.fit(self.__x_train, self.__y_train)

		try: self.__coef = self.__model.coef_
		except AttributeError: pass

		try: self.__intercept = self.__model.intercept_
		except AttributeError: pass

		if self.__is_cross_val:
			self.__metrics['folds'] = self.__cross_val_folds

		if self.__is_cross_val is True:
			self.__train_with_cross_validation()
		else:
			self.__train_without_cross_validation()
		self.__save_model()

	def __plot_all(self, y_test_pred):
		self.__plot_accuracies_and_errors()
		# self.__plot_regression_function()
		self.__plot_feature_importance()
		self.__plot_correlation_matrix()
		# self.__plot_feature_vs_feature()

	def __train_with_cross_validation(self):
		"""
		This method enables sk-learn algorithms to perform KFold-cross-validation.
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		scores = cross_validate(estimator=self.__model,
								X=self.__x_train,
								y=self.__y_train,
								cv=self.__cross_val_folds,
								return_train_score=True,
								scoring=['neg_mean_squared_error', 'neg_mean_absolute_error', 'r2', 'accuracy'],
								return_estimator=True)

		train_err_cv_mse = (-1) * scores['train_neg_mean_squared_error']
		train_err_cv_mae = (-1) * scores['train_neg_mean_absolute_error']
		train_err_cv_r2 = scores['train_r2']

		val_acc_cv = scores['test_accuracy']
		val_err_cv_mse = (-1) * scores['test_neg_mean_squared_error']
		val_err_cv_mae = (-1) * scores['test_neg_mean_absolute_error']
		val_err_cv_r2 = scores['test_r2']

		self.__model = scores['estimator'][-1]
		self.__y_pred = self.__model.predict(self.__x_test)
		test_acc = accuracy_score(self.__y_test, self.__y_pred)
		test_loss = mean_squared_error(self.__y_test, self.__y_pred)
		self.__metrics.update({
			'train_loss_mae': train_err_cv_mae,
			'train_loss_mse': train_err_cv_mse,
			'train_loss_r2': train_err_cv_r2,
			'validation_acc': val_acc_cv,
			'val_loss_mae': val_err_cv_mae,
			'val_loss_mse': val_err_cv_mse,
			'val_loss_r2': val_err_cv_r2,
			'test_acc': test_acc,
			'test_loss_mse': test_loss})
		self.__plot_all(self.__y_pred)

	def __train_without_cross_validation(self):
		"""
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		y_hat = self.__model.predict(self.__x_train)  # y_hat is a.k.a y_pred

		train_loss_MSE = mean_squared_error(self.__y_train, y_hat)
		train_loss_MAE = mean_absolute_error(self.__y_train, y_hat)
		train_loss_R2 = r2_score(self.__y_train, y_hat)
		self.__y_pred = self.__model.predict(self.__x_test)
		test_loss_MSE = mean_squared_error(self.__y_test, self.__y_pred)
		test_loss_MAE = mean_absolute_error(self.__y_test, self.__y_pred)
		test_loss_R2 = r2_score(self.__y_test, self.__y_pred)
		self.__metrics.update({
			'train_loss_mae': train_loss_MAE,
			'train_loss_mse': train_loss_MSE,
			'train_loss_r2': train_loss_R2,
			'test_loss_mse': test_loss_MSE,
			'test_loss_mae': test_loss_MAE,
			'test_loss_r2': test_loss_R2})
		self.__plot_all(self.__y_pred)

	def __plot_regression_function(self):
		if self.__regression_type == 'linear':
			a, b = self.__coef[0], self.__intercept
			x = np.linspace(-100, 100, 200)
			y = a * x + b
		elif self.__regression_type == 'logistic':
			x = np.linspace(-100, 100, 200)
			y = 1 / (1 + np.exp(-x))
		self.__experiment.log_metric(key="Regression Function", Xs=x.tolist(), Ys=y.tolist(), grouping=['regression line'] * len(x))

	def __plot_feature_importance(self):
		try:
			importance = getattr(self.__model, "feature_importances_")
			if self.__testing_mode is False:
				self.__experiment.log_chart('Feature Importance', x_axis='Features', y_axis='Importance', data=Bar(x=self.__features, y=importance))
			else:
				print(importance)
		except AttributeError:
			pass

	def __plot_accuracies_and_errors(self):
		if self.__testing_mode is True:
			print("Model: {model}\n"
				  "train_acc={train_acc}\n"
				  "train_loss={train_loss}\n"
				  "test_acc={test_acc}\n"
				  "test_loss={test_loss}".format(
				model=self.__metrics['model'], train_acc=self.__metrics['train_acc'], train_loss=self.__metrics['train_loss'],
				test_acc=self.__metrics['test_acc'], test_loss=self.__metrics['test_loss']))
			if self.__is_cross_val is True:
				print("Folds: {folds}\n".format(folds=self.__metrics['folds']))
		else:  # testing mode is off.
			self.__plot_accuracies_and_errors_helper()  # round once, not once per metric
			for k, v in self.__metrics.items():
				if isinstance(v, list):
					self.__experiment.log_metric(k, v)
				else:
					self.__experiment.log_param(k, v)

	def __plot_accuracies_and_errors_helper(self):
		for k, v in self.__metrics.items():
			if isinstance(v, float):
				self.__metrics[k] = round(self.__metrics[k], SKTrainerRegression.DIGITS_TO_ROUND)

	def __save_model(self):
		output_model_name = self.__metrics['model']
		output_file_name = os.environ.get("CNVRG_WORKDIR") + "/" + output_model_name if os.environ.get("CNVRG_WORKDIR") \
																				is not None else output_model_name
		pickle.dump(self.__model, open(output_file_name, 'wb'))

	"""training & testing methods"""

	def __plot_correlation_matrix(self):
		data = pd.concat([pd.concat([self.__x_train, self.__x_test], axis=0), pd.concat([self.__y_train, self.__y_test], axis=0)], axis=1)
		correlation = data.corr()
		self.__experiment.log_chart("correlation", [MatrixHeatmap(np.round(correlation.values, 2))],
									x_ticks=correlation.index.tolist(), y_ticks=correlation.index.tolist())

	def __plot_feature_vs_feature(self):
		data = pd.concat([pd.concat([self.__x_train, self.__x_test], axis=0), pd.concat([self.__y_train, self.__y_test], axis=0)], axis=1)
		indexes = data.select_dtypes(include=["number"]).columns
		corr = data.corr()
		for idx, i in enumerate(indexes):
			for jdx, j in enumerate(indexes):
				if i == j: continue
				if jdx < idx: continue
				corr_val = abs(corr[i][j])
				if 1 == corr_val or corr_val < 0.5: continue
				print("create", i, "against", j, "scatter chart")
				droplines = data[[i, j]].notnull().all(1)
				x, y = data[droplines][[i, j]].values.transpose()
				self.__experiment.log_chart("{i}_against_{j}".format(i=i, j=j),
											[Scatterplot(x=x.tolist(), y=y.tolist())],
											title="{i} against {j}".format(i=i, j=j))
Example #19
import os
import time
from cnvrg import Experiment
os.system("mkdir -p testfiles")
e = Experiment()
for commit in range(5):
    file_list = []
    for i in range(25):
        with open(f"testfiles/filet{i}.txt", 'w+') as file:
            file.write('hello')
        with open(f"testfiles/filet{i}.txt_tags.yml", 'w+') as yml:
            yml.write(f"---\nid: \"{i}\"\nsource: \"yann lecun\"")
        file_list.append(f"testfiles/filet{i}.txt")
        file_list.append(f"testfiles/filet{i}.txt_tags.yml")
    e.log_artifacts(file_list)
    time.sleep(5)
Example #20
from cnvrg import Experiment
import time
import os

e = Experiment()
e.log_param("test_acc", 0.6)
f = open("filename_06", "a")
f.write("hello")
f.close()
e.sync(message="my commit: 06")
Example #21
def train_without_cross_validation(model, train_set, test_set, project_dir,
                                   output_model_name):
    X_train, y_train = train_set
    # --- Training.
    model.fit(X_train, y_train)
    y_hat = model.predict(X_train)  # y_hat is a.k.a y_pred

    train_acc = accuracy_score(y_train, y_hat)
    train_loss = mean_squared_error(y_train, y_hat)
    # --- Testing.
    X_test, y_test = test_set
    y_pred = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    test_loss = mean_squared_error(y_test, y_pred)

    exp = Experiment()
    exp.log_param("model", output_model_name)
    exp.log_param("train_acc", train_acc)
    exp.log_param("train_loss", train_loss)
    exp.log_param("test_acc", test_acc)
    exp.log_param("test_loss", test_loss)

    # Save model.
    output_file_name = project_dir + "/" + output_model_name if project_dir is not None else output_model_name
    pickle.dump(model, open(output_file_name, 'wb'))
Example #22
class CSVProcessor:
    def __init__(self,
                 path_to_csv,
                 target_column=None,
                 missing_dict=None,
                 scale_dict=None,
                 normalize_list=None,
                 one_hot_list=None,
                 output_name=None,
                 plot_vis=False):
        """
		:param path_to_csv: string
		:param target_column: string
		:param missing_dict: dict
		:param scale_dict: dict
		:param normalize_list: list
		:param one_hot_list: list
		:param output_name: string
		"""
        self.__cnvrg_env = True  ### When testing locally, it is turned False.
        self.__data = pd.read_csv(path_to_csv, index_col=0)
        self.__target_column = (
            target_column,
            self.__data[target_column]) if target_column is not None else (
                self.__data.columns[-1], self.__data[self.__data.columns[-1]])
        self.__features = [
            f for f in list(self.__data.columns)
            if f != self.__target_column[0]
        ]
        self.__data = self.__data[
            self.__features]  #  removes the target column.
        try:
            self.__experiment = Experiment()
        except cnvrg.modules.errors.UserError:
            self.__cnvrg_env = False

        self.__normalize_list = CSVProcessor.__parse_list(
            normalize_list) if isinstance(normalize_list,
                                          str) else normalize_list
        self.__one_hot_list = CSVProcessor.__parse_list(
            one_hot_list) if isinstance(one_hot_list, str) else one_hot_list
        self.__output_name = output_name if output_name is not None else path_to_csv.split(
            '.csv')[0] + '_processed.csv'
        self.__plot_vis = plot_vis

        ### changed to list of lists instead of dictionary:
        self.__scale_dict = CSVProcessor.__parse_2d_list(
            scale_dict) if isinstance(scale_dict, str) else scale_dict
        self.__missing_dict = CSVProcessor.__parse_2d_list(
            missing_dict) if isinstance(missing_dict, str) else missing_dict

    def run(self):
        self.__handle_missing()
        self.__one_hot_encoding_aka_dummy()
        self.__scale()
        self.__normalize()
        self.__set_target_column()
        self.__save()
        if self.__cnvrg_env:
            self.__plot_metrics()  ### using cnvrg.
            self.__plot_visualization(plot_correlation=True)  ### using cnvrg.
        self.__check_nulls_before_output()

    def __scale(self):
        scale = lambda m, r_min, r_max, t_min, t_max: ((
            (m - r_min) / (r_max - r_min)) * (t_max - t_min)) + t_min

        if self.__scale_dict is not None:
            scale_all = False
            if set(self.__scale_dict.keys()) == {'all'}: scale_all = True  # set('all') would be {'a', 'l'}
            columns_to_scale = self.__features if scale_all is True else self.__scale_dict.keys(
            )
            for col in columns_to_scale:
                y, x = (self.__data[col].min(), self.__data[col].max()
                        ) if scale_all else CSVProcessor.__scale_helper(
                            self.__scale_dict[col])
                self.__data[col] = scale(self.__data[col],
                                         self.__data[col].min(),
                                         self.__data[col].max(), y, x)

    def __normalize(self):
        if self.__normalize_list is not None:
            normalize_all = False
            if set(self.__normalize_list) == {'all'}: normalize_all = True

            columns_to_scale = self.__features if normalize_all is True else self.__normalize_list
            for col in columns_to_scale:
                min_range, max_range = self.__data[col].min(
                ), self.__data[col].max()
                self.__data[col] -= min_range
                self.__data[col] /= (max_range - min_range)

    def __one_hot_encoding_aka_dummy(self):
        """
		Handles dummys.
		"""
        if self.__one_hot_list is not None:
            self.__data = pd.get_dummies(self.__data,
                                         columns=self.__one_hot_list)

    def __handle_missing(self):
        """
		Options:
		1) fill_X (fill with value x)
		2) drop
		3) avg (fill with avg)
		4) med (short of median)
		5) rand_A_B (fill with random value in range [A,B]
		"""
        if self.__missing_dict is not None:
            handle_all, task_all = False, None
            if set(self.__missing_dict.keys()) == {'all'}:
                handle_all, task_all = True, self.__missing_dict['all']
            column_to_handle = self.__features if handle_all is True else self.__missing_dict.keys(
            )

            for col in column_to_handle:
                task = task_all if task_all is not None else self.__missing_dict[
                    col]
                if task.startswith('fill_'):
                    value = float(task[len('fill_'):]
                                  ) if '.' in task[len('fill_'):] else int(
                                      task[len('fill_'):])
                    self.__data[col] = self.__data[col].fillna(value)
                elif task.startswith('drop'):
                    self.__data = self.__data[self.__data[col].notna()]
                elif task.startswith('avg'):
                    self.__data[col] = self.__data[col].fillna(
                        self.__data[col].mean())
                elif task.startswith('med'):
                    self.__data[col] = self.__data[col].fillna(
                        self.__data[col].median())
                elif task.startswith('randint_'):
                    a, b = task[len('randint_'):].split('_')
                    a, b = float(a) if '.' in a else int(a), float(
                        b) if '.' in b else int(b)
                    self.__data[col] = self.__data[col].fillna(
                        np.random.randint(a, b))
                else:
                    raise ValueError(
                        'Missing Values Handling - Undefined task.')

    def __set_target_column(self):
        self.__data[self.__target_column[0]] = self.__target_column[1]

    def __plot_metrics(self):
        self.__experiment.log_param("output_file", self.__output_name)

    def __plot_visualization(self, plot_correlation=True):
        if self.__plot_vis is False: return

        # Tasks:
        if plot_correlation: self.__plot_correlation_matrix()

    def __save(self):
        self.__data.to_csv(self.__output_name)

    def __check_nulls_before_output(self):
        # Check empty and nan values to warn the user.
        time.sleep(8)
        nulls_report = dict(self.__data.isnull().sum())
        features_with_null_values = [
            k for k, v in nulls_report.items() if v != 0
        ]
        # if len(features_with_null_values) != 0:
        # 	warnings.warn("Null values or empty cells in the data set.", UserWarning)
        return

    """ ------------------- """
    """ ----- Helpers ----- """
    """ ------------------- """

    @staticmethod
    def __parse_2d_list(as_string):
        final_dict = {}
        trimmed = as_string.replace(' ', '')
        commans_idxs = [0] + [
            i for i in range(1, len(trimmed)) if trimmed[i] == ','
            and trimmed[i - 1] == ']' and trimmed[i + 1] == '['
        ] + [len(trimmed) - 1]  ### if its 0, we have single array.
        sub_lists = [
            trimmed[commans_idxs[i - 1] + 1:commans_idxs[i]]
            for i in range(1, len(commans_idxs))
        ] if len(commans_idxs) > 2 else [trimmed[1:-1]]

        for sub_list in sub_lists:
            parsed = CSVProcessor.__parse_list(sub_list)
            try:
                final_dict[parsed[0]] = (parsed[1], parsed[2]
                                         )  ### for scaling.
            except IndexError:
                final_dict[parsed[0]] = parsed[
                    1]  ### for filling empty values.

        return final_dict

    @staticmethod
    def __parse_list(list_as_string):
        if list_as_string == '[]': return []

        list_without_parenthesis = list_as_string.strip()[1:-1]
        parsed_list = [
            st.strip() for st in list_without_parenthesis.split(',')
        ]

        # Check if the values are columns numbers.
        try:
            parsed_list = [int(st) for st in parsed_list]
        except ValueError:
            pass

        return parsed_list

    @staticmethod
    def __parse_dict(dict_as_string):
        if dict_as_string == '{}': return {}
        final_key = dict()
        parsed_dict = eval(dict_as_string)
        if not isinstance(parsed_dict, dict):
            raise TypeError('Given a {} instead of dictionary.'.format(
                type(parsed_dict)))
        all_keys = parsed_dict.keys()
        for k in all_keys:
            true_key, true_value = k, parsed_dict[k].split(':')
            true_key = true_key.strip()
            final_key[true_key] = true_value
        return final_key

    @staticmethod
    def __scale_helper(value):
        # Parentheses are required here: without them the conditional expression
        # binds only to the first element and value[1] is taken unconditionally.
        min_val, max_val = value.split(':') if isinstance(value, str) else (value[0], value[1])
        min_val = float(min_val) if '.' in str(min_val) else int(min_val)
        max_val = float(max_val) if '.' in str(max_val) else int(max_val)
        return min_val, max_val

    def __plot_correlation_matrix(self, digits_to_round=3):
        correlation = self.__data.corr()
        self.__experiment.log_chart(
            "Correlation",
            [MatrixHeatmap(np.round(correlation.values, digits_to_round))],
            x_ticks=correlation.index.tolist(),
            y_ticks=correlation.index.tolist())
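
A hypothetical invocation of CSVProcessor (the file name, column names and tasks are illustrative; the string arguments follow the [[column, task], ...] format expected by __parse_2d_list):

processor = CSVProcessor('data.csv',
                         target_column='label',
                         missing_dict='[[age, avg], [height, drop]]',
                         normalize_list='[age, height]',
                         one_hot_list='[city]')
processor.run()  # writes data_processed.csv next to the input file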
Example #23
class SKTrainer:
	DIGITS_TO_ROUND = 3

	def __init__(self, model, train_set, test_set, output_model_name, testing_mode, folds=None):
		self.__model = model
		self.__x_train, self.__y_train = train_set
		self.__x_test, self.__y_test = test_set
		self.__output_model_name = output_model_name
		self.__testing_mode = testing_mode
		self.__cross_val_folds = folds
		self.__is_cross_val = (folds is not None)
		self.__features = list(self.__x_train.columns)
		self.__labels = [str(l) for l in list(set(self.__y_train).union(set(self.__y_test)))]

		self.__model.fit(self.__x_train, self.__y_train)
		self.__importance = self.__model.feature_importances_

		self.__experiment = Experiment()

		self.__metrics = {'model': self.__output_model_name}
		if self.__is_cross_val:
			self.__metrics['folds'] = self.__cross_val_folds

	def run(self):
		""" runs the training & testing methods. """
		if self.__is_cross_val is True:
			self.__train_with_cross_validation()
		else:
			self.__train_without_cross_validation()

	"""training & testing methods"""

	def __train_with_cross_validation(self):
		"""
		This method enables sk-learn algorithms to perform KFold-cross-validation.
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		train_acc, train_loss = [], []
		kf = KFold(n_splits=self.__cross_val_folds)

		for train_index, val_index in kf.split(self.__x_train):
			X_train, X_val = self.__x_train.iloc[train_index, :], self.__x_train.iloc[val_index, :]
			y_train, y_val = self.__y_train.iloc[train_index], self.__y_train.iloc[val_index]
			self.__model = self.__model.fit(X_train, y_train)

			y_hat = self.__model.predict(X_val)  # y_hat is a.k.a y_pred
			acc = accuracy_score(y_val, y_hat)
			loss = mean_squared_error(y_val, y_hat)

			train_acc.append(acc)
			train_loss.append(loss)

		# --- Testing.
		y_pred = self.__model.predict(self.__x_test)
		test_acc = accuracy_score(self.__y_test, y_pred)
		test_loss = mean_squared_error(self.__y_test, y_pred)
		self.__metrics.update({
			'test_acc': test_acc,
			'test_loss': test_loss
		})
		self.__plot_all(y_pred)

	def __train_without_cross_validation(self):
		"""
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		y_hat = self.__model.predict(self.__x_train)  # y_hat is a.k.a y_pred

		train_acc = accuracy_score(self.__y_train, y_hat)
		train_loss = mean_squared_error(self.__y_train, y_hat)

		y_pred = self.__model.predict(self.__x_test)
		test_acc = accuracy_score(self.__y_test, y_pred)
		test_loss = mean_squared_error(self.__y_test, y_pred)
		self.__metrics.update({
			'train_acc': train_acc,
			'train_loss': train_loss,
			'test_acc': test_acc,
			'test_loss': test_loss
		})
		self.__plot_all(y_pred)

	"""Plotting methods"""

	def __plot_feature_importance(self):
		if self.__testing_mode is False:
			self.__experiment.log_chart('Feature Importance', x_axis='Features', y_axis='Importance', data=Bar(x=self.__features, y=self.__importance))
		else:
			print(self.__importance)

	def __plot_classification_report(self, y_test_pred):
		test_report = classification_report(self.__y_test, y_test_pred, output_dict=True)  # dict
		if self.__testing_mode is False:
			testing_report_as_array = self.__helper_plot_classification_report(test_report)
			self.__experiment.log_chart("Test Set - Classification Report", data=Heatmap(z=testing_report_as_array), y_ticks=self.__labels, x_ticks=["precision", "recall", "f1-score", "support"])
		else:
			print(test_report)

	def __helper_plot_classification_report(self, classification_report_dict):
		""" Converts dictionary given by classification_report to list of lists. """
		rows = []
		for k, v in classification_report_dict.items():
			if k in self.__labels:
				rows.append(list(v.values()))
		values = []
		for y in range(len(rows)):
			for x in range(len(rows[y])):
				values.append((x, y, round(rows[y][x], SKTrainer.DIGITS_TO_ROUND)))
		return values

	def __plot_confusion_matrix(self, y_test_pred=None):
		if self.__y_test is not None and y_test_pred is not None:
			confusion_mat_test = confusion_matrix(self.__y_test, y_test_pred)  # array
			confusion_mat_test = self.__helper_plot_confusion_matrix(confusion_mat_test)
			if self.__testing_mode is False:
				self.__experiment.log_chart("Test Set - confusion matrix", data=Heatmap(z=confusion_mat_test))
			else:
				print(confusion_mat_test)

	def __helper_plot_confusion_matrix(self, confusion_matrix):
		output = []
		for y in range(len(confusion_matrix)):
			for x in range(len(confusion_matrix[y])):
				output.append((x, y, round(float(confusion_matrix[x][y]), SKTrainer.DIGITS_TO_ROUND)))
		return output

	def __plot_roc_curve(self, y_test_pred):
		n_classes = len(self.__labels)
		y_test = self.__y_test.tolist()
		y_test_pred = y_test_pred.tolist()
		if n_classes != 2 or self.__testing_mode is True:
			return
		y_test, y_test_pred = list(y_test), list(y_test_pred)
		FPRs, TPRs, _ = roc_curve(y_test, y_test_pred)
		self.__experiment.log_metric(key='ROC curve', Ys=TPRs.tolist(), Xs=FPRs.tolist())

	def __plot_pandas_analyzer(self):
		data = pd.concat([pd.concat([self.__x_train, self.__x_test], axis=0), pd.concat([self.__y_train, self.__y_test], axis=0)], axis=1)
		if self.__testing_mode is False:
			PandasAnalyzer(data, experiment=self.__experiment)

	def __plot_accuracies_and_errors(self):
		self.__plot_accuracies_and_errors_helper()

		if self.__testing_mode is True:
			print("Model: {model}\n"
				  "train_acc={train_acc}\n"
				  "train_loss={train_loss}\n"
				  "test_acc={test_acc}\n"
				  "test_loss={test_loss}".format(
				model=self.__metrics['model'], train_acc=self.__metrics['train_acc'], train_loss=self.__metrics['train_loss'],
				test_acc=self.__metrics['test_acc'], test_loss=self.__metrics['test_loss']))
			if self.__is_cross_val is True:
				print("Folds: {folds}\n".format(folds=self.__metrics['folds']))

		else:  # testing_mode is False
			self.__experiment.log_param("model", self.__metrics['model'])
			self.__experiment.log_param("test_acc", self.__metrics['test_acc'])
			self.__experiment.log_param("test_loss", self.__metrics['test_loss'])
			if self.__is_cross_val is True:
				self.__experiment.log_param("folds", self.__metrics['folds'])
				self.__experiment.log_metric("train_acc", self.__metrics['train_acc'])
				self.__experiment.log_metric("train_loss", self.__metrics['train_loss'])
				return
			self.__experiment.log_param("train_acc", self.__metrics['train_acc'])
			self.__experiment.log_param("train_loss", self.__metrics['train_loss'])

	def __plot_accuracies_and_errors_helper(self):
		"""Rounds all the values in self.__metrics"""
		keys_to_round = ['train_acc', 'train_loss', 'test_acc', 'test_loss']
		for key in keys_to_round:
			self.__metrics[key] = round(self.__metrics[key], SKTrainer.DIGITS_TO_ROUND)

	def __plot_all(self, y_test_pred):
		"""
		Runs all the plotting methods.
		"""
		self.__plot_pandas_analyzer()
		self.__plot_feature_importance()
		self.__plot_classification_report(y_test_pred=y_test_pred)
		self.__plot_confusion_matrix(y_test_pred=y_test_pred)
		self.__plot_roc_curve(y_test_pred=y_test_pred)
		self.__plot_accuracies_and_errors()
		self.__save_model()

	"""technical methods"""

	def __save_model(self):
		output_file_name = os.environ.get("CNVRG_PROJECT_PATH") + "/" + self.__output_model_name if os.environ.get("CNVRG_PROJECT_PATH") \
																									is not None else self.__output_model_name
		pickle.dump(self.__model, open(output_file_name, 'wb'))
		if not self.__testing_mode:
			os.system("ls -la {}".format(os.environ.get("CNVRG_PROJECT_PATH")))
Example #24
class NbConverter:
    def __init__(self, input, to, template, inplace, allow_errors):
        self.__cnvrg_env = True  # When testing locally, it is turned False.
        self.input = input
        self.to = to
        self.template = template
        self.inplace = inplace
        self.allow_errors = allow_errors

        try:
            self.__experiment = Experiment()
        except Exception:  # Experiment() fails outside a cnvrg environment.
            self.__cnvrg_env = False

        if self.__cnvrg_env:
            self.__experiment.log_param("template", template)

    def run(self):
        if self.__cnvrg_env:
            self.__experiment.log("Configuring nbconvert options")
        run_string = ''
        if self.allow_errors is False:
            if self.template is None:
                if self.to != 'notebook':
                    run_string = "jupyter nbconvert --to {} {}".format(
                        self.to, self.input)
                elif self.inplace is True and self.to == 'notebook':
                    run_string = "jupyter nbconvert --inplace --to {} {}".format(
                        self.to, self.input)
                else:
                    run_string = "jupyter nbconvert --to notebook {}".format(
                        self.input)
            else:
                run_string = "jupyter nbconvert --to {} -template {} {}".format(
                    self.to, self.template, self.input)
        else:
            if self.template is None:
                if self.to != 'notebook':
                    run_string = "jupyter nbconvert --allow-errors --to {} {}".format(
                        self.to, self.input)
                elif self.inplace is True and self.to == 'notebook':
                    run_string = "jupyter nbconvert --allow-errors --inplace --to {} {}".format(
                        self.to, self.input)
                else:
                    run_string = "jupyter nbconvert --allow-errors --to notebook {}".format(
                        self.input)
            else:
                run_string = "jupyter nbconvert --allow-errors --to {} -template {} {}".format(
                    self.to, self.template, self.input)
        log_string = "Running command: {}".format(run_string)
        run_list = run_string.split(' ')
        dir = '/cnvrg'
        if self.__cnvrg_env:
            self.__experiment.log(log_string)
        try:
            subprocess.call(run_list, cwd=dir)
        except OSError:
            print(
                'jupyter nbconvert was unsuccessful. Please check your file path and parameters.'
            )
            exit(1)
        if self.__cnvrg_env:
            self.__experiment.log("Conversion finished")
Example #25
import time
import os
from cnvrg import Experiment
i = 0
while True:
    filename = "test-{file_idx}.log".format(file_idx=i)
    f = open(filename, "a")
    f.write("hello")
    f.close()
    Experiment.sync(message="my commit: %d" % i)
    time.sleep(60)
    i += 1
Example #26
class SKTrainer:
	def __init__(self, model, train_set, test_set, output_model_name, testing_mode, folds=None):
		self.__model = model
		self.__x_train, self.__y_train = train_set
		self.__x_test, self.__y_test = test_set
		self.__all_data_concatenated = pd.concat([pd.concat([self.__x_train, self.__x_test], axis=0),
												  pd.concat([self.__y_train, self.__y_test], axis=0)], axis=1)
		self.__testing_mode = testing_mode
		self.__cross_val_folds = folds
		self.__is_cross_val = (folds is not None)
		self.__features = list(self.__x_train.columns)
		self.__labels = [str(l) for l in list(set(self.__y_train).union(set(self.__y_test)))]
		self.__metrics = {'model': output_model_name, 'train set size': len(self.__y_train), 'test set size': len(self.__y_test)}
		self.__experiment = Experiment()

	def run(self):
		""" runs the training & testing methods. """
		self.__model.fit(self.__x_train.values, self.__y_train.values)

		if self.__is_cross_val: self.__metrics['folds'] = self.__cross_val_folds

		if self.__is_cross_val is True: self.__train_with_cross_validation()
		else: self.__train_without_cross_validation()

		self.__save_model()

	def __plot_all(self, y_test_pred):
		"""
		This method controls the visualization and metrics outputs.
		Hashtag something which you don't want to plot.
		"""
		self.__plot_correlation_matrix()
		# self.__plot_feature_vs_feature()
		self.__plot_feature_importance()
		self.__plot_classification_report(y_test_pred=y_test_pred)
		self.__plot_confusion_matrix(y_test_pred=y_test_pred)
		self.__plot_roc_curve(y_test_pred=y_test_pred)
		self.__plot_accuracies_and_errors()

	"""training & testing methods"""

	def __train_with_cross_validation(self):
		"""
		This method enables sk-learn algorithms to perform KFold-cross-validation.
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		scores = cross_validate(estimator=self.__model,
								X=self.__x_train.values,
								y=self.__y_train.values,
								cv=self.__cross_val_folds,
								return_train_score=True,
								scoring=['neg_mean_squared_error', 'accuracy'],
								return_estimator=True)

		train_acc_cv = scores['train_accuracy']
		train_err_cv = (-1) * scores['train_neg_mean_squared_error']
		val_acc_cv = scores['test_accuracy']
		val_err_cv = (-1) * scores['test_neg_mean_squared_error']
		self.__model = scores['estimator'][-1]

		y_pred = self.__model.predict(self.__x_test.values)
		test_acc = accuracy_score(self.__y_test.values, y_pred)
		test_loss = zero_one_loss(self.__y_test.values, y_pred)
		self.__metrics.update({
			'train_acc': train_acc_cv,
			'train_loss': train_err_cv,
			'train_loss_type': 'MSE',
			'validation_acc': val_acc_cv,
			'validation_loss': val_err_cv,
			'validation_loss_type': 'MSE',
			'test_acc': test_acc,
			'test_loss': test_loss,
			'test_loss_type': 'zero_one_loss'
		})
		self.__plot_all(y_pred)

	def __train_without_cross_validation(self):
		"""
		The method also initiates the cnvrg experiment with all its metrics.
		"""
		y_hat = self.__model.predict(self.__x_train.values)  # y_hat is a.k.a y_pred

		train_acc = accuracy_score(self.__y_train, y_hat)
		train_loss = zero_one_loss(self.__y_train, y_hat)

		y_pred = self.__model.predict(self.__x_test.values)
		test_acc = accuracy_score(self.__y_test, y_pred)
		test_loss = zero_one_loss(self.__y_test, y_pred)
		self.__metrics.update({
			'train_acc': train_acc,
			'train_loss': train_loss,
			'train_loss_type': 'zero_one_loss',
			'test_acc': test_acc,
			'test_loss': test_loss,
			'test_loss_type': 'zero_one_loss'
		})
		self.__plot_all(y_pred)

	def __plot_feature_importance(self):
		try:
			importance = getattr(self.__model, "feature_importances_")
			if self.__testing_mode is False:
				self.__experiment.log_chart('Feature Importance', x_axis='Features', y_axis='Importance', data=Bar(x=self.__features, y=importance))
			else:
				print(importance)
		except AttributeError:
			pass

	def __plot_classification_report(self, y_test_pred):
		test_report = classification_report(self.__y_test, y_test_pred, output_dict=True)  # dict
		if self.__testing_mode is False:
			testing_report_as_array = self.__helper_plot_classification_report(test_report)
			self.__experiment.log_chart("Test Set - Classification Report", data=Heatmap(z=testing_report_as_array), y_ticks=self.__labels, x_ticks=["precision", "recall", "f1-score", "support"])
		else:
			print(test_report)

	def __plot_confusion_matrix(self, y_test_pred=None):
		""" Plots the confusion matrix. """
		if self.__y_test is not None and y_test_pred is not None:
			confusion_mat_test = confusion_matrix(self.__y_test, y_test_pred)  # array
			confusion_mat_test = SKTrainer.__helper_plot_confusion_matrix(confusion_mat_test)
			if self.__testing_mode is False:
				self.__experiment.log_chart("Test Set - confusion matrix", data=Heatmap(z=confusion_mat_test))
			else:
				print(confusion_mat_test)

	def __plot_roc_curve(self, y_test_pred):
		if len(set(self.__y_test)) != 2: return
		fpr, tpr, _ = roc_curve(self.__y_test, y_test_pred)
		if self.__testing_mode is False:
			self.__experiment.log_metric(key='ROC curve', Ys=tpr.tolist(), Xs=fpr.tolist())
		else: print("FPRs: {fpr}\nTPRs: {tpr}".format(fpr=fpr, tpr=tpr))

	def __plot_correlation_matrix(self):
		data = self.__all_data_concatenated
		correlation = data.corr()
		self.__experiment.log_chart("correlation", [MatrixHeatmap(np.round(correlation.values, 2))],
									x_ticks=correlation.index.tolist(), y_ticks=correlation.index.tolist())

	def __plot_feature_vs_feature(self):
		data = self.__all_data_concatenated
		indexes = data.select_dtypes(include=["number"]).columns
		corr = data.corr()
		for idx, i in enumerate(indexes):
			for jdx, j in enumerate(indexes):
				if i == j: continue
				if jdx < idx: continue
				corr_val = abs(corr[i][j])
				if 1 == corr_val or corr_val < 0.5: continue
				droplines = data[[i, j]].notnull().all(1)
				x, y = data[droplines][[i, j]].values.transpose()
				self.__experiment.log_chart("{i}_against_{j}".format(i=i, j=j),
											[Scatterplot(x=x.tolist(), y=y.tolist())],
											title="{i} against {j}".format(i=i, j=j))

	def __plot_accuracies_and_errors(self):
		self.__plot_accuracies_and_errors_helper_rounding()
		if self.__testing_mode is True: self.__plot_accuracies_and_errors_helper_testing_mode()

		for p in ['model', 'test_acc', 'test_loss', 'test_loss_type', 'train set size', 'test set size', 'train_loss_type']:
			self.__experiment.log_param(p, self.__metrics[p])

		if self.__is_cross_val is True:
			self.__experiment.log_param("folds", self.__metrics['folds'])
			self.__experiment.log_param("validation_loss_type", self.__metrics['validation_loss_type'])
			metrics = ['train_acc', 'train_loss', 'validation_acc', 'validation_loss']
			for m in metrics: self.__experiment.log_metric(m, self.__metrics[m], grouping=[m] * len(self.__metrics[m]))
			return

		self.__experiment.log_param("train_acc", self.__metrics['train_acc'])
		self.__experiment.log_param("train_loss", self.__metrics['train_loss'])
		self.__experiment.log_param("train_loss_type", self.__metrics['train_loss_type'])

	def __save_model(self):
		output_model_name = self.__metrics['model']
		output_file_name = os.environ.get("CNVRG_WORKDIR") + "/" + output_model_name if os.environ.get("CNVRG_WORKDIR") is not None else output_model_name
		pickle.dump(self.__model, open(output_file_name, 'wb'))

	""" --- Helpers --- """

	@staticmethod
	def __helper_plot_confusion_matrix(confusion_matrix, digits_to_round=3):
		output = []
		for y in range(len(confusion_matrix)):
			for x in range(len(confusion_matrix[y])):
				# Value at row y, column x of the confusion matrix.
				output.append((x, y, round(float(confusion_matrix[y][x]), digits_to_round)))
		return output

	def __plot_accuracies_and_errors_helper_rounding(self, digits_to_round=3):
		for key in self.__metrics.keys():
			# Skip strings.
			if isinstance(self.__metrics[key], str):
				continue
			# Lists & arrays.
			elif isinstance(self.__metrics[key], (list, np.ndarray)):
				if isinstance(self.__metrics[key], np.ndarray):
					self.__metrics[key] = self.__metrics[key].tolist()
				for ind in range(len(self.__metrics[key])):
					self.__metrics[key][ind] = round(self.__metrics[key][ind], digits_to_round)
			# ints & floats.
			else:
				self.__metrics[key] = round(self.__metrics[key], digits_to_round)

	def __plot_accuracies_and_errors_helper_testing_mode(self, digits_to_round=3):
		print("Model: {model}\n"
			  "train_acc={train_acc}\n"
			  "train_loss={train_loss}\n"
			  "test_acc={test_acc}\n"
			  "test_loss={test_loss}".format(
			model=self.__metrics['model'], train_acc=self.__metrics['train_acc'], train_loss=self.__metrics['train_loss'],
			test_acc=self.__metrics['test_acc'], test_loss=self.__metrics['test_loss']))
		if self.__is_cross_val is True:
			print("Folds: {folds}\n".format(folds=self.__metrics['folds']))

	def __helper_plot_classification_report(self, classification_report_dict, digits_to_round=3):
		""" Converts dictionary given by classification_report to list of lists. """
		rows = []
		for k, v in classification_report_dict.items():
			if k in self.__labels:
				rows.append(list(v.values()))
		values = []
		for y in range(len(rows)):
			for x in range(len(rows[y])):
				values.append((x, y, round(rows[y][x], digits_to_round)))
		return values
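For reference, the report-to-heatmap conversion above can be exercised on its own. A minimal, self-contained sketch (sklearn only; the function name and the toy labels are illustrative, not part of the original class):

from sklearn.metrics import classification_report

def report_to_heatmap_points(y_true, y_pred, labels, digits_to_round=3):
    """Convert classification_report's dict into (x, y, value) triples,
    the same shape the Heatmap chart above consumes."""
    report = classification_report(y_true, y_pred, output_dict=True)
    rows = [list(report[label].values()) for label in labels if label in report]
    return [(x, y, round(rows[y][x], digits_to_round))
            for y in range(len(rows))
            for x in range(len(rows[y]))]

# Toy usage:
print(report_to_heatmap_points(['a', 'b', 'a'], ['a', 'a', 'a'], labels=['a', 'b']))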
Example No. 27
class TensorflowTrainer:
    GRAYSCALE_CHANNELS, RGB_CHANNELS = 1, 3
    VERBOSE = 1
    WORKERS = 3
    fully_connected_layers = [1024, 512, 256]

    def __init__(self, arguments, model_name, base_model):
        self.__cnvrg_env = True
        self.__arguments = cast_input_types(arguments)
        self.__shape = (arguments.image_height, arguments.image_width)
        self.__classes = parse_classes(arguments.data)
        self.__channels = TensorflowTrainer.RGB_CHANNELS if arguments.image_color == 'rgb' \
         else TensorflowTrainer.GRAYSCALE_CHANNELS
        self.__model = ModelGenerator(
            base_model=base_model,
            num_of_classes=len(self.__classes),
            fully_connected_layers=TensorflowTrainer.fully_connected_layers,
            loss_function=arguments.loss,
            dropout=arguments.dropout,
            activation_hidden_layers=arguments.hidden_layer_activation,
            activation_output_layers=arguments.output_layer_activation,
            optimizer=arguments.optimizer).get_model()
        try:
            self.__experiment = Experiment()
        except cnvrg.modules.UserError:
            self.__cnvrg_env = False
        self.__metrics = {
            'tensorflow local version': tf.__version__,
            'GPUs found':
            len(tf.config.experimental.list_physical_devices('GPU')),
            'Model': model_name,
            'Classes list': self.__classes
        }

    def run(self):
        if self.__cnvrg_env:
            self.__plot_all(status='pre-training')  ### using cnvrg.
        self.__train()
        self.__test()
        if self.__cnvrg_env:
            self.__plot_all()  ### using cnvrg.
            self.__export_model()  ### using cnvrg.

    def __plot_all(self, status='post-test'):
        if status == 'pre-training':
            self.__plot_metrics(status='pre-training')
        elif status == 'post-test' and self.__arguments.data_test is not None:
            self.__plot_metrics(status='post-test')
            self.__plot_confusion_matrix(self.__labels, self.__predictions)

    def __train(self):
        train_generator, val_generator = load_generator(
            self.__arguments.data, self.__shape, self.__arguments.test_size,
            self.__arguments.image_color, self.__arguments.batch_size)

        steps_per_epoch_training = self.__arguments.steps_per_epoch
        steps_per_epoch_validation = self.__arguments.steps_per_epoch

        start_time = time.time()
        time_callback = TimeHistory()

        print("---start training---")
        self.__model.fit(train_generator,
                         epochs=self.__arguments.epochs,
                         workers=multiprocessing.cpu_count() - 1,
                         verbose=TensorflowTrainer.VERBOSE,
                         steps_per_epoch=steps_per_epoch_training,
                         validation_data=val_generator,
                         validation_steps=steps_per_epoch_validation,
                         use_multiprocessing=True,
                         callbacks=[time_callback])
        print("---End training---")

        training_time = time.strftime("%H:%M:%S",
                                      time.gmtime(time.time() - start_time))
        self.__metrics['training_time'] = training_time

        if self.__cnvrg_env:
            self.__experiment.log_metric(
                key="Epoch Times",
                Ys=time_callback.times,
                Xs=[i for i in range(1, self.__arguments.epochs + 1)],
                x_axis="Epoch",
                y_axis="Time (Seconds)")

    def __test(self):
        if self.__arguments.data_test is None:
            return
        test_gen = load_generator(self.__arguments.data_test,
                                  self.__shape,
                                  image_color=self.__arguments.image_color,
                                  batch_size=self.__arguments.batch_size,
                                  generate_test_set=True)
        self.__predictions = np.argmax(self.__model.predict(test_gen), axis=1)
        self.__labels = test_gen.classes

        steps_per_epoch_testing = int(np.ceil(test_gen.n / test_gen.batch_size))  # number of batches, not samples
        test_loss, test_acc = self.__model.evaluate(  # evaluate_generator is deprecated in TF 2; evaluate accepts generators
            test_gen,
            workers=TensorflowTrainer.WORKERS,
            verbose=TensorflowTrainer.VERBOSE,
            steps=steps_per_epoch_testing)
        test_acc = round(float(test_acc), 3)
        test_loss = round(float(test_loss), 3)
        self.__metrics['test_acc'] = test_acc
        self.__metrics['test_loss'] = test_loss

    def __export_model(self):
        output_file_name = os.environ.get("CNVRG_WORKDIR") + "/" + self.__arguments.output_model if os.environ.get("CNVRG_WORKDIR") is not None \
         else self.__arguments.output_model
        self.__model.save(output_file_name)
        export_labels_dictionary_from_classes_list(self.__classes)

    """ Cnvrg metrics output """

    def __plot_metrics(self, status='pre-training'):
        """
        :param status: (String) either 'pre-training' or 'post-test'.
        """
        if status == 'pre-training':
            print('Plotting pre-training metrics:')
            for k, v in self.__metrics.items():
                if k not in ['test_acc', 'test_loss']:
                    self.__experiment.log_param(k, v)
        elif status == 'post-test':
            print('Plotting post-test metrics:')
            for k, v in self.__metrics.items():
                if k in ['test_acc', 'test_loss']:
                    self.__experiment.log_param(k, v)
        else:
            raise ValueError('Unrecognized status.')

    def __plot_confusion_matrix(self, labels, predictions):
        """ Plots the confusion matrix. """
        confusion_mat_test = confusion_matrix(labels, predictions)  # array
        confusion_mat_test = TensorflowTrainer.__helper_plot_confusion_matrix(
            confusion_mat_test,
            mat_x_ticks=self.__classes,
            mat_y_ticks=self.__classes)
        self.__experiment.log_chart("confusion matrix",
                                    data=Heatmap(z=confusion_mat_test))

    @staticmethod
    def __helper_plot_confusion_matrix(confusion_matrix,
                                       mat_x_ticks=None,
                                       mat_y_ticks=None,
                                       digits_to_round=3):
        """
		:param confusion_matrix: the values in the matrix.
		:param mat_x_ticks, mat_y_ticks: ticks for the axis of the matrix.
		"""
        output = []
        for y in range(len(confusion_matrix)):
            for x in range(len(confusion_matrix[y])):
                x_val = x if mat_x_ticks is None else mat_x_ticks[x]
                y_val = y if mat_y_ticks is None else mat_y_ticks[y]
                output.append((x_val, y_val,
                               round(float(confusion_matrix[x][y]),
                                     digits_to_round)))
        return output
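The `TimeHistory` callback used in `__train` is not defined anywhere in this snippet. A minimal sketch of how such a per-epoch timer is commonly implemented (an assumption, not the original code):

import time
import tensorflow as tf

class TimeHistory(tf.keras.callbacks.Callback):
    """Records the wall-clock duration of every epoch in self.times."""

    def on_train_begin(self, logs=None):
        self.times = []

    def on_epoch_begin(self, epoch, logs=None):
        self._epoch_start = time.time()

    def on_epoch_end(self, epoch, logs=None):
        self.times.append(time.time() - self._epoch_start)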
Example No. 28
            batch_size=int(round(args.test_batch_size)), shuffle=True, **kwargs)

    model = Net().to(device)

    # Load checkpoint
    if args.ckpf != '':
        if use_cuda:
            model.load_state_dict(torch.load(args.ckpf))
        else:
            # Load GPU model on CPU
            model.load_state_dict(torch.load(args.ckpf, map_location=lambda storage, loc: storage))

    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    try:
        e = Experiment()
    except Exception:  # a bare except would also swallow KeyboardInterrupt/SystemExit
        e = Experiment.init()

    def train(args, model, device, train_loader, optimizer, epoch):
        """Training"""

        model.train()

        tot_loss = 0

        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = F.nll_loss(output, target)
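The example is truncated right after the loss computation. The standard continuation of such a PyTorch training step (backward pass, optimizer update, epoch-level logging) would look roughly like the sketch below; the epoch-average format and the `e.log_metric(key, value, iteration)` call mirror usage seen earlier in this document and are assumptions here:

            # Sketch of the usual continuation (not part of the original snippet):
            loss.backward()          # backpropagate
            optimizer.step()         # update the weights
            tot_loss += loss.item()  # accumulate for the epoch average

        avg_loss = tot_loss / len(train_loader)
        print('Train Epoch: {}\tAverage loss: {:.6f}'.format(epoch, avg_loss))
        e.log_metric('train_loss', avg_loss, epoch)  # hypothetical logging call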
Example No. 29
class TensorflowTrainer:
    GRAYSCALE_CHANNELS = 1
    RGB_CHANNELS = 3
    VERBOSE = 1
    WORKERS = 3
    fully_connected_layers = [1024, 512, 256]

    METRICS = {
        'pre-training': [
            'TensorFlow version',
            'GPUs found',
            'Model',
            # 'Classes list'
        ],
        'post-training': [
            'training_time',
            # 'epochs_duration',
            # 'avg_time_per_epoch',
            # 'time_per_step'
        ],
        'post-test': ['test_acc', 'test_loss']
    }

    def __init__(self, arguments, model_name, base_model):
        self.__cnvrg_env = True
        self.__arguments = arguments
        self.__shape = (arguments.image_height, arguments.image_width)
        self.__classes = parse_classes(arguments.data)
        self.__channels = TensorflowTrainer.RGB_CHANNELS if arguments.image_color == 'rgb' \
         else TensorflowTrainer.GRAYSCALE_CHANNELS
        self.__model = ModelGenerator(
            base_model=base_model,
            num_of_classes=len(self.__classes),
            fully_connected_layers=TensorflowTrainer.fully_connected_layers,
            loss_function=arguments.loss,
            dropout=arguments.dropout,
            activation_hidden_layers=arguments.hidden_layer_activation,
            activation_output_layers=arguments.output_layer_activation,
            optimizer=arguments.optimizer).get_model()
        try:
            print("Trying to launch an experiment in cnvrg environment.")
            self.__experiment = Experiment()
        except Exception:
            print("Not in cnvrg environment.")
            self.__cnvrg_env = False

        self.__metrics = {
            'TensorFlow version': tf.__version__,
            'GPUs found':
            len(tf.config.experimental.list_physical_devices('GPU')),
            'Model': model_name,
            'Classes list': self.__classes
        }

    def run(self):
        self.__plot(status='pre-training')

        self.__train()
        self.__plot(status='post-training')

        self.__test()
        self.__plot(status='post-test')

        self.__export_model()

    def __plot(self, status):
        if status == 'pre-training':
            self.__plot_metrics(status='pre-training')

        elif status == 'post-training':
            self.__plot_metrics(status='post-training')

        elif status == 'post-test' and self.__arguments.data_test is not None:
            self.__plot_metrics(status='post-test')
            self.__plot_confusion_matrix(self.__labels, self.__predictions)

    def __train(self):
        train_generator, val_generator = load_generator(
            self.__arguments.data,
            self.__shape,
            self.__arguments.test_size,  # test_size = validation_split
            self.__arguments.image_color,
            self.__arguments.batch_size)

        start_time = time.time()
        time_callback = TimeHistory()

        print("--- Starts Training ---")

        from PIL import ImageFile
        ImageFile.LOAD_TRUNCATED_IMAGES = True

        self.__model.fit(train_generator,
                         epochs=self.__arguments.epochs,
                         verbose=self.__arguments.verbose,
                         steps_per_epoch=self.__arguments.steps_per_epoch,
                         validation_data=val_generator
                         if self.__arguments.test_size != 0. else None,
                         validation_steps=self.__arguments.steps_per_epoch
                         if self.__arguments.test_size != 0. else None,
                         callbacks=[time_callback])

        print("--- Ends training ---")

        training_time = time.strftime("%H:%M:%S",
                                      time.gmtime(time.time() - start_time))
        self.__metrics['training_time'] = training_time
        self.__metrics['epochs_duration'] = Metric(key='Epochs Duration',
                                                   Ys=time_callback.times,
                                                   Xs='from_1',
                                                   x_axis='epochs',
                                                   y_axis='time (seconds)')
        self.__metrics['avg_time_per_epoch'] = round(
            sum(time_callback.times) / len(time_callback.times), 3)

        if self.__arguments.steps_per_epoch is not None:
            self.__metrics['time_per_step'] = Metric(
                key='Time per Step',
                Ys=[
                    round(
                        time_callback.times[i] /
                        self.__arguments.steps_per_epoch, 3)
                    for i in range(self.__arguments.epochs)
                ],
                Xs='from_1',
                x_axis='epochs',
                y_axis='time (ms)/step')

    def __test(self):
        if self.__arguments.data_test is None:
            return
        test_gen = load_generator(self.__arguments.data_test,
                                  self.__shape,
                                  image_color=self.__arguments.image_color,
                                  batch_size=self.__arguments.batch_size,
                                  generate_test_set=True)
        self.__predictions = np.argmax(self.__model.predict(test_gen), axis=1)
        self.__labels = test_gen.classes

        steps_per_epoch_testing = int(np.ceil(test_gen.n / test_gen.batch_size))  # number of batches, not samples
        test_loss, test_acc = self.__model.evaluate(  # evaluate_generator is deprecated in TF 2; evaluate accepts generators
            test_gen,
            workers=TensorflowTrainer.WORKERS,
            verbose=TensorflowTrainer.VERBOSE,
            steps=steps_per_epoch_testing)

        test_acc = round(float(test_acc), 3)
        test_loss = round(float(test_loss), 3)
        self.__metrics['test_acc'] = test_acc
        self.__metrics['test_loss'] = test_loss

    def __export_model(self):
        output_file_name = os.environ.get("CNVRG_WORKDIR") + "/" + self.__arguments.output_model if os.environ.get("CNVRG_WORKDIR") is not None \
         else self.__arguments.output_model
        self.__model.save(output_file_name)
        export_labels_dictionary_from_classes_list(self.__classes)

    # ============ Helpers ============

    def __plot_metrics(self, status):
        metrics = TensorflowTrainer.METRICS[status]

        if status == 'pre-training':
            for metric in metrics:
                if metric in self.__metrics.keys():  # if metric exists
                    if self.__cnvrg_env:
                        self.__experiment.log_param(metric,
                                                    self.__metrics[metric])
                    else:
                        print("log_param - {key}: {value}".format(
                            key=metric, value=self.__metrics[metric]))

        elif status == 'post-training':
            for metric in metrics:
                if metric in self.__metrics.keys():  # if metric exists
                    if not isinstance(self.__metrics[metric], Metric):  # param
                        if self.__cnvrg_env:
                            self.__experiment.log_param(
                                metric, self.__metrics[metric])
                        else:
                            print("log_param -  {key} : {value}".format(
                                key=metric, value=self.__metrics[metric]))
                    else:  # metrics should be called here.
                        if self.__cnvrg_env:
                            self.__experiment.log_metric(
                                key=self.__metrics[metric].key,
                                Ys=self.__metrics[metric].Ys,
                                Xs=self.__metrics[metric].Xs,
                                x_axis=self.__metrics[metric].x_axis,
                                y_axis=self.__metrics[metric].y_axis)
                        else:
                            print(self.__metrics[metric])

        elif status == 'post-test':
            for metric in metrics:
                if metric in self.__metrics.keys():  # if metric exists

                    if self.__cnvrg_env:
                        self.__experiment.log_param(metric,
                                                    self.__metrics[metric])
                    else:
                        print("log_param -  {key} : {value}".format(
                            key=metric, value=self.__metrics[metric]))

        else:
            raise ValueError('Unrecognized status.')

    def __plot_confusion_matrix(self, labels, predictions):
        """ Plots the confusion matrix. """
        confusion_mat_test = confusion_matrix(labels, predictions)  # array
        confusion_mat_test = TensorflowTrainer.__helper_plot_confusion_matrix(
            confusion_mat_test,
            mat_x_ticks=self.__classes,
            mat_y_ticks=self.__classes)
        # run() calls this unconditionally, so guard against the missing
        # Experiment object outside the cnvrg environment.
        if self.__cnvrg_env:
            self.__experiment.log_chart("confusion matrix",
                                        data=Heatmap(z=confusion_mat_test))
        else:
            print(confusion_mat_test)

    @staticmethod
    def __helper_plot_confusion_matrix(confusion_matrix,
                                       mat_x_ticks=None,
                                       mat_y_ticks=None,
                                       digits_to_round=3):
        """
		:param confusion_matrix: the values in the matrix.
		:param mat_x_ticks, mat_y_ticks: ticks for the axis of the matrix.
		"""
        output = []
        for y in range(len(confusion_matrix)):
            for x in range(len(confusion_matrix[y])):
                x_val = x if mat_x_ticks is None else mat_x_ticks[x]
                y_val = y if mat_y_ticks is None else mat_y_ticks[y]
                output.append((x_val, y_val,
                               round(float(confusion_matrix[x][y]),
                                     digits_to_round)))
        return output
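`Metric` is referenced in `__train` and `__plot_metrics` but never defined in this snippet. One plausible definition, inferred purely from the attributes the code accesses (`key`, `Ys`, `Xs`, `x_axis`, `y_axis`):

from dataclasses import dataclass
from typing import List, Union

@dataclass
class Metric:
    """Container for a cnvrg line-chart metric; the fields match the
    attribute accesses in __train and __plot_metrics above."""
    key: str
    Ys: List[float]
    Xs: Union[str, List[float]]  # 'from_1' or an explicit x-axis list
    x_axis: str
    y_axis: str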
Example No. 30
def train_with_cross_validation(model, train_set, test_set, folds, project_dir,
                                output_model_name):
    train_acc, train_loss = [], []
    kf = KFold(n_splits=folds)
    X, y = train_set
    # --- Training.
    for train_index, val_index in kf.split(X):
        X_train, X_val = X.iloc[train_index, :], X.iloc[val_index, :]
        y_train, y_val = y.iloc[train_index], y.iloc[val_index]
        model.fit(X_train, y_train)
        model.n_estimators += 1  # grow the ensemble by one estimator per fold (requires warm_start=True)
        y_hat = model.predict(X_val)  # y_hat is a.k.a. y_pred
        acc = accuracy_score(y_val, y_hat)
        loss = mean_squared_error(y_val, y_hat)

        train_acc.append(acc)
        train_loss.append(loss)
    # --- Testing.
    X_test, y_test = test_set
    y_pred = model.predict(X_test)
    test_acc = accuracy_score(y_test, y_pred)
    test_loss = mean_squared_error(y_test, y_pred)

    exp = Experiment()
    exp.log_param("model", output_model_name)
    exp.log_param("folds", folds)
    exp.log_metric("train_acc", train_acc)
    exp.log_metric("train_loss", train_loss)
    exp.log_param("test_acc", test_acc)
    exp.log_param("test_loss", test_loss)

    # Save model.
    output_file_name = project_dir + "/" + output_model_name if project_dir is not None else output_model_name
    pickle.dump(model, open(output_file_name, 'wb'))
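A hypothetical invocation of `train_with_cross_validation`. The CSV path, target column, and estimator are illustrative assumptions; a numeric target is required because the function computes mean_squared_error, the `model.n_estimators += 1` line above only grows the ensemble incrementally when the estimator was created with `warm_start=True`, and `Experiment()` assumes a cnvrg environment:

import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

df = pd.read_csv('data.csv')                    # hypothetical dataset
X, y = df.drop(columns=['label']), df['label']  # 'label' is an assumed numeric target column
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# warm_start=True lets `model.n_estimators += 1` add one tree per fold
# instead of refitting the whole ensemble from scratch.
model = GradientBoostingClassifier(n_estimators=10, warm_start=True)
train_with_cross_validation(model,
                            train_set=(X_train, y_train),
                            test_set=(X_test, y_test),
                            folds=5,
                            project_dir=None,
                            output_model_name='model.sav')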