def log_model(self, model_name=None):
    """
    Log the feature extractor to an MLflow server.

    Assumes you've already run track_with_mlflow().

    :model_name: passed to mlflow.keras.log_model as its second positional
        argument; when None, self.modelname + "_FCN" is used

    Raises AssertionError if this object has no ``_mlflow`` attribute.
    """
    if model_name is None:
        model_name = self.modelname + "_FCN"
    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``; the exception type and message are unchanged.
    if not hasattr(self, "_mlflow"):
        raise AssertionError("need to run track_with_mlflow() first")
    # Function-scope import: mlflow is only imported when this is called.
    # Aliased so it does not shadow this function's own name.
    from mlflow.keras import log_model as _log_keras_model
    _log_keras_model(self._models["fcn"], model_name)
def log_model(self, model_name="fixmatch_model"):
    """
    Log the model stored under ``self._models["full"]`` to an MLflow server.

    Assumes you've already run track_with_mlflow().

    Overwriting the generic version of this function because
    we're not building an FCN here.

    :model_name: passed to mlflow.keras.log_model as its second positional
        argument

    Raises AssertionError if this object has no ``_mlflow`` attribute.
    """
    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``; the exception type and message are unchanged.
    if not hasattr(self, "_mlflow"):
        raise AssertionError("need to run track_with_mlflow() first")
    # Function-scope import: mlflow is only imported when this is called.
    # Aliased so it does not shadow this function's own name.
    from mlflow.keras import log_model as _log_keras_model
    _log_keras_model(self._models["full"], model_name)
def train(model, compile_kwargs, fit_kwargs, optional_params=None):
    '''
    This is a wrapper function for tracking experiments with MLflow.

    Builds the model, compiles and fits it inside a fresh MLflow run, then
    logs the parameters, the per-epoch training history, each layer's
    output shape and the fitted model itself.

    Parameters
    ----------
    model: callable
        Zero-argument factory returning the Keras model to track
        (it is invoked as ``model()``)

    compile_kwargs: dict
        Keyword arguments to compile model with

    fit_kwargs: dict
        Keyword arguments to fit model with

    optional_params: dict, optional
        Extra key/value pairs to log as MLflow params alongside the
        compile and fit keyword arguments

    Returns
    -------
    The ``run_id`` of the MLflow run that was created
    '''
    # A None default avoids sharing one dict object between calls.
    if optional_params is None:
        optional_params = {}

    with mlflow.start_run() as run:
        mlflow.log_param("mlflow.version", mlflow.version.VERSION)

        run_id = run.info.run_id
        experiment_id = run.info.experiment_id
        print("MLflow:")
        print(" run_id:", run_id)
        print(" experiment_id:", experiment_id)

        keras_model = model()
        keras_model.compile(**compile_kwargs)
        history = keras_model.fit(**fit_kwargs)

        # Later dicts win on duplicate keys; the listed data arguments are
        # skipped rather than logged as params.
        logged_params = {**compile_kwargs, **fit_kwargs, **optional_params}
        for param_key, param_value in logged_params.items():
            if param_key not in ("x", "y", "X_val", "y_val"):
                mlflow.log_param(param_key, param_value)

        for key, values in history.history.items():
            # Tag each value with its epoch index so the metric history is
            # ordered by epoch instead of every point landing on step 0.
            for epoch, v in enumerate(values):
                if not np.isnan(v):  # MLflow won't log NaN
                    mlflow.log_metric(key, v, step=epoch)

        for i, layer in enumerate(keras_model.layers):
            mlflow.log_param("hidden_layer_" + str(i) + "_units",
                             layer.output_shape)

        # Logged while the run is still active so the model is attached to it.
        log_model(keras_model, "keras-model")

    return run_id
# One-hot encode the class labels; they are reshaped into a single column
# before being handed to the encoder.
enc = OneHotEncoder()
Y = enc.fit_transform(y[:, np.newaxis]).toarray()

# Scale data to have mean 0 and variance 1,
# which is important for convergence of the neural network.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Hold out 20% of the samples for evaluation; the seed is fixed so the
# split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    X_scaled,
    Y,
    test_size=0.2,
    random_state=42,
)

with mlflow.start_run():
    # Two 10-unit ReLU layers followed by a 3-way softmax output.
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu, input_shape=(4, )))
    model.add(tf.keras.layers.Dense(10, activation=tf.nn.relu))
    model.add(tf.keras.layers.Dense(3, activation="softmax"))

    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.CategoricalCrossentropy(),
        metrics=['accuracy'],
    )
    model.fit(x_train, y_train, epochs=1000)

    # Only the accuracy on the held-out split is logged as a metric.
    test_loss, test_acc = model.evaluate(x_test, y_test)
    log_metric('acc', test_acc)

    log_model(
        keras_model=model,
        registered_model_name='Keras-Iris-Model',
        artifact_path='model_artifact',
        conda_env=conda_env,
    )
def train_test(model, data, y, args):
    """
    Cross-validate ``model`` and persist the best weights of each partition.

    For each of the ``args.cv`` stratified shuffle-split partitions, the
    model is fit once per seed in ``args.samples`` using a 5% validation
    split. Weight checkpoints written during fitting are reloaded and
    scored on the partition's test split with ``get_test_stats``. The
    checkpoint with the highest ``Mcc`` seen so far in the partition is
    kept on disk; the others are deleted. After all seeds of a partition,
    metrics and artifacts are pushed through ``args.logs``, the kept
    weights are loaded, the model is saved and logged with
    ``mlflow_keras``, and the kept weight file is removed.

    NOTE(review): the nesting below was reconstructed from flattened
    source text; confirm the loop levels against the original file.

    Parameters
    ----------
    model:
        Object providing ``summary()``, ``fit()`` and ``load_weights()``
        (presumably a compiled Keras model). The same instance is reused
        for every fit; it is not rebuilt between seeds or partitions.

    data:
        Object providing ``X`` and ``load_partition(train_idx, test_idx)``

    y:
        Labels used to stratify the cross-validation split

    args:
        Namespace-like object; the attributes read here are ``model_type``,
        ``timestamp``, ``samples``, ``epochs``, ``cv``, ``batch_size``,
        ``weights_dir``, ``best_weights_dir`` and ``logs``

    Returns
    -------
    None
    """
    import os
    import tensorflow.keras.backend as bk
    import tensorflow as tf
    from hypermodel_utils import load_partition, get_callbacks, get_results_table, allocate_stats, mlflow_logs

    MODEL_TYPE = args.model_type
    TIMESTAMP = args.timestamp
    samples = args.samples
    X = [x for x in data.X]
    FIT_MAX_EPOCHS = args.epochs
    # NOTE(review): ``results`` is appended to below but never read or
    # returned in this function.
    results = list()
    model.summary()

    # The first sample seed also seeds the outer cross-validation split.
    kf = StratifiedShuffleSplit(n_splits=args.cv, random_state=args.samples[0])
    kf.get_n_splits(X[0], y)  # return value is discarded

    results.append(get_results_table())
    # NOTE(review): ``best_weights`` is never used after this line.
    best_weights = [None for _ in range(args.cv)]
    best_stats = [None for _ in range(args.cv)]
    # metric name -> partition key ('p00', 'p01', ...) -> list of values
    metrics_stats = {
        m: {'p{:02d}'.format(x): [] for x in range(args.cv)}
        for m in ('Prec', 'Sn', 'Sp', 'Acc', 'F1', 'Mcc')
    }
    weights_sufix = '.hdf5'

    # Cross validation loop
    for idx, (cv_train_index, cv_test_index) in enumerate(kf.split(X[0], y)):
        # TODO split nucleotides strings, not matrix
        (x_train, y_train), (x_test, y_test) = data.load_partition(
            cv_train_index, cv_test_index)

        callbacks = get_callbacks(args, idx)
        best_cv_mcc = -10000.0

        # === Perform Training Phase ===
        # Iterate over samples to create different samples on same partition based on different train-val splits
        for s, seed in enumerate(samples):
            print('CV PARTITION {} - CV SEED {} | Train on SEED {}'.format(
                idx, s, seed))
            # The prefix identifies the partition only, so checkpoints from
            # every seed of this partition share it.
            weight_file_prefix = '{}-{}-partition_{:02d}'.format(
                MODEL_TYPE, TIMESTAMP, idx)
            weight_file_name = weight_file_prefix + '-sample_{:02d}'.format(s)
            # ``{epoch:02d}`` is left unformatted here; presumably filled in
            # by ModelCheckpoint when it writes each file.
            weight_file_name += '-epoch_{epoch:02d}.hdf5'
            weight_path = os.path.join(args.weights_dir, weight_file_name)
            # Overwrites whatever get_callbacks() put in slot 0.
            callbacks[0] = tf.keras.callbacks.ModelCheckpoint(
                weight_path, save_best_only=True, save_weights_only=True, verbose=1)

            # Rebinding ``kf`` does not disturb the outer loop, whose split
            # iterator was already created from the previous object.
            kf = StratifiedShuffleSplit(n_splits=1, random_state=seed, test_size=0.05)
            kf.get_n_splits(x_train, y_train)  # return value is discarded

            # NOTE(review): ``histories`` is never appended to or read.
            histories = []
            for t_index, v_index in kf.split(x_train[0], y_train):
                # NOTE(review): these indices are relative to x_train, while
                # the earlier load_partition call received indices into the
                # full data set — confirm load_partition accounts for that.
                (xx_train, yy_train), val_data = data.load_partition(t_index, v_index)
                history = model.fit(x=xx_train, y=yy_train,
                                    batch_size=args.batch_size, epochs=FIT_MAX_EPOCHS,
                                    validation_data=val_data, callbacks=callbacks,
                                    class_weight={
                                        0: .2,
                                        1: .8
                                    },
                                    verbose=0)

                # PLOT HISTORY
                _plot_history(args, 'accuracy', history, idx, s)
                _plot_history(args, 'loss', history, idx, s)

                # # Data Augmentation =================================================
                # train_datagen = AugmentedGeneratorMultipleInputs(xx_train, yy_train, args.batch_size)
                # _steps_per_epoch = int(len(yy_train) / args.batch_size)
                # print("_steps_per_epoch", _steps_per_epoch)
                # # model.fit_generator(train_datagen.flow(xx_train, yy_train, batch_size=args.batch_size),
                # model.fit_generator(train_datagen,
                #                     validation_data=val_data,
                #                     steps_per_epoch=_steps_per_epoch,
                #                     epochs=FIT_MAX_EPOCHS,
                #                     callbacks=callbacks,
                #                     verbose=1)
                # # ==================================================================

                # # Test model fitted using seed validation set
                # stats, y_pred = get_test_stats(model, x_test, y_test)
                # print(stats.Mcc)
                # print(stats.F1)

            # === Perform Test Phase ===
            sample_weights = [
                x for x in os.listdir(args.weights_dir + os.path.sep)
                if x.startswith(weight_file_prefix) and x.endswith(weights_sufix)
            ]

            # Iterate over all weights saved on checkpoints for this sample of this partition
            for i, f in enumerate(sample_weights):
                # NOTE(review): these two are reset on every iteration, so
                # the "best sample" comparison below always succeeds for a
                # non-NaN Mcc and best_sample_stats ends up as the stats of
                # the file processed last. Confirm whether the reset was
                # meant to sit before this loop.
                best_sample_mcc = -10000.0
                best_sample_stats = None
                bk.clear_session()
                name = os.path.join(args.weights_dir, f)
                # print(name)
                model.load_weights(name)
                stats, y_pred = get_test_stats(model=model, X=x_test, y=y_test)

                # Select best sample weights
                if best_sample_mcc < stats.Mcc:
                    best_sample_mcc = stats.Mcc
                    best_sample_stats = stats

                # Select best weights for this partition
                if best_cv_mcc < stats.Mcc:
                    best_cv_mcc = stats.Mcc
                    selected_weight = f
                    best_stats[idx] = stats
                # print(stats)

            # NOTE(review): placed after the checkpoint loop (one entry per
            # seed); the flattened source does not show whether it was
            # inside that loop instead — confirm.
            for metric in best_sample_stats.get_stats_types():
                sample_stats = best_sample_stats.to_dict()
                metrics_stats[metric]['p{:02}'.format(idx)].append(
                    sample_stats[metric])

            # Delete temporary weights
            # Everything except the partition's current best is removed.
            for i, f in enumerate(sample_weights):
                if f != selected_weight:
                    # print('Deleting weight: {}'.format(f))
                    path = args.weights_dir + '/' + f
                    os.remove(path)

            bk.clear_session()
            print(best_sample_stats)

        # Persist best weights of this partition on logs
        args.logs.set_metrics(**metrics_stats)  # Log metrics
        args.logs.set_artifacts()  # Log artifacts
        model.load_weights(os.path.join(args.weights_dir, selected_weight))
        # NOTE(review): ``persist_model_path`` is only referenced by the
        # commented-out save_model call below.
        persist_model_path = os.path.join(
            args.best_weights_dir,
            'model_{}_{}_p{:02d}'.format(MODEL_TYPE, TIMESTAMP, idx))
        # mlflow_keras.save_model(model, persist_model_path)
        # mlflow_keras.log_model(model, args.logs.get_model_path())
        mlflow_keras.save_model(model, args.logs.get_model_path(idx))
        mlflow_keras.log_model(model, 'models')

        # The kept weight file is removed once the model has been saved and
        # logged with its weights loaded.
        # print('Deleting weight: {}'.format(selected_weight))
        path = args.weights_dir + '/' + selected_weight
        os.remove(path)
# Divide both splits by 255.0 (presumably mapping 8-bit pixel values
# into the [0, 1] range).
x_train = x_train / 255.0
x_test = x_test / 255.0

# Flatten each 28x28 input, then one 512-unit hidden layer with dropout
# and a 10-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(512, activation=tf.nn.relu))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax))

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.fit(x_train, y_train, epochs=1)
model.evaluate(x_test, y_test)

# Log the fitted model, then load it back from the run's artifact URI.
log_model(model, "keras-model")
tf_k_model = mlflow.keras.load_model(mlflow.get_artifact_uri('keras-model'))

# Predict the first three test images one at a time with the reloaded model.
for i in range(3):
    # Add a leading batch axis so a single image has the shape the
    # model expects.
    img = np.expand_dims(x_test[i], 0)
    print(img.shape)

    predictions_single = tf_k_model.predict(img)
    print(predictions_single)

    prediction_result = np.argmax(predictions_single[0])
    print("predicted value={};observed value={}".format(prediction_result, y_test[i]))

"""
pyfunc_model = pyfunc.load_pyfunc(mlflow.get_artifact_uri('keras-model'))
df = pd.DataFrame(data=x_test, columns=["features"] * x_train.shape[1])
print(pyfunc_model.predict(df))
"""