Example #1
def log_model(self, model_name=None):
    """
    Log the feature extractor to an MLflow server. Assumes you've
    already run track_with_mlflow().
    """
    if model_name is None:
        model_name = self.modelname + "_FCN"

    assert hasattr(self, "_mlflow"), "need to run track_with_mlflow() first"
    from mlflow.keras import log_model
    log_model(self._models["fcn"], model_name)
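Both this example and Example #2 below assert that track_with_mlflow() has already run, but neither shows it. A minimal hypothetical sketch of what such a method could look like, assuming it only needs to point MLflow at a tracking server and set the _mlflow attribute the assert checks (the method body here is an illustration, not the original project's code):

def track_with_mlflow(self, uri=None, experiment_name=None):
    # Hypothetical companion method, not shown in the original examples
    import mlflow
    if uri is not None:
        mlflow.set_tracking_uri(uri)       # point at a remote tracking server
    if experiment_name is not None:
        mlflow.set_experiment(experiment_name)
    self._mlflow = mlflow.start_run()      # the attribute log_model() asserts on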
Example #2
    def log_model(self, model_name="fixmatch_model"):
        """
        Log the feature extractor to an MLflow server. Assumes you've
        already run track_with_mflow()
        
        Overwriting the generic version of this function because we're not
        building an FCN here.
        """

        assert hasattr(self,
                       "_mlflow"), "need to run track_with_mlflow() first"
        from mlflow.keras import log_model
        log_model(self._models["full"], model_name)
Example #3
import mlflow
import numpy as np
from mlflow.keras import log_model


def train(model, compile_kwargs, fit_kwargs, optional_params=None):
    '''
    This is a wrapper function for tracking experiments with MLflow.

    Parameters
    ----------
    model: callable
        A zero-argument function that builds and returns the Keras model
        to track (the wrapper calls model() before compiling)

    compile_kwargs: dict
        Keyword arguments to compile the model with

    fit_kwargs: dict
        Keyword arguments to fit the model with

    optional_params: dict, optional
        Extra parameters to log to MLflow alongside the compile/fit kwargs
    '''
    if optional_params is None:
        optional_params = {}
    with mlflow.start_run() as run:
        mlflow.log_param("mlflow.version", mlflow.version.VERSION)
        run_id = run.info.run_id
        experiment_id = run.info.experiment_id
        print("MLflow:")
        print("    run_id:", run_id)
        print("    experiment_id:", experiment_id)
        model = model()  # build the Keras model from the factory callable
        model.compile(**compile_kwargs)
        history = model.fit(**fit_kwargs)

        for param_key, param_value in {
                **compile_kwargs,
                **fit_kwargs,
                **optional_params
        }.items():
            if param_key not in ["x", "y", "X_val", "y_val"]:
                mlflow.log_param(param_key, param_value)

        for key, values in history.history.items():
            for v in values:
                if not np.isnan(v):  # MLflow won't log NaN
                    mlflow.log_metric(key, v)

        for i, layer in enumerate(model.layers):
            mlflow.log_param("hidden_layer_" + str(i) + "_output_shape",
                             layer.output_shape)

        log_model(model, "keras-model")
        return run_id
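Since train() takes a model builder rather than an instantiated model, here is a usage sketch; build_model and the synthetic X/y are hypothetical stand-ins, not part of the original example:

import numpy as np
import tensorflow as tf

def build_model():
    # Tiny illustrative binary classifier
    return tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation="relu", input_shape=(8,)),
        tf.keras.layers.Dense(1, activation="sigmoid"),
    ])

X = np.random.rand(100, 8).astype("float32")
y = np.random.randint(0, 2, size=(100,))

run_id = train(
    build_model,
    compile_kwargs={"optimizer": "adam", "loss": "binary_crossentropy"},
    fit_kwargs={"x": X, "y": y, "epochs": 2, "verbose": 0},
)

The "x" and "y" entries of fit_kwargs are passed to model.fit() but filtered out of the parameter logging, so only scalar settings such as epochs land in MLflow.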
Example #4
    enc = OneHotEncoder()
    Y = enc.fit_transform(y[:, np.newaxis]).toarray()

    # Scale data to have mean 0 and variance 1
    # which is important for convergence of the neural network
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    x_train, x_test, y_train, y_test = train_test_split(X_scaled,
                                                        Y,
                                                        test_size=0.2,
                                                        random_state=42)
    with mlflow.start_run():
        model = tf.keras.Sequential([
            tf.keras.layers.Dense(10, activation=tf.nn.relu,
                                  input_shape=(4, )),
            tf.keras.layers.Dense(10, activation=tf.nn.relu),
            tf.keras.layers.Dense(3, activation="softmax")
        ])
        model.compile(optimizer='adam',
                      loss=tf.keras.losses.CategoricalCrossentropy(),
                      metrics=['accuracy'])
        model.fit(x_train, y_train, epochs=1000)
        test_loss, test_acc = model.evaluate(x_test, y_test)

        log_metric('acc', test_acc)
        log_model(keras_model=model,
                  registered_model_name='Keras-Iris-Model',
                  artifact_path='model_artifact',
                  conda_env=conda_env)
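This snippet passes a conda_env that is defined elsewhere. A minimal dict of the form mlflow.keras.log_model accepts; the environment name and pinned versions below are illustrative assumptions, not taken from the original:

conda_env = {
    "name": "keras-iris-env",
    "channels": ["defaults", "conda-forge"],
    "dependencies": [
        "python=3.8",
        "pip",
        {"pip": ["mlflow", "tensorflow"]},
    ],
}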
Example #5
def train_test(model, data, y, args):
    import os
    import tensorflow.keras.backend as bk
    import tensorflow as tf
    from sklearn.model_selection import StratifiedShuffleSplit
    import mlflow.keras as mlflow_keras
    from hypermodel_utils import load_partition, get_callbacks, get_results_table, allocate_stats, mlflow_logs

    MODEL_TYPE = args.model_type
    TIMESTAMP = args.timestamp
    samples = args.samples
    X = [x for x in data.X]
    FIT_MAX_EPOCHS = args.epochs

    results = list()
    model.summary()

    kf = StratifiedShuffleSplit(n_splits=args.cv, random_state=args.samples[0])
    kf.get_n_splits(X[0], y)
    results.append(get_results_table())

    best_weights = [None for _ in range(args.cv)]
    best_stats = [None for _ in range(args.cv)]

    metrics_stats = {
        m: {'p{:02d}'.format(x): []
            for x in range(args.cv)}
        for m in ('Prec', 'Sn', 'Sp', 'Acc', 'F1', 'Mcc')
    }
    weights_suffix = '.hdf5'

    # Cross validation loop
    for idx, (cv_train_index, cv_test_index) in enumerate(kf.split(X[0], y)):
        # TODO split nucleotides strings, not matrix
        (x_train, y_train), (x_test, y_test) = data.load_partition(
            cv_train_index, cv_test_index)
        callbacks = get_callbacks(args, idx)
        best_cv_mcc = -10000.0

        # === Perform Training Phase ===
        # Iterate over samples to create different samples on same partition based on different train-val splits
        for s, seed in enumerate(samples):
            print('CV PARTITION {} - CV SEED {} | Train on SEED {}'.format(
                idx, s, seed))
            weight_file_prefix = '{}-{}-partition_{:02d}'.format(
                MODEL_TYPE, TIMESTAMP, idx)
            weight_file_name = weight_file_prefix + '-sample_{:02d}'.format(s)
            weight_file_name += '-epoch_{epoch:02d}.hdf5'
            weight_path = os.path.join(args.weights_dir, weight_file_name)

            callbacks[0] = tf.keras.callbacks.ModelCheckpoint(
                weight_path,
                save_best_only=True,
                save_weights_only=True,
                verbose=1)

            # Separate splitter name so the outer CV splitter isn't shadowed
            val_split = StratifiedShuffleSplit(n_splits=1,
                                               random_state=seed,
                                               test_size=0.05)
            val_split.get_n_splits(x_train, y_train)
            histories = []
            for t_index, v_index in val_split.split(x_train[0], y_train):
                (xx_train,
                 yy_train), val_data = data.load_partition(t_index, v_index)
                history = model.fit(x=xx_train,
                                    y=yy_train,
                                    batch_size=args.batch_size,
                                    epochs=FIT_MAX_EPOCHS,
                                    validation_data=val_data,
                                    callbacks=callbacks,
                                    class_weight={
                                        0: .2,
                                        1: .8
                                    },
                                    verbose=0)

                # PLOT HISTORY
                _plot_history(args, 'accuracy', history, idx, s)
                _plot_history(args, 'loss', history, idx, s)

                # # Data Augmentation =================================================
                # train_datagen = AugmentedGeneratorMultipleInputs(xx_train, yy_train, args.batch_size)
                # _steps_per_epoch = int(len(yy_train) / args.batch_size)
                # print("_steps_per_epoch", _steps_per_epoch)
                # # model.fit_generator(train_datagen.flow(xx_train, yy_train, batch_size=args.batch_size),
                # model.fit_generator(train_datagen,
                #                     validation_data=val_data,
                #                     steps_per_epoch=_steps_per_epoch,
                #                     epochs=FIT_MAX_EPOCHS,
                #                     callbacks=callbacks,
                #                     verbose=1)
                # # ==================================================================

            # # Test model fitted using seed validation set
            # stats, y_pred = get_test_stats(model, x_test, y_test)
            # print(stats.Mcc)
            # print(stats.F1)

            # === Perform Test Phase ===
            sample_weights = [
                x for x in os.listdir(args.weights_dir)
                if x.startswith(weight_file_prefix) and x.endswith(weights_suffix)
            ]

            # Iterate over all weights saved on checkpoints for this sample of this partition
            best_sample_mcc = -10000.0
            best_sample_stats = None
            for f in sample_weights:
                bk.clear_session()
                name = os.path.join(args.weights_dir, f)
                # print(name)
                model.load_weights(name)
                stats, y_pred = get_test_stats(model=model, X=x_test, y=y_test)

                # Select best sample weights
                if best_sample_mcc < stats.Mcc:
                    best_sample_mcc = stats.Mcc
                    best_sample_stats = stats

                # Select best weights for this partition
                if best_cv_mcc < stats.Mcc:
                    best_cv_mcc = stats.Mcc
                    selected_weight = f
                    best_stats[idx] = stats
                    # print(stats)

            for metric in best_sample_stats.get_stats_types():
                sample_stats = best_sample_stats.to_dict()
                metrics_stats[metric]['p{:02}'.format(idx)].append(
                    sample_stats[metric])

            # Delete temporary weights
            for f in sample_weights:
                if f != selected_weight:
                    # print('Deleting weight: {}'.format(f))
                    path = os.path.join(args.weights_dir, f)
                    os.remove(path)

        bk.clear_session()

        print(best_sample_stats)
        # Persist best weights of this partition on logs
        args.logs.set_metrics(**metrics_stats)  # Log metrics
        args.logs.set_artifacts()  # Log artifacts
        model.load_weights(os.path.join(args.weights_dir, selected_weight))
        persist_model_path = os.path.join(
            args.best_weights_dir,
            'model_{}_{}_p{:02d}'.format(MODEL_TYPE, TIMESTAMP, idx))
        # mlflow_keras.save_model(model, persist_model_path)
        # mlflow_keras.log_model(model, args.logs.get_model_path())
        mlflow_keras.save_model(model, args.logs.get_model_path(idx))
        mlflow_keras.log_model(model, 'models')
        # print('Deleting weight: {}'.format(selected_weight))
        path = os.path.join(args.weights_dir, selected_weight)
        os.remove(path)
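Example #5 calls get_test_stats() without defining it; the real helper lives in the project's utilities. A hypothetical sketch matching the interface used above (a stats object exposing Mcc, F1, to_dict(), and get_stats_types(), returned alongside the predictions), assuming a binary classifier with a single sigmoid output:

from dataclasses import dataclass, asdict

from sklearn.metrics import (accuracy_score, f1_score, matthews_corrcoef,
                             precision_score, recall_score)

@dataclass
class TestStats:
    Prec: float
    Sn: float
    Sp: float
    Acc: float
    F1: float
    Mcc: float

    def to_dict(self):
        return asdict(self)

    def get_stats_types(self):
        return list(self.to_dict())

def get_test_stats(model, X, y):
    # Threshold the sigmoid output at 0.5; assumes binary 0/1 labels
    y_pred = (model.predict(X).ravel() >= 0.5).astype(int)
    stats = TestStats(
        Prec=precision_score(y, y_pred),
        Sn=recall_score(y, y_pred),               # sensitivity: recall on positives
        Sp=recall_score(y, y_pred, pos_label=0),  # specificity: recall on negatives
        Acc=accuracy_score(y, y_pred),
        F1=f1_score(y, y_pred),
        Mcc=matthews_corrcoef(y, y_pred),
    )
    return stats, y_pred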
Example #6
x_train, x_test = x_train / 255.0, x_test / 255.0

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(28, 28)),
    tf.keras.layers.Dense(512, activation=tf.nn.relu),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(10, activation=tf.nn.softmax)
])
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x_train, y_train, epochs=1)
model.evaluate(x_test, y_test)
# Log the trained model to MLflow
log_model(model, "keras-model")
tf_k_model = mlflow.keras.load_model(mlflow.get_artifact_uri('keras-model'))
for i in range(3):
    # Get a single image in the batch shape the model expects: (1, 28, 28)
    img = x_test[i]
    img = np.expand_dims(img, 0)
    print(img.shape)
    predictions_single = tf_k_model.predict(img)
    print(predictions_single)
    prediction_result = np.argmax(predictions_single[0])
    print("predicted value={};observed value={}".format(prediction_result, y_test[i]))
"""
pyfunc_model = pyfunc.load_pyfunc(mlflow.get_artifact_uri('keras-model'))
df = pd.DataFrame(data=x_test, columns=["features"] * x_train.shape[1])
print(pyfunc_model.predict(df))
"""