Exemplo n.º 1
0
def main(argv):
    """Train a DNN regressor on Boston housing data and round-trip it through MLflow.

    Builds, trains and evaluates a ``tf.estimator.DNNRegressor``, exports it
    for inference, logs the exported model with MLflow, and loads the exported
    model back as a PyFunc to print its predictions next to the true labels.

    Args:
        argv: Command-line arguments; not used by this function.
    """
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.boston_housing.load_data()
    num_features = x_train.shape[1]
    # Every input feature is fed as one dense numeric column keyed "features".
    feat_cols = [tf.feature_column.numeric_column(key="features", shape=(num_features,))]
    feat_spec = {"features": tf.placeholder("float", name="features", shape=[None, num_features])}
    hidden_units = [50, 20]
    steps = 1000
    regressor = tf.estimator.DNNRegressor(hidden_units=hidden_units, feature_columns=feat_cols)
    # Training repeats over the shuffled data; the number of batches consumed
    # is bounded by ``steps`` in the train() call below.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        {"features": x_train}, y_train, num_epochs=None, shuffle=True)
    with tracking.start_run():
        mlflow.log_param("Hidden Units", hidden_units)
        mlflow.log_param("Steps", steps)
        regressor.train(train_input_fn, steps=steps)
        # Evaluate with a single, un-shuffled pass over the test set so every
        # example contributes exactly once and the logged metric is
        # reproducible, instead of resampling the test data for a fixed
        # number of steps.
        test_input_fn = tf.estimator.inputs.numpy_input_fn(
            {"features": x_test}, y_test, num_epochs=1, shuffle=False)
        # Compute mean squared error (runs until the input is exhausted).
        mse = regressor.evaluate(test_input_fn)
        mlflow.log_metric("Mean Square Error", mse['average_loss'])
        # Building a receiver function for exporting
        receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feat_spec)
        temp = tempfile.mkdtemp()
        try:
            # The export path is decoded to text before being handed to MLflow.
            saved_estimator_path = regressor.export_savedmodel(temp, receiver_fn).decode("utf-8")
            # Logging the saved model
            tensorflow.log_saved_model(saved_model_dir=saved_estimator_path,
                                       signature_def_key="predict",
                                       artifact_path="model")
            # Reloading the model
            pyfunc = tensorflow.load_pyfunc(saved_estimator_path)
            # Every column is named "features", the same key used for the
            # serving placeholder above.
            df = pd.DataFrame(data=x_test, columns=["features"] * num_features)
            # Predicting on the loaded Python Function
            predict_df = pyfunc.predict(df)
            predict_df['original_labels'] = y_test
            print(predict_df)
        finally:
            # Always remove the temporary export directory, even on failure.
            shutil.rmtree(temp)
Exemplo n.º 2
0
def test_dnn():
    """End-to-end check of the DNN regression app launched through ``run``.

    Inside a temporary directory this test points tracking at a scratch
    location, runs the project in "." to fetch the diamonds dataset, runs
    "apps/dnn-regression" on it, loads the exported model as a pyfunc and
    asserts that it yields float32 values under a 'predictions' column.
    The previous tracking URI is restored afterwards regardless of outcome.
    """
    previous_uri = tracking.get_tracking_uri()

    def launch(uri, parameters):
        # Both project runs share every setting except target and parameters.
        run(uri,
            entry_point="main",
            version=None,
            parameters=parameters,
            experiment_id=tracking._get_experiment_id(),
            mode="local",
            cluster_spec=None,
            git_username=None,
            git_password=None,
            use_conda=True,
            storage_dir=None)

    try:
        with TempDir(chdr=False, remove_on_exit=True) as tmp:
            data_dir, model_dir, tracking_dir = (
                tmp.path(name) for name in ("diamonds", "estimator", "artifacts"))
            for directory in (data_dir, model_dir, tracking_dir):
                os.mkdir(directory)
            tracking.set_tracking_uri(tracking_dir)

            train_parquet = os.path.join(data_dir, "train_diamonds.parquet")
            test_parquet = os.path.join(data_dir, "test_diamonds.parquet")

            # Download the diamonds dataset via mlflow run
            launch(".", {"dest-dir": data_dir})

            # Run the main dnn app via mlflow
            launch("apps/dnn-regression", {
                "model-dir": model_dir,
                "train": train_parquet,
                "test": test_parquet,
                "hidden-units": "30,30",
                "label-col": "price",
                "steps": 5000,
                "batch-size": 128,
            })

            # Loading the saved model as a pyfunc; the first entry of the
            # model directory is taken as the export.
            export_name = os.listdir(model_dir)[0]
            model = tensorflow.load_pyfunc(os.path.join(model_dir, export_name))

            test_frame = pandas.read_parquet(test_parquet)

            predictions = model.predict(test_frame)
            assert 'predictions' in predictions
            first_value = predictions['predictions'][0][0]
            assert isinstance(first_value, numpy.float32)
    finally:
        tracking.set_tracking_uri(previous_uri)