def predict_online(data, config=None):
    """Predict from in-memory data on the fly.

    Args:
        data: Input samples accepted by the loaded model's ``predict``
            method (e.g. an n x d array-like).
        config (dict, optional): The ``predict`` section of an experiment
            config. When None, the default config at DEFAULT_CONFIG_PATH
            is loaded and its ``predict`` section is used.

    Returns:
        list: Predictions as a plain (JSON-serializable) list; an empty
        list if loading or prediction fails for any reason.
    """
    if config is None:
        logger.debug(
            "Config path was not explicitly passed. Falling back to default config."
        )
        config = load_yaml(DEFAULT_CONFIG_PATH)
        config = config["predict"]

    log_config_path = config["logging"]["config_path"]
    initialize_logging(config_path=log_config_path)

    model_dirname = config["model"]["dirname"]
    model_version = config["model"]["version"]
    # @todo: fix the hard coding of the serialization format
    # ("joblib" for sklearn checkpoints, "keras" for keras checkpoints).
    model_ext = "keras"
    model_path = Path(model_dirname) / f"v{model_version}.{model_ext}"

    try:
        # @todo: fix the hard coding — make the loader configurable.
        # checkpoint = load_sklearn_model(model_path)
        checkpoint = load_keras_hub_model(model_path)
        pred = checkpoint.predict(data)
        # np.ndarray is not JSON-serializable; convert before returning.
        pred = pred.tolist()
        logger.info({"input": data, "pred": pred})
    except Exception:
        # Best-effort endpoint: keep the full traceback in the log
        # (logger.error(f"{e}") would drop it) and degrade to [].
        logger.exception("Prediction failed.")
        pred = []
    return pred
def main(exp_config_path):
    """Run a training experiment driven by a YAML config.

    Args:
        exp_config_path (str or Path): Path to a YAML file whose ``train``
            section controls experiment parameters.

    Raises:
        ValueError: If the configured dataset is not supported.
    """
    # read off a config file that controls experiment parameters.
    config = load_yaml(exp_config_path)
    config = config["train"]

    # determine an output path where results of an experiment are stored;
    # each run gets its own timestamped subdirectory.
    cur_time = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    exp_output_dirname = Path(config["experiment"]["output_dirname"]) / cur_time
    config["experiment"]["output_dirname"] = exp_output_dirname
    # parents=True so a fresh output root does not abort the run;
    # exist_ok=True guards against a same-second rerun. This replaces the
    # redundant exists()-then-mkdir check.
    exp_output_dirname.mkdir(parents=True, exist_ok=True)

    # initialize loggers
    # @note: log files are stored to each exp_dirname.
    # this is more useful for machine learning pipelines,
    # as opposed to a standard web app which has a centralized log.
    # modules other than train.py log to the centralized location.
    log_config_path = config["logging"]["config_path"]
    initialize_logging(config_path=log_config_path, log_dirname=exp_output_dirname)

    # a demo of a training pipeline using sklearn Pipeline
    # @todo: add more pipelines: e.g. tensorflow, pytorch
    if config["data"]["dataset_name"] == "iris":
        run_sklearn_pipeline(config)
    else:
        raise ValueError(f"Unsupported dataset was given {config['data']}.")
def predict(exp_config_path):
    """Make predictions on the dataset named by an experiment config.

    Args:
        exp_config_path (str or Path): Path to a YAML file whose
            ``predict`` section names the dataset and logging setup.

    Returns:
        list: n-dimensional predictions (once implemented).

    Raises:
        NotImplementedError: Always — dataset loading is not implemented
            yet, so the body below the raise is intentionally unreachable.
    """
    raise NotImplementedError

    # --- intended implementation, disabled until dataset loading exists ---
    # read off a config file that controls experiment parameters.
    config = load_yaml(exp_config_path)
    config = config["predict"]

    log_config_path = config["logging"]["config_path"]
    initialize_logging(config_path=log_config_path)

    # set data to evaluate on
    # @todo: not implemented yet — this is a path, not an in-memory array.
    val_data = config["dataset_path"]
    pred = predict_online(val_data, config)
    return pred
import logging

from flask import Blueprint, request, jsonify

from ml_deploy_demo.predict import predict_online
from ml_deploy_demo.util.utils import initialize_logging

logger = logging.getLogger(__name__)
initialize_logging(config_path='/app/logging.yaml')

ml_app = Blueprint("ml_app", __name__)


@ml_app.route("/predict", methods=["GET", "POST"])
def predict():
    """Performs an inference.

    POST: expects a JSON body ``{"data": ...}`` and responds with
    ``{"input": <body>, "pred": <predictions>}``.
    GET: responds with a hint that POST must be used instead.
    """
    if request.method == "POST":
        data = request.get_json()
        logger.debug(f"Input to predict/: {data}")
        pred = predict_online(data=data["data"])
        return jsonify({"input": data, "pred": pred})
    if request.method == "GET":
        # plain string — the original f-prefix had no placeholders (F541)
        msg = "Please compose your request in POST type with data."
        logger.debug(f"Wrong request type {request}.")
        return jsonify({"msg": msg})
import numpy as np import pandas as pd import tensorflow as tf import tensorflow_hub as hub import tensorflow_datasets as tfds from ml_deploy_demo.util.utils import load_yaml, initialize_logging DEFAULT_CONFIG_PATH = "/app/experiment_configs/default.yaml" logger = logging.getLogger(__name__) config = load_yaml(DEFAULT_CONFIG_PATH) config = config["train"] log_config_path = config["logging"]["config_path"] initialize_logging(config_path=log_config_path) # This example is taken from: # https://www.tensorflow.org/tutorials/keras/text_classification_with_hub def run_pipeline(): """ runs pipeline to train keras DNN model for sentiment classification """ # Split the training set into 60% and 40%, so we'll end up with 15,000 examples # for training, 10,000 examples for validation and 25,000 examples for testing. train_validation_split = tfds.Split.TRAIN.subsplit([6, 4]) (train_data, validation_data), test_data = tfds.load( name="imdb_reviews", split=(train_validation_split, tfds.Split.TEST),