def run_pipeline(events, models):
    # Map the raw "name" feature to a numeric id usable as a classification target.
    tNameId = bt.Feature_id_transform(min_size=0, exclude_missing=True, zero_based=True,
                                      input_feature="name", output_feature="nameId")
    # Auto-convert the remaining features, leaving the target columns untouched.
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2, exclude=["nameId", "name"])
    xgb = xg.XGBoostClassifier(target="nameId", target_readable="name", excluded=["name"],
                               learning_rate=0.1, silent=1)
    # Wrap the classifier in 5-fold cross validation.
    cv = cf.Seldon_KFold(xgb, 5)
    transformers = [("tName", tNameId), ("tAuto", tAuto), ("cv", cv)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe_from_files(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
    # Scores are only available after fit_transform has run the folds.
    logger.info("cross validation scores %s", cv.get_scores())
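# Example invocation of run_pipeline above (hypothetical paths, not part of the
# original snippet): "events" is a list of input locations handed to
# create_dataframe_from_files, and "models" is the folder save_pipeline writes
# the fitted pipeline to. The same calling convention applies to the other
# run_pipeline variants below.
run_pipeline(["./data/events.json"], "./models")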
def run_pipeline(events, models):
    tNameId = bt.Feature_id_transform(min_size=0, exclude_missing=True, zero_based=True,
                                      input_feature="name", output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2, exclude=["nameId", "name"])
    # Wrap a scikit-learn estimator (RandomForestClassifier, from sklearn.ensemble
    # in the original imports) so it can run inside a Seldon pipeline.
    sk_classifier = RandomForestClassifier(verbose=1)
    classifier = ske.SKLearnClassifier(clf=sk_classifier, target="nameId", excluded=["name"])
    cv = cf.Seldon_KFold(classifier, 5)
    transformers = [("tName", tNameId), ("tAuto", tAuto), ("cv", cv)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
    logger.info("cross validation scores %s", cv.get_scores())
def create_prediction_microservice(self, pipeline_folder, model_name):
    """
    Create a prediction Flask microservice app

    Parameters
    ----------
    pipeline_folder : str
        location of pipeline
    model_name : str
        model name to use for this pipeline
    """
    app = Flask(__name__)

    # Load the saved pipeline into a unique temporary work folder.
    rint = random.randint(1, 999999)
    pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint),
                               aws_key=self.aws_key, aws_secret=self.aws_secret)
    pipeline = pw.load_pipeline(pipeline_folder)

    # Stash the pipeline on the app so request handlers can reach it.
    app.config["seldon_pipeline_wrapper"] = pw
    app.config["seldon_pipeline"] = pipeline
    app.config["seldon_model_name"] = model_name

    app.register_blueprint(predict_blueprint)

    # other setup tasks
    return app
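# Hypothetical usage of the factory above, assuming it is a method on a service
# object (here called "svc") that carries aws_key/aws_secret. The pipeline
# folder, model name, host, and port are placeholders.
app = svc.create_prediction_microservice("./models/pipeline", "mymodel")
app.run(host="0.0.0.0", port=5000)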
def run_pipeline(events, models):
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2, exclude=["label"])
    # Unsupervised anomaly detector; "label" is excluded from the features so it
    # remains available for evaluation.
    detector = anod.iNNEDetector()
    wrapper = aw.AnomalyWrapper(clf=detector, excluded=["label"])
    transformers = [("tAuto", tAuto), ("clf", wrapper)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe_from_files(events)
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, models)
def train(self, sample):
    # Turn the raw review text into tf-idf features.
    tTfidf = ptfidf.Tfidf_transform(input_feature="review", output_feature="tfidf",
                                    target_feature="sentiment", min_df=10, max_df=0.7,
                                    select_features=False, topn_features=50000,
                                    stop_words="english", ngram_range=[1, 2])
    # Keep only the features the classifier needs.
    tFilter2 = bt.Include_features_transform(included=["tfidf", "sentiment"])
    # Convert to svmlight format for XGBoost.
    svmTransform = bt.Svmlight_transform(output_feature="svmfeatures",
                                         excluded=["sentiment"], zero_based=False)
    classifier_xg = xg.XGBoostClassifier(target="sentiment", svmlight_feature="svmfeatures",
                                         silent=1, max_depth=5, n_estimators=200,
                                         objective='binary:logistic', scale_pos_weight=0.2)
    cv = cf.Seldon_KFold(classifier_xg, metric='auc', save_folds_folder="./folds")
    transformers = [("tTfidf", tTfidf), ("tFilter2", tFilter2),
                    ("svmTransform", svmTransform), ("cv", cv)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe_from_files([self.data_folder], df_format="csv")
    if sample < 1.0:
        logger.info("sampling dataset to fraction %s", sample)
        df = df.sample(frac=sample, random_state=1)
    logger.info("Data frame shape %d , %d", df.shape[0], df.shape[1])
    df2 = p.fit_transform(df)
    pw.save_pipeline(p, self.model_folder)
    logger.info("cross validation scores %s", cv.get_scores())
    return p
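# Hypothetical call, assuming the train method above sits on a trainer object
# (here "trainer") whose data_folder and model_folder are already set;
# sample=0.1 fits on a random 10% of the data for a quicker iteration loop.
pipeline = trainer.train(sample=0.1)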
def run_pipeline(events, models):
    tNameId = bt.Feature_id_transform(min_size=0, exclude_missing=True, zero_based=True,
                                      input_feature="name", output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2, exclude=["nameId", "name"])
    # Keras classifier built from a user-supplied model construction function
    # (create_model is defined elsewhere in the original source).
    keras = sk.KerasClassifier(model_create=create_model, target="nameId", target_readable="name")
    transformers = [("tName", tNameId), ("tAuto", tAuto), ("keras", keras)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    p.fit(df)
    pw.save_pipeline(p, models)
def run_pipeline(events, models):
    tNameId = bt.Feature_id_transform(min_size=0, exclude_missing=True, zero_based=True,
                                      input_feature="name", output_feature="nameId")
    tAuto = pauto.Auto_transform(max_values_numeric_categorical=2, exclude=["nameId", "name"])
    xgb = xg.XGBoostClassifier(target="nameId", target_readable="name", excluded=["name"],
                               learning_rate=0.1, silent=0)
    transformers = [("tName", tNameId), ("tAuto", tAuto), ("xgb", xgb)]
    p = Pipeline(transformers)
    pw = sutl.Pipeline_wrapper()
    df = pw.create_dataframe(events)
    p.fit(df)
    pw.save_pipeline(p, models)
import importlib
import json
import pprint
import random

from flask import Flask, jsonify, request
from sklearn.pipeline import Pipeline

import seldon.pipeline.util as sutl

app = Flask(__name__)
app.config.from_object('server_config')

# Load the saved pipeline once at startup, into a unique temporary work folder,
# pulling from S3 when AWS credentials are configured.
rint = random.randint(1, 999999)
if 'AWS_KEY' in app.config:
    pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint),
                               aws_key=app.config['AWS_KEY'],
                               aws_secret=app.config['AWS_SECRET'])
else:
    pw = sutl.Pipeline_wrapper(work_folder='/tmp/pl_' + str(rint))
pipeline = pw.load_pipeline(app.config['PIPELINE'])


def extract_input():
    # Pull the client id and the JSON feature payload off the query string.
    client = request.args.get('client')
    j = json.loads(request.args.get('json'))
    return {"client": client, "json": j}


@app.route('/predict', methods=['GET'])
def predict():
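    # The original excerpt ends above, before the handler body. What follows is
    # a minimal hypothetical completion: it assumes the "json" query parameter
    # is a flat dict of feature values and that the pipeline's final stage
    # follows the scikit-learn predict_proba API. The response shape is
    # illustrative only, not Seldon's actual wire format.
    import pandas as pd  # local import keeps this sketch self-contained
    data = extract_input()
    df = pd.DataFrame([data["json"]])  # one-row frame of raw features
    probs = pipeline.predict_proba(df)
    return jsonify({"client": data["client"], "predictions": probs[0].tolist()})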