def create_matrix(d1: int, d2: int):
    pipeline_parameters_block = '''
d1 = {}
d2 = {}
'''.format(d1, d2)

    block1 = '''
import numpy as np
'''

    block2 = '''
rnd_matrix = np.random.rand(d1, d2)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(rnd_matrix, "rnd_matrix")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.jupyter_utils import update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/create_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('create_matrix')

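# Each generated step below follows the same anatomy as create_matrix above:
# an optional pipeline-parameters block, an optional data-loading block that
# restores marshalled inputs, the user's notebook cells as numbered blocks,
# an optional data-saving block that marshals outputs, and the closing
# boilerplate that runs all blocks in a Jupyter kernel and writes the HTML
# artifact for the KFP UI.
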
def test():
    block1 = '''
v1 = "Hello"
'''

    block2 = '''
print(v1)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("")
_kale_marshal_utils.save(v1, "v1")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

def sum_matrix():
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
rnd_matrix = _kale_marshal_utils.load("rnd_matrix")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
'''

    block2 = '''
result = rnd_matrix.sum()
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(result, "result")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.jupyter_utils import update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/sum_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('sum_matrix')

def randomforest():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
random_forest = RandomForestClassifier(n_estimators=100)
random_forest.fit(train_df, train_labels)
acc_random_forest = round(random_forest.score(train_df, train_labels) * 100, 2)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_random_forest, "acc_random_forest")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/randomforest.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('randomforest')
    _kale_mlmd_utils.call("mark_execution_complete")

def results():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
acc_decision_tree = _kale_marshal_utils.load("acc_decision_tree")
acc_gaussian = _kale_marshal_utils.load("acc_gaussian")
acc_linear_svc = _kale_marshal_utils.load("acc_linear_svc")
acc_log = _kale_marshal_utils.load("acc_log")
acc_random_forest = _kale_marshal_utils.load("acc_random_forest")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
results = pd.DataFrame({
    'Model': ['Support Vector Machines', 'Logistic Regression',
              'Random Forest', 'Naive Bayes', 'Decision Tree'],
    'Score': [acc_linear_svc, acc_log, acc_random_forest,
              acc_gaussian, acc_decision_tree]})
result_df = results.sort_values(by='Score', ascending=False)
result_df = result_df.set_index('Score')
print(result_df)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/results.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('results')
    _kale_mlmd_utils.call("mark_execution_complete")

def pipeline_metrics(d1: int, d2: int):
    pipeline_parameters_block = '''
d1 = {}
d2 = {}
'''.format(d1, d2)

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
result = _kale_marshal_utils.load("result")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import json

metrics_metadata = list()
metrics = {
    "d1": d1,
    "d2": d2,
    "result": result,
}

for k in metrics:
    if isinstance(metrics[k], (int, float)):
        metric = metrics[k]
    else:
        try:
            metric = float(metrics[k])
        except ValueError:
            print("Variable {} with type {} not supported as pipeline"
                  " metric. Can only write `int` or `float` types as"
                  " pipeline metrics".format(k, type(metrics[k])))
            continue
    metrics_metadata.append({
        'name': k,
        'numberValue': metric,
        'format': "RAW",
    })

with open('/mlpipeline-metrics.json', 'w') as f:
    json.dump({'metrics': metrics_metadata}, f)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.jupyter_utils import update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              data_loading_block,
              block1,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/pipeline_metrics.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('pipeline_metrics')

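# For reference, block1 above writes the standard KFP metrics file; with
# d1=5, d2=6 and an illustrative result value it would look roughly like
# (the numbers are examples, not output from a real run):
#
#   {"metrics": [
#       {"name": "d1", "numberValue": 5, "format": "RAW"},
#       {"name": "d2", "numberValue": 6, "format": "RAW"},
#       {"name": "result", "numberValue": 14.7, "format": "RAW"}]}
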
def loaddata():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
path = "data/"
PREDICTION_LABEL = 'Survived'

test_df = pd.read_csv(path + "test.csv")
train_df = pd.read_csv(path + "train.csv")
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(PREDICTION_LABEL, "PREDICTION_LABEL")
_kale_marshal_utils.save(test_df, "test_df")
_kale_marshal_utils.save(train_df, "train_df")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/loaddata.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('loaddata')
    _kale_mlmd_utils.call("mark_execution_complete")

def logisticregression():
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
train_df = _kale_marshal_utils.load("train_df")
train_labels = _kale_marshal_utils.load("train_labels")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
logreg = LogisticRegression(solver='lbfgs', max_iter=110)
logreg.fit(train_df, train_labels)
acc_log = round(logreg.score(train_df, train_labels) * 100, 2)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(acc_log, "acc_log")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/logisticregression.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('logisticregression')

def test():
    block1 = '''
print("hello")
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.jupyter_utils import update_uimetadata as _kale_update_uimetadata
    blocks = (block1, )
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

def create_matrix(d1: int, d2: int):
    pipeline_parameters_block = '''
d1 = {}
d2 = {}
'''.format(d1, d2)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
'''

    block2 = '''
rnd_matrix = np.random.rand(d1, d2)
'''

    block3 = '''
from kale.utils import kfp_utils as _kale_kfp_utils
_kale_kfp_metrics = {
    "d1": d1,
    "d2": d2
}
_kale_kfp_utils.generate_mlpipeline_metrics(_kale_kfp_metrics)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(rnd_matrix, "rnd_matrix")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/create_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('create_matrix')
    _kale_mlmd_utils.call("mark_execution_complete")

def sum_matrix():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
rnd_matrix = _kale_marshal_utils.load("rnd_matrix")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
'''

    block2 = '''
sum_result = rnd_matrix.sum()
'''

    block3 = '''
from kale.utils import kfp_utils as _kale_kfp_utils
_kale_kfp_metrics = {
    "sum-result": sum_result
}
_kale_kfp_utils.generate_mlpipeline_metrics(_kale_kfp_metrics)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/sum_matrix.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('sum_matrix')
    _kale_mlmd_utils.call("mark_execution_complete")

def test():
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("")
_kale_marshal_utils.set_kale_directory_file_names()
v1 = _kale_marshal_utils.load("v1")
# -----------------------DATA LOADING END----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.jupyter_utils import update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block, )
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

def test():
    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
print("hello")
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1, )
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')
    _kale_mlmd_utils.call("mark_execution_complete")

def featureengineering():
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
PREDICTION_LABEL = _kale_marshal_utils.load("PREDICTION_LABEL")
test_df = _kale_marshal_utils.load("test_df")
train_df = _kale_marshal_utils.load("train_df")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
data = [train_df, test_df]

for dataset in data:
    dataset['Fare'] = dataset['Fare'].fillna(0)
    dataset['Fare'] = dataset['Fare'].astype(int)
'''

    block3 = '''
data = [train_df, test_df]
titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}

for dataset in data:
    # extract titles
    dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)
    # replace titles with a more common title or as Rare
    dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess', 'Capt', 'Col', 'Don', 'Dr',\\
                                                 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
    dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
    dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
    # convert titles into numbers
    dataset['Title'] = dataset['Title'].map(titles)
    # filling NaN with 0, to be safe
    dataset['Title'] = dataset['Title'].fillna(0)
train_df = train_df.drop(['Name'], axis=1)
test_df = test_df.drop(['Name'], axis=1)
'''

    block4 = '''
genders = {"male": 0, "female": 1}
data = [train_df, test_df]

for dataset in data:
    dataset['Sex'] = dataset['Sex'].map(genders)
'''

    block5 = '''
train_df = train_df.drop(['Ticket'], axis=1)
test_df = test_df.drop(['Ticket'], axis=1)
'''

    block6 = '''
ports = {"S": 0, "C": 1, "Q": 2}
data = [train_df, test_df]

for dataset in data:
    dataset['Embarked'] = dataset['Embarked'].map(ports)
'''

    block7 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['Age'] = dataset['Age'].astype(int)
    dataset.loc[dataset['Age'] <= 11, 'Age'] = 0
    dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1
    dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2
    dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3
    dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4
    dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5
    dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6
    dataset.loc[dataset['Age'] > 66, 'Age'] = 6

# let's see how it's distributed
train_df['Age'].value_counts()
'''

    block8 = '''
data = [train_df, test_df]

for dataset in data:
    dataset.loc[dataset['Fare'] <= 7.91, 'Fare'] = 0
    dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
    dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare'] = 2
    dataset.loc[(dataset['Fare'] > 31) & (dataset['Fare'] <= 99), 'Fare'] = 3
    dataset.loc[(dataset['Fare'] > 99) & (dataset['Fare'] <= 250), 'Fare'] = 4
    dataset.loc[dataset['Fare'] > 250, 'Fare'] = 5
    dataset['Fare'] = dataset['Fare'].astype(int)
'''

    block9 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['Age_Class'] = dataset['Age'] * dataset['Pclass']
'''

    block10 = '''
for dataset in data:
    dataset['Fare_Per_Person'] = dataset['Fare'] / (dataset['relatives'] + 1)
    dataset['Fare_Per_Person'] = dataset['Fare_Per_Person'].astype(int)

# Let's take a last look at the training set, before we start training the models.
train_df.head(10)
'''

    block11 = '''
train_labels = train_df[PREDICTION_LABEL]
train_df = train_df.drop(PREDICTION_LABEL, axis=1)
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(train_df, "train_df")
_kale_marshal_utils.save(train_labels, "train_labels")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              block9,
              block10,
              block11,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/featureengineering.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('featureengineering')

def test_outliers(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
DEPLOY_NAMESPACE = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
def predict():
    payload = '{"data": {"ndarray": [[300, 4, 4, 2, 1, 4, 4, 0, 0, 0, 600, 9]]}}'
    cmd = f"""curl -d '{payload}' \\
       http://income-classifier-default.{DEPLOY_NAMESPACE}:8000/api/v1.0/predictions \\
       -H "Content-Type: application/json"
    """
    ret = Popen(cmd, shell=True, stdout=PIPE)
    raw = ret.stdout.read().decode("utf-8")
    print(raw)
'''

    block4 = '''
def get_outlier_event_display_logs():
    cmd = f"kubectl logs $(kubectl get pod -l app=event-display -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE}) -n {DEPLOY_NAMESPACE}"
    ret = Popen(cmd, shell=True, stdout=PIPE)
    res = ret.stdout.read().decode("utf-8").split("\\n")
    data = []
    for i in range(0, len(res)):
        if res[i] == 'Data,':
            j = json.loads(json.loads(res[i + 1]))
            if "is_outlier" in j["data"].keys():
                data.append(j)
    if len(data) > 0:
        return data[-1]
    else:
        return None

j = None
while j is None:
    predict()
    print("Waiting for outlier logs, sleeping")
    time.sleep(2)
    j = get_outlier_event_display_logs()

print(j)
print("Outlier", j["data"]["is_outlier"] == [1])
'''

    block5 = '''
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/test_outliers.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test_outliers')
    _kale_mlmd_utils.call("mark_execution_complete")

def deploy_event_display(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
DEPLOY_NAMESPACE = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
event_display = f"""apiVersion: apps/v1
kind: Deployment
metadata:
  name: event-display
  namespace: {DEPLOY_NAMESPACE}
spec:
  replicas: 1
  selector:
    matchLabels: &labels
      app: event-display
  template:
    metadata:
      labels: *labels
    spec:
      containers:
        - name: helloworld-go
          # Source code: https://github.com/knative/eventing-contrib/tree/master/cmd/event_display
          image: gcr.io/knative-releases/knative.dev/eventing-contrib/cmd/event_display@sha256:f4628e97a836c77ed38bd3b6fd3d0b06de4d5e7db6704772fe674d48b20bd477
---
kind: Service
apiVersion: v1
metadata:
  name: event-display
  namespace: {DEPLOY_NAMESPACE}
spec:
  selector:
    app: event-display
  ports:
    - protocol: TCP
      port: 80
      targetPort: 8080
---
apiVersion: eventing.knative.dev/v1alpha1
kind: Trigger
metadata:
  name: income-outlier-display
  namespace: {DEPLOY_NAMESPACE}
spec:
  broker: default
  filter:
    attributes:
      type: io.seldon.serving.inference.outlier
  subscriber:
    ref:
      apiVersion: v1
      kind: Service
      name: event-display
"""
with open("event_display.yaml", "w") as f:
    f.write(event_display)
run("kubectl apply -f event_display.yaml", shell=True)
'''

    block4 = '''
run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/event-display -n {DEPLOY_NAMESPACE}", shell=True)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_event_display.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_event_display')
    _kale_mlmd_utils.call("mark_execution_complete")

def build_model(INCOME_MODEL_PATH: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
INCOME_MODEL_PATH = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(INCOME_MODEL_PATH, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
adult = fetch_adult()
adult.keys()
'''

    block4 = '''
data = adult.data
target = adult.target
feature_names = adult.feature_names
category_map = adult.category_map
'''

    block5 = '''
from alibi.utils.data import gen_category_map
'''

    block6 = '''
np.random.seed(0)
data_perm = np.random.permutation(np.c_[data, target])
data = data_perm[:, :-1]
target = data_perm[:, -1]
'''

    block7 = '''
idx = 30000
X_train, Y_train = data[:idx, :], target[:idx]
X_test, Y_test = data[idx + 1:, :], target[idx + 1:]
'''

    block8 = '''
ordinal_features = [x for x in range(len(feature_names)) if x not in list(category_map.keys())]
ordinal_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                      ('scaler', StandardScaler())])
'''

    block9 = '''
categorical_features = list(category_map.keys())
categorical_transformer = Pipeline(steps=[('imputer', SimpleImputer(strategy='median')),
                                          ('onehot', OneHotEncoder(handle_unknown='ignore'))])
'''

    block10 = '''
preprocessor = ColumnTransformer(transformers=[('num', ordinal_transformer, ordinal_features),
                                               ('cat', categorical_transformer, categorical_features)])
'''

    block11 = '''
np.random.seed(0)
clf = RandomForestClassifier(n_estimators=50)
'''

    block12 = '''
model = Pipeline(steps=[("preprocess", preprocessor), ("model", clf)])
model.fit(X_train, Y_train)
'''

    block13 = '''
def predict_fn(x):
    return model.predict(x)
'''

    block14 = '''
#predict_fn = lambda x: clf.predict(preprocessor.transform(x))
print('Train accuracy: ', accuracy_score(Y_train, predict_fn(X_train)))
print('Test accuracy: ', accuracy_score(Y_test, predict_fn(X_test)))
'''

    block15 = '''
dump(model, 'model.joblib')
'''

    block16 = '''
print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{INCOME_MODEL_PATH}/model.joblib", 'model.joblib'))
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(X_test, "X_test")
_kale_marshal_utils.save(X_train, "X_train")
_kale_marshal_utils.save(Y_train, "Y_train")
_kale_marshal_utils.save(adult, "adult")
_kale_marshal_utils.save(category_map, "category_map")
_kale_marshal_utils.save(feature_names, "feature_names")
_kale_marshal_utils.save(model, "model")
_kale_marshal_utils.save(predict_fn, "predict_fn")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              block9,
              block10,
              block11,
              block12,
              block13,
              block14,
              block15,
              block16,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/build_model.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('build_model')
    _kale_mlmd_utils.call("mark_execution_complete")

def setup(MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
minioClient = get_minio()
buckets = minioClient.list_buckets()
for bucket in buckets:
    print(bucket.name, bucket.creation_date)
'''

    block4 = '''
if not minioClient.bucket_exists(MINIO_MODEL_BUCKET):
    minioClient.make_bucket(MINIO_MODEL_BUCKET)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/setup.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('setup')
    _kale_mlmd_utils.call("mark_execution_complete")

def deploy_outlier(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str, OUTLIER_MODEL_PATH: str):
    pipeline_parameters_block = '''
DEPLOY_NAMESPACE = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
OUTLIER_MODEL_PATH = "{}"
'''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY, OUTLIER_MODEL_PATH)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
outlier_yaml = f"""apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: income-outlier
  namespace: {DEPLOY_NAMESPACE}
spec:
  template:
    metadata:
      annotations:
        autoscaling.knative.dev/minScale: "1"
    spec:
      containers:
      - image: seldonio/alibi-detect-server:1.2.2-dev_alibidetect
        imagePullPolicy: IfNotPresent
        args:
        - --model_name
        - adultod
        - --http_port
        - '8080'
        - --protocol
        - seldon.http
        - --storage_uri
        - s3://{MINIO_MODEL_BUCKET}/{OUTLIER_MODEL_PATH}
        - --reply_url
        - http://default-broker
        - --event_type
        - io.seldon.serving.inference.outlier
        - --event_source
        - io.seldon.serving.incomeod
        - OutlierDetector
        envFrom:
        - secretRef:
            name: seldon-init-container-secret
"""
with open("outlier.yaml", "w") as f:
    f.write(outlier_yaml)
run("kubectl apply -f outlier.yaml", shell=True)
'''

    block4 = '''
trigger_outlier_yaml = f"""apiVersion: eventing.knative.dev/v1alpha1
kind: Trigger
metadata:
  name: income-outlier-trigger
  namespace: {DEPLOY_NAMESPACE}
spec:
  filter:
    sourceAndType:
      type: io.seldon.serving.inference.request
  subscriber:
    ref:
      apiVersion: serving.knative.dev/v1alpha1
      kind: Service
      name: income-outlier
"""
with open("outlier_trigger.yaml", "w") as f:
    f.write(trigger_outlier_yaml)
run("kubectl apply -f outlier_trigger.yaml", shell=True)
'''

    block5 = '''
run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l serving.knative.dev/service=income-outlier -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_outlier.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_outlier')
    _kale_mlmd_utils.call("mark_execution_complete")

def test_model(DEPLOY_NAMESPACE: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
DEPLOY_NAMESPACE = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(DEPLOY_NAMESPACE, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
payload = '{"data": {"ndarray": [[53,4,0,2,8,4,4,0,0,0,60,9]]}}'
cmd = f"""curl -d '{payload}' \\
   http://income-classifier-default.{DEPLOY_NAMESPACE}:8000/api/v1.0/predictions \\
   -H "Content-Type: application/json"
"""
ret = Popen(cmd, shell=True, stdout=PIPE)
raw = ret.stdout.read().decode("utf-8")
print(raw)
'''

    block4 = '''
payload = '{"data": {"ndarray": [[53,4,0,2,8,4,4,0,0,0,60,9]]}}'
cmd = f"""curl -d '{payload}' \\
   http://income-classifier-default-explainer.{DEPLOY_NAMESPACE}:9000/api/v1.0/explain \\
   -H "Content-Type: application/json"
"""
ret = Popen(cmd, shell=True, stdout=PIPE)
raw = ret.stdout.read().decode("utf-8")
print(raw)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/test_model.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test_model')
    _kale_mlmd_utils.call("mark_execution_complete")

def deploy_seldon(DEPLOY_NAMESPACE: str, EXPLAINER_MODEL_PATH: str, INCOME_MODEL_PATH: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
DEPLOY_NAMESPACE = "{}"
EXPLAINER_MODEL_PATH = "{}"
INCOME_MODEL_PATH = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(DEPLOY_NAMESPACE, EXPLAINER_MODEL_PATH, INCOME_MODEL_PATH, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
secret = f"""apiVersion: v1
kind: Secret
metadata:
  name: seldon-init-container-secret
  namespace: {DEPLOY_NAMESPACE}
type: Opaque
stringData:
  AWS_ACCESS_KEY_ID: {MINIO_ACCESS_KEY}
  AWS_SECRET_ACCESS_KEY: {MINIO_SECRET_KEY}
  AWS_ENDPOINT_URL: http://{MINIO_HOST}
  USE_SSL: "false"
"""
with open("secret.yaml", "w") as f:
    f.write(secret)
run("cat secret.yaml | kubectl apply -f -", shell=True)
'''

    block4 = '''
sa = f"""apiVersion: v1
kind: ServiceAccount
metadata:
  name: minio-sa
  namespace: {DEPLOY_NAMESPACE}
secrets:
  - name: seldon-init-container-secret
"""
with open("sa.yaml", "w") as f:
    f.write(sa)
run("kubectl apply -f sa.yaml", shell=True)
'''

    block5 = '''
model_yaml = f"""apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: income-classifier
  namespace: {DEPLOY_NAMESPACE}
spec:
  predictors:
  - componentSpecs:
    graph:
      implementation: SKLEARN_SERVER
      modelUri: s3://{MINIO_MODEL_BUCKET}/{INCOME_MODEL_PATH}
      envSecretRefName: seldon-init-container-secret
      name: classifier
      logger:
        mode: all
    explainer:
      type: AnchorTabular
      modelUri: s3://{MINIO_MODEL_BUCKET}/{EXPLAINER_MODEL_PATH}
      envSecretRefName: seldon-init-container-secret
    name: default
    replicas: 1
"""
with open("model.yaml", "w") as f:
    f.write(model_yaml)
run("kubectl apply -f model.yaml", shell=True)
'''

    block6 = '''
run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l seldon-deployment-id=income-classifier -o jsonpath='{{.items[0].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
'''

    block7 = '''
run(f"kubectl rollout status -n {DEPLOY_NAMESPACE} deploy/$(kubectl get deploy -l seldon-deployment-id=income-classifier -o jsonpath='{{.items[1].metadata.name}}' -n {DEPLOY_NAMESPACE})", shell=True)
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/deploy_seldon.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('deploy_seldon')
    _kale_mlmd_utils.call("mark_execution_complete")

def train_explainer(EXPLAINER_MODEL_PATH: str, MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
EXPLAINER_MODEL_PATH = "{}"
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(EXPLAINER_MODEL_PATH, MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
X_train = _kale_marshal_utils.load("X_train")
category_map = _kale_marshal_utils.load("category_map")
feature_names = _kale_marshal_utils.load("feature_names")
model = _kale_marshal_utils.load("model")
predict_fn = _kale_marshal_utils.load("predict_fn")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
model.predict(X_train)
explainer = AnchorTabular(predict_fn, feature_names, categorical_names=category_map)
'''

    block4 = '''
explainer.fit(X_train, disc_perc=[25, 50, 75])
'''

    block5 = '''
with open("explainer.dill", "wb") as dill_file:
    dill.dump(explainer, dill_file)
    dill_file.close()
print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{EXPLAINER_MODEL_PATH}/explainer.dill", 'explainer.dill'))
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(X_train, "X_train")
_kale_marshal_utils.save(explainer, "explainer")
_kale_marshal_utils.save(model, "model")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/train_explainer.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('train_explainer')
    _kale_mlmd_utils.call("mark_execution_complete")

def build_outlier(MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_MODEL_BUCKET: str, MINIO_SECRET_KEY: str, OUTLIER_MODEL_PATH: str):
    pipeline_parameters_block = '''
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_MODEL_BUCKET = "{}"
MINIO_SECRET_KEY = "{}"
OUTLIER_MODEL_PATH = "{}"
'''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_MODEL_BUCKET, MINIO_SECRET_KEY, OUTLIER_MODEL_PATH)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
X_train = _kale_marshal_utils.load("X_train")
Y_train = _kale_marshal_utils.load("Y_train")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
from alibi_detect.od import IForest

od = IForest(
    threshold=0.,
    n_estimators=200,
)
'''

    block4 = '''
od.fit(X_train)
'''

    block5 = '''
np.random.seed(0)
perc_outlier = 5
threshold_batch = create_outlier_batch(X_train, Y_train, n_samples=1000, perc_outlier=perc_outlier)
X_threshold, y_threshold = threshold_batch.data.astype('float'), threshold_batch.target
#X_threshold = (X_threshold - mean) / stdev
print('{}% outliers'.format(100 * y_threshold.mean()))
'''

    block6 = '''
od.infer_threshold(X_threshold, threshold_perc=100 - perc_outlier)
print('New threshold: {}'.format(od.threshold))
threshold = od.threshold
'''

    block7 = '''
X_outlier = [[300, 4, 4, 2, 1, 4, 4, 0, 0, 0, 600, 9]]
'''

    block8 = '''
od.predict(X_outlier)
'''

    block9 = '''
from alibi_detect.utils.saving import save_detector, load_detector
from os import listdir
from os.path import isfile, join

filepath = "ifoutlier"
save_detector(od, filepath)
onlyfiles = [f for f in listdir(filepath) if isfile(join(filepath, f))]
for filename in onlyfiles:
    print(filename)
    print(get_minio().fput_object(MINIO_MODEL_BUCKET, f"{OUTLIER_MODEL_PATH}/{filename}", join(filepath, filename)))
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              block9,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/build_outlier.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('build_outlier')
    _kale_mlmd_utils.call("mark_execution_complete")

def datapreprocessing():
    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
test_df = _kale_marshal_utils.load("test_df")
train_df = _kale_marshal_utils.load("train_df")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from matplotlib import style

from sklearn import linear_model
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
'''

    block2 = '''
data = [train_df, test_df]
for dataset in data:
    dataset['relatives'] = dataset['SibSp'] + dataset['Parch']
    dataset.loc[dataset['relatives'] > 0, 'not_alone'] = 0
    dataset.loc[dataset['relatives'] == 0, 'not_alone'] = 1
    dataset['not_alone'] = dataset['not_alone'].astype(int)
train_df['not_alone'].value_counts()
'''

    block3 = '''
# This does not contribute to a person's survival probability
train_df = train_df.drop(['PassengerId'], axis=1)
'''

    block4 = '''
import re

deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
data = [train_df, test_df]

for dataset in data:
    dataset['Cabin'] = dataset['Cabin'].fillna("U0")
    dataset['Deck'] = dataset['Cabin'].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
    dataset['Deck'] = dataset['Deck'].map(deck)
    dataset['Deck'] = dataset['Deck'].fillna(0)
    dataset['Deck'] = dataset['Deck'].astype(int)

# we can now drop the cabin feature
train_df = train_df.drop(['Cabin'], axis=1)
test_df = test_df.drop(['Cabin'], axis=1)
'''

    block5 = '''
data = [train_df, test_df]

for dataset in data:
    mean = train_df["Age"].mean()
    std = test_df["Age"].std()
    is_null = dataset["Age"].isnull().sum()
    # compute random numbers between mean - std and mean + std
    rand_age = np.random.randint(mean - std, mean + std, size=is_null)
    # fill NaN values in the Age column with the random values generated
    age_slice = dataset["Age"].copy()
    age_slice[np.isnan(age_slice)] = rand_age
    dataset["Age"] = age_slice
    dataset["Age"] = dataset["Age"].astype(int)
train_df["Age"].isnull().sum()
'''

    block6 = '''
train_df['Embarked'].describe()
'''

    block7 = '''
# fill with most common value
common_value = 'S'
data = [train_df, test_df]

for dataset in data:
    dataset['Embarked'] = dataset['Embarked'].fillna(common_value)
'''

    block8 = '''
train_df.info()
'''

    data_saving_block = '''
# -----------------------DATA SAVING START---------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.save(test_df, "test_df")
_kale_marshal_utils.save(train_df, "train_df")
# -----------------------DATA SAVING END-----------------------------------
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              block3,
              block4,
              block5,
              block6,
              block7,
              block8,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/datapreprocessing.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('datapreprocessing')

def explain(MINIO_ACCESS_KEY: str, MINIO_HOST: str, MINIO_SECRET_KEY: str):
    pipeline_parameters_block = '''
MINIO_ACCESS_KEY = "{}"
MINIO_HOST = "{}"
MINIO_SECRET_KEY = "{}"
'''.format(MINIO_ACCESS_KEY, MINIO_HOST, MINIO_SECRET_KEY)

    from kale.utils import mlmd_utils as _kale_mlmd_utils
    _kale_mlmd_utils.init_metadata()

    data_loading_block = '''
# -----------------------DATA LOADING START--------------------------------
from kale.marshal import utils as _kale_marshal_utils
_kale_marshal_utils.set_kale_data_directory("/marshal")
_kale_marshal_utils.set_kale_directory_file_names()
X_test = _kale_marshal_utils.load("X_test")
X_train = _kale_marshal_utils.load("X_train")
adult = _kale_marshal_utils.load("adult")
explainer = _kale_marshal_utils.load("explainer")
model = _kale_marshal_utils.load("model")
# -----------------------DATA LOADING END----------------------------------
'''

    block1 = '''
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from alibi.explainers import AnchorTabular
from alibi.datasets import fetch_adult
from minio import Minio
from minio.error import ResponseError
from joblib import dump, load
import dill
import time
import json
from subprocess import run, Popen, PIPE
from alibi_detect.utils.data import create_outlier_batch
'''

    block2 = '''
def get_minio():
    return Minio(MINIO_HOST,
                 access_key=MINIO_ACCESS_KEY,
                 secret_key=MINIO_SECRET_KEY,
                 secure=False)
'''

    block3 = '''
model.predict(X_train)
idx = 0
class_names = adult.target_names
print('Prediction: ', class_names[explainer.predict_fn(X_test[idx].reshape(1, -1))[0]])
'''

    block4 = '''
explanation = explainer.explain(X_test[idx], threshold=0.95)
print('Anchor: %s' % (' AND '.join(explanation['names'])))
print('Precision: %.2f' % explanation['precision'])
print('Coverage: %.2f' % explanation['coverage'])
'''

    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (pipeline_parameters_block,
              data_loading_block,
              block1,
              block2,
              block3,
              block4,
              )
    html_artifact = _kale_run_code(blocks)
    with open("/explain.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('explain')
    _kale_mlmd_utils.call("mark_execution_complete")

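
# ---------------------------------------------------------------------------
# A minimal sketch (not part of the generated output above) of how step
# functions like these are typically wired into a pipeline with the KFP v1
# SDK. The base image and pipeline metadata below are illustrative
# assumptions, and Kale's real compiler also mounts the shared /marshal
# volume, which this sketch omits.
# ---------------------------------------------------------------------------
import kfp.dsl as dsl
import kfp.components as comp
import kfp.compiler as compiler

# Turn each step function into a factory that produces a ContainerOp running
# the function's body inside the given image (image name is an assumption).
create_matrix_op = comp.func_to_container_op(create_matrix, base_image="python:3.8")
sum_matrix_op = comp.func_to_container_op(sum_matrix, base_image="python:3.8")


@dsl.pipeline(name="matrix-pipeline", description="Illustrative wiring of two generated steps")
def matrix_pipeline(d1: int = 5, d2: int = 6):
    step1 = create_matrix_op(d1, d2)
    step2 = sum_matrix_op()
    # sum_matrix loads the marshalled rnd_matrix saved by create_matrix, so
    # it has to be ordered after it.
    step2.after(step1)


if __name__ == "__main__":
    compiler.Compiler().compile(matrix_pipeline, "matrix_pipeline.yaml")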