def final_auto_snapshot():
    """Run the no-op marshalled step and submit the final Rok snapshot.

    Initialises metadata, invokes an empty marshalled function, links an
    empty artifact mapping, then calls ``snapshot_pipeline_step`` with
    ``before=False`` and hands the returned task to the
    ``submit_output_rok_artifact`` metadata call.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    # No inputs, no outputs, no pipeline parameters for this step.
    @_kale_marshal([], [], {}, "/marshal")
    def _no_op():
        pass

    _no_op()

    _kale_link_artifacts({})

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test", "final_auto_snapshot", "", before=False,
    )
    _kale_mlmdutils.call("submit_output_rok_artifact", _kale_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
def step2(a: int, c: int):
    """Run the marshalled ``step2`` body between two Rok snapshots.

    Args:
        a: Pipeline parameter forwarded to the marshal decorator as ``"a"``.
        c: Pipeline parameter forwarded to the marshal decorator as ``"c"``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    # Snapshot taken before the step body runs; its result is not used.
    _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step2", "", before=True,
    )

    from kale.marshal.decorator import marshal

    step_parameters = dict(a=a, c=c)

    @marshal(['c', 'a', 'data'], ['res'], step_parameters, "/marshal")
    def step2(var1, var2, data):
        total = var1 + var2
        print(total)
        return 'Test'

    step2()

    _kale_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step2", "", before=False,
    )
    _kale_mlmdutils.call("submit_output_rok_artifact", _kale_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
def results():
    """Load the five accuracy scores and print them as a sorted table.

    The step's source blocks are passed to ``run_code``; the returned
    artifact is written to ``/results.html`` and registered through
    ``update_uimetadata('results')``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    acc_decision_tree = _kale_marshal.load("acc_decision_tree")
    acc_gaussian = _kale_marshal.load("acc_gaussian")
    acc_linear_svc = _kale_marshal.load("acc_linear_svc")
    acc_log = _kale_marshal.load("acc_log")
    acc_random_forest = _kale_marshal.load("acc_random_forest")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    results = pd.DataFrame({
        'Model': ['Support Vector Machines', 'logistic Regression',
                  'Random Forest', 'Naive Bayes', 'Decision Tree'],
        'Score': [acc_linear_svc, acc_log,
                  acc_random_forest, acc_gaussian, acc_decision_tree]})
    result_df = results.sort_values(by='Score', ascending=False)
    result_df = result_df.set_index('Score')
    print(result_df)
    '''

    # The blocks are executed in order by run_code (a Jupyter kernel,
    # according to the generator's own comment).
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code(
        (_kale_data_loading_block, _kale_block1, _kale_block2)
    )
    with open("/results.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('results')

    _kale_mlmdutils.call("mark_execution_complete")
def test():
    """Define ``v1``, print it, and save it through the marshal utilities.

    The three source blocks are passed to ``run_code``; the returned
    artifact is written to ``/test.html`` and registered through
    ``update_uimetadata('test')``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    define_block = '''
    v1 = "Hello"
    '''

    print_block = '''
    print(v1)
    '''

    # Note: the marshal data directory is set to the empty string here.
    save_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("")
    _kale_marshal_utils.save(v1, "v1")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    report_html = _kale_run_code((define_block, print_block, save_block))
    with open("/test.html", "w") as report_file:
        report_file.write(report_html)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
def step1():
    """Run the marshalled ``step1`` body (returns 10 as ``data``).

    A Rok snapshot is requested before and after the body; the task
    returned by the second request is passed to the
    ``submit_output_rok_artifact`` metadata call.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step1", "", before=True,
    )

    from kale.marshal.decorator import marshal

    # No pipeline parameters for this step.
    @marshal([], ['data'], {}, "/marshal")
    def step1():
        return 10

    step1()

    _kale_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step1", "", before=False,
    )
    _kale_mlmdutils.call("submit_output_rok_artifact", _kale_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
def step3(b: str):
    """Run the marshalled ``step3`` body between two Rok snapshots.

    Args:
        b: Pipeline parameter forwarded to the marshal decorator as ``"b"``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step3", "", before=True,
    )

    from kale.marshal.decorator import marshal

    step_parameters = dict(b=b)

    # The body only prints its first argument; the second is unused.
    @marshal(['b', 'data'], [], step_parameters, "/marshal")
    def step3(st, st2):
        print(st)

    step3()

    _kale_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test", "step3", "", before=False,
    )
    _kale_mlmdutils.call("submit_output_rok_artifact", _kale_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
def randomforest():
    """Fit a random forest on the marshalled training data and save its score.

    The source blocks load ``train_df``/``train_labels``, fit the
    classifier, and save ``acc_random_forest``. They are passed to
    ``run_code``; the returned artifact is written to
    ``/randomforest.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    load_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    train_df = _kale_marshal_utils.load("train_df")
    train_labels = _kale_marshal_utils.load("train_labels")
    # -----------------------DATA LOADING END----------------------------------
    '''

    imports_block = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    # The score is computed on the same data the model was fitted on.
    fit_block = '''
    random_forest = RandomForestClassifier(n_estimators=100)
    random_forest.fit(train_df, train_labels)
    acc_random_forest = round(random_forest.score(train_df, train_labels) * 100, 2)
    '''

    save_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(acc_random_forest, "acc_random_forest")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    report_html = _kale_run_code(
        (load_block, imports_block, fit_block, save_block)
    )
    with open("/randomforest.html", "w") as report_file:
        report_file.write(report_html)
    _kale_update_uimetadata('randomforest')

    _kale_mlmdutils.call("mark_execution_complete")
def logisticregression():
    """Fit a logistic regression on the marshalled data and save its score.

    The source blocks load ``train_df``/``train_labels``, fit the model,
    and save ``acc_log``. They are passed to ``run_code``; the returned
    artifact is written to ``/logisticregression.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    train_df = _kale_marshal.load("train_df")
    train_labels = _kale_marshal.load("train_labels")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    # The score is computed on the same data the model was fitted on.
    _kale_block2 = '''
    logreg = LogisticRegression(solver='lbfgs', max_iter=110)
    logreg.fit(train_df, train_labels)
    acc_log = round(logreg.score(train_df, train_labels) * 100, 2)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(acc_log, "acc_log")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_data_saving_block,
    ))
    with open("/logisticregression.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('logisticregression')

    _kale_mlmdutils.call("mark_execution_complete")
def uploadtos3(date_of_processing: str):
    """Upload the first path matching ``save_dir + '*'`` to the results bucket.

    The source blocks load ``save_dir`` from the marshal directory and run
    ``aws s3 cp ... --recursive`` into
    ``s3://hsi-kale-results/<date_of_processing>``. They are passed to
    ``run_code``; the returned artifact is written to ``/uploadtos3.html``.

    Args:
        date_of_processing: Pipeline parameter injected into the executed
            code as the string variable ``date_of_processing``.
    """
    # The parameter is spliced into generated source code. Emitting it as a
    # repr()-escaped literal (instead of wrapping the raw value in double
    # quotes) keeps the generated assignment valid and value-preserving when
    # the value contains quotes, backslashes or newlines. str() keeps the
    # original "always a string" semantics.
    _kale_pipeline_parameters_block = '''
    date_of_processing = {}
    '''.format(repr(str(date_of_processing)))

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    save_dir = _kale_marshal.load("save_dir")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    # NOTE(review): the executed code indexes glob results with [0] and does
    # not check the exit status of the aws command; left as authored.
    _kale_block2 = '''
    dir_to_upload = glob.glob(save_dir + '*')[0]
    bucket_results = "s3://hsi-kale-results"
    bucket_path_uploading = os.path.join(bucket_results, date_of_processing)
    cmd_subprocess = ["aws", "s3", "cp", dir_to_upload, bucket_path_uploading, "--recursive"]
    subprocess.run(cmd_subprocess)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/uploadtos3.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('uploadtos3')

    _kale_mlmdutils.call("mark_execution_complete")
def integral5(density_p: int):
    """Monte Carlo estimate of a triple integral, reported as a relative error.

    The executed code samples ``density_p`` uniform points per axis,
    estimates the integral of ``(x+2*y+3*z)**2`` over the box, and prints
    the error against the reference value ``1/12``. The blocks are passed
    to ``run_code``; the returned artifact is written to
    ``/integral5.html``.

    Args:
        density_p: Number of sample points, injected into the executed
            code as the variable ``density_p``.
    """
    _kale_pipeline_parameters_block = '''
    density_p = {}
    '''.format(density_p)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    import math
    '''

    _kale_block2 = '''
    def compute_error(obj,approx):
        \'\'\'
        Relative or absolute error between obj and approx.
        \'\'\'
        if math.fabs(obj) > np.nextafter(0,1):
            Err = math.fabs(obj-approx)/math.fabs(obj)
        else:
            Err = math.fabs(obj-approx)
        return Err
    '''

    _kale_block3 = '''
    f = lambda x,y,z:(x+2*y+3*z)**2
    a1 = 0
    b1 = 1
    a2 = -1/2
    b2 = 0
    a3 = 0
    b3 = 1/3
    x_p = np.random.uniform(a1,b1,density_p)
    y_p = np.random.uniform(a2,b2,density_p)
    z_p = np.random.uniform(a3,b3,density_p)
    obj = 1/12
    vol = (b1-a1)*(b2-a2)*(b3-a3)
    ex_5 = vol*np.mean(f(x_p,y_p,z_p))
    print("error relativo: {:0.4e}".format(compute_error(obj, ex_5)))
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_pipeline_parameters_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    ))
    with open("/integral5.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('integral5')

    _kale_mlmdutils.call("mark_execution_complete")
def loaddata():
    """Read the train/test CSV files and save them to the marshal directory.

    The executed code reads ``data/test.csv`` and ``data/train.csv`` and
    saves ``PREDICTION_LABEL``, ``test_df`` and ``train_df``. The blocks are
    passed to ``run_code``; the returned artifact is written to
    ``/loaddata.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    imports_block = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    read_block = '''
    path = "data/"

    PREDICTION_LABEL = 'Survived'

    test_df = pd.read_csv(path + "test.csv")
    train_df = pd.read_csv(path + "train.csv")
    '''

    save_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(PREDICTION_LABEL, "PREDICTION_LABEL")
    _kale_marshal_utils.save(test_df, "test_df")
    _kale_marshal_utils.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    report_html = _kale_run_code((imports_block, read_block, save_block))
    with open("/loaddata.html", "w") as report_file:
        report_file.write(report_html)
    _kale_update_uimetadata('loaddata')

    _kale_mlmdutils.call("mark_execution_complete")
def final_auto_snapshot():
    """Request the final Rok snapshot and submit it as an output artifact.

    Calls ``snapshot_pipeline_step`` for pipeline ``"test"`` with the
    notebook path ``"/path/to/nb"`` and ``before=False``, then passes the
    returned task to the ``submit_output_rok_artifact`` metadata call.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test", "final_auto_snapshot", "/path/to/nb", before=False,
    )
    _kale_mlmdutils.call("submit_output_rok_artifact", _kale_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
def integral6(density_p: int):
    """Monte Carlo estimate of the integral of 4/(1+x**2) on [0, 1].

    The executed code samples ``density_p`` uniform points, estimates the
    integral and prints the error against ``math.pi``. The blocks are
    passed to ``run_code``; the returned artifact is written to
    ``/integral6.html``.

    Args:
        density_p: Number of sample points, injected into the executed
            code as the variable ``density_p``.
    """
    _kale_pipeline_parameters_block = '''
    density_p = {}
    '''.format(density_p)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    import math
    '''

    _kale_block2 = '''
    def compute_error(obj,approx):
        \'\'\'
        Relative or absolute error between obj and approx.
        \'\'\'
        if math.fabs(obj) > np.nextafter(0,1):
            Err = math.fabs(obj-approx)/math.fabs(obj)
        else:
            Err = math.fabs(obj-approx)
        return Err
    '''

    _kale_block3 = '''
    f = lambda x: 4/(1+x**2)
    x_p = np.random.uniform(0,1,density_p)
    obj = math.pi
    a = 0
    b = 1
    vol = b-a
    f_bar = np.mean(f(x_p))
    ex_6 = vol*f_bar
    print("error relativo: {:0.4e}".format(compute_error(obj,ex_6 )))
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_pipeline_parameters_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    ))
    with open("/integral6.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('integral6')

    _kale_mlmdutils.call("mark_execution_complete")
def downloadfroms3():
    """Copy the ``hsi-kale`` bucket into ``/shared_volume/input_data``.

    The executed code creates the target directory if it is missing, runs
    ``aws s3 cp ... --recursive`` and saves ``input_dir_data`` to the
    marshal directory. The blocks are passed to ``run_code``; the returned
    artifact is written to ``/downloadfroms3.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    bucket_with_data = "hsi-kale"
    input_dir_data = "/shared_volume/input_data"
    if not os.path.exists(input_dir_data):
        os.makedirs(input_dir_data)

    cmd_subprocess = ["aws", "s3", "cp",
                      "s3://" + bucket_with_data,
                      input_dir_data,
                      "--recursive"]
    subprocess.run(cmd_subprocess)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(input_dir_data, "input_dir_data")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code(
        (_kale_block1, _kale_block2, _kale_data_saving_block)
    )
    with open("/downloadfroms3.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('downloadfroms3')

    _kale_mlmdutils.call("mark_execution_complete")
def create_matrix(d1: int, d2: int):
    """Create a random ``d1`` x ``d2`` matrix, report metrics, and save it.

    The executed code builds ``rnd_matrix`` with ``np.random.rand``, passes
    ``d1``/``d2`` to ``generate_mlpipeline_metrics`` and saves the matrix to
    the marshal directory. The blocks are passed to ``run_code``; the
    returned artifact is written to ``/create_matrix.html``.

    Args:
        d1: First matrix dimension, injected into the executed code.
        d2: Second matrix dimension, injected into the executed code.
    """
    _kale_pipeline_parameters_block = '''
    d1 = {}
    d2 = {}
    '''.format(d1, d2)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    '''

    _kale_block2 = '''
    rnd_matrix = np.random.rand(d1, d2)
    '''

    _kale_block3 = '''
    from kale.common import kfputils as _kale_kfputils
    _kale_kfp_metrics = {
        "d1": d1,
        "d2": d2
    }
    _kale_kfputils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(rnd_matrix, "rnd_matrix")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_pipeline_parameters_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
        _kale_data_saving_block,
    ))
    with open("/create_matrix.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('create_matrix')

    _kale_mlmdutils.call("mark_execution_complete")
def step3():
    """Run the marshalled ``step3`` body, which prints ``c + a``.

    The marshal decorator is given the input names ``_a`` and ``_c``, no
    outputs, and an empty pipeline-parameter mapping.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    @marshal(['_a', '_c'], [], {}, "/marshal")
    def step3(a, c):
        print(c + a)

    step3()

    _kale_mlmdutils.call("mark_execution_complete")
def step2():
    """Run the marshalled ``step2`` body, which prints and returns ``a + b``.

    The marshal decorator is given the input names ``_b`` and ``_a``, the
    output name ``_c``, and an empty pipeline-parameter mapping.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    @marshal(['_b', '_a'], ['_c'], {}, "/marshal")
    def step2(a, b):
        total = a + b
        print(total)
        return total

    step2()

    _kale_mlmdutils.call("mark_execution_complete")
def step1():
    """Run the marshalled ``step1`` body, which returns the pair ``(1, 2)``.

    The marshal decorator is given no inputs, the output names ``_b`` and
    ``_a``, and an empty pipeline-parameter mapping.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    @marshal([], ['_b', '_a'], {}, "/marshal")
    def step1():
        a, b = 1, 2
        return a, b

    step1()

    _kale_mlmdutils.call("mark_execution_complete")
def sum_matrix():
    """Load ``rnd_matrix``, sum it, and report the sum as a pipeline metric.

    The executed code passes ``sum_result`` to
    ``generate_mlpipeline_metrics`` under the key ``"sum-result"``. The
    blocks are passed to ``run_code``; the returned artifact is written to
    ``/sum_matrix.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    rnd_matrix = _kale_marshal.load("rnd_matrix")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    '''

    _kale_block2 = '''
    sum_result = rnd_matrix.sum()
    '''

    _kale_block3 = '''
    from kale.common import kfputils as _kale_kfputils
    _kale_kfp_metrics = {
        "sum-result": sum_result
    }
    _kale_kfputils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    ))
    with open("/sum_matrix.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('sum_matrix')

    _kale_mlmdutils.call("mark_execution_complete")
def test():
    """Execute a single block that prints ``hello``.

    The block is passed to ``run_code``; the returned artifact is written
    to ``/test.html`` and registered through ``update_uimetadata('test')``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    print("hello")
    '''

    # Execute the block and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((_kale_block1,))
    with open("/test.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
def step3():
    """Run the marshalled ``step3`` body (prints ``c + a``), then link artifacts.

    The marshal decorator is given the input names ``_a`` and ``_c`` and no
    outputs; ``link_artifacts`` is called afterwards with an empty mapping.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    @_kale_marshal(['_a', '_c'], [], {}, "/marshal")
    def step3(a, c):
        print(c + a)

    step3()

    _kale_link_artifacts({})

    _kale_mlmdutils.call("mark_execution_complete")
def step1():
    """Run the marshalled ``step1`` body (returns ``(1, 2)``), then link artifacts.

    The marshal decorator is given no inputs and the output names ``_b``
    and ``_a``; ``link_artifacts`` is called afterwards with an empty
    mapping.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    @_kale_marshal([], ['_b', '_a'], {}, "/marshal")
    def step1():
        a, b = 1, 2
        return a, b

    step1()

    _kale_link_artifacts({})

    _kale_mlmdutils.call("mark_execution_complete")
def step2():
    """Run the marshalled ``step2`` body under ``ttl(5)``, then link artifacts.

    The body prints and returns ``a + b``. It is wrapped first by the
    marshal decorator (inputs ``_b``, ``_a``; output ``_c``) and then by
    ``ttl(5)``; the unit of the ``5`` is not visible here.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common.runutils import ttl as _kale_ttl
    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    @_kale_ttl(5)
    @_kale_marshal(['_b', '_a'], ['_c'], {}, "/marshal")
    def step2(a, b):
        total = a + b
        print(total)
        return total

    step2()

    _kale_link_artifacts({})

    _kale_mlmdutils.call("mark_execution_complete")
def test():
    """Execute a single block that loads ``v1`` from the marshal directory.

    The block is passed to ``run_code``; the returned artifact is written
    to ``/test.html`` and registered through ``update_uimetadata('test')``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    v1 = _kale_marshal_utils.load("v1")
    # -----------------------DATA LOADING END----------------------------------
    '''

    # Execute the block and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((_kale_data_loading_block,))
    with open("/test.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
def datapreprocessing():
    """Clean the train/test frames and save them back to the marshal directory.

    The executed code adds ``relatives``/``not_alone``, drops
    ``PassengerId`` from the training frame, derives ``Deck`` from
    ``Cabin``, imputes missing ``Age`` values with random integers, and
    fills missing ``Embarked`` values with ``'S'``. The blocks are passed
    to ``run_code``; the returned artifact is written to
    ``/datapreprocessing.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    test_df = _kale_marshal.load("test_df")
    train_df = _kale_marshal.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['relatives'] = dataset['SibSp'] + dataset['Parch']
        dataset.loc[dataset['relatives'] > 0, 'not_alone'] = 0
        dataset.loc[dataset['relatives'] == 0, 'not_alone'] = 1
        dataset['not_alone'] = dataset['not_alone'].astype(int)
    train_df['not_alone'].value_counts()
    '''

    _kale_block3 = '''
    # This does not contribute to a person survival probability
    train_df = train_df.drop(['PassengerId'], axis=1)
    '''

    _kale_block4 = '''
    import re
    deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Cabin'] = dataset['Cabin'].fillna("U0")
        dataset['Deck'] = dataset['Cabin'].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
        dataset['Deck'] = dataset['Deck'].map(deck)
        dataset['Deck'] = dataset['Deck'].fillna(0)
        dataset['Deck'] = dataset['Deck'].astype(int)
    # we can now drop the cabin feature
    train_df = train_df.drop(['Cabin'], axis=1)
    test_df = test_df.drop(['Cabin'], axis=1)
    '''

    # Fix: the final cast inside the loop used to read
    # `train_df["Age"].astype(int)`, which overwrote test_df's ages with
    # train_df's (index-aligned) and discarded the imputation just done.
    # Each frame now casts its own column.
    # NOTE(review): `std` is taken from test_df while `mean` comes from
    # train_df; left as authored -- confirm whether that is intended.
    _kale_block5 = '''
    data = [train_df, test_df]

    for dataset in data:
        mean = train_df["Age"].mean()
        std = test_df["Age"].std()
        is_null = dataset["Age"].isnull().sum()
        # compute random numbers between the mean, std and is_null
        rand_age = np.random.randint(mean - std, mean + std, size = is_null)
        # fill NaN values in Age column with random values generated
        age_slice = dataset["Age"].copy()
        age_slice[np.isnan(age_slice)] = rand_age
        dataset["Age"] = age_slice
        dataset["Age"] = dataset["Age"].astype(int)
    train_df["Age"].isnull().sum()
    '''

    _kale_block6 = '''
    train_df['Embarked'].describe()
    '''

    _kale_block7 = '''
    # fill with most common value
    common_value = 'S'
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].fillna(common_value)
    '''

    _kale_block8 = '''
    train_df.info()
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(test_df, "test_df")
    _kale_marshal.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block,
                    _kale_block1,
                    _kale_block2,
                    _kale_block3,
                    _kale_block4,
                    _kale_block5,
                    _kale_block6,
                    _kale_block7,
                    _kale_block8,
                    _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/datapreprocessing.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('datapreprocessing')

    _kale_mlmdutils.call("mark_execution_complete")
def featureengineering():
    """Encode and bucket the train/test features, then save the training set.

    The executed code casts ``Fare``, derives ``Title`` from ``Name``, maps
    ``Sex`` and ``Embarked`` to integers, buckets ``Age`` and ``Fare``, adds
    ``Age_Class`` and ``Fare_Per_Person``, and splits the label column off
    ``train_df``. Only ``train_df`` and ``train_labels`` are saved. The
    blocks are passed to ``run_code``; the returned artifact is written to
    ``/featureengineering.html``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    PREDICTION_LABEL = _kale_marshal.load("PREDICTION_LABEL")
    test_df = _kale_marshal.load("test_df")
    train_df = _kale_marshal.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset['Fare'] = dataset['Fare'].fillna(0)
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    # The doubled backslashes below are deliberate: the executed code must
    # receive a single backslash (regex escape / line continuation).
    _kale_block3 = '''
    data = [train_df, test_df]
    titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}

    for dataset in data:
        # extract titles
        dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)
        # replace titles with a more common title or as Rare
        dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr',\\
                                                'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
        dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
        # convert titles into numbers
        dataset['Title'] = dataset['Title'].map(titles)
        # filling NaN with 0, to get safe
        dataset['Title'] = dataset['Title'].fillna(0)
    train_df = train_df.drop(['Name'], axis=1)
    test_df = test_df.drop(['Name'], axis=1)
    '''

    _kale_block4 = '''
    genders = {"male": 0, "female": 1}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Sex'] = dataset['Sex'].map(genders)
    '''

    _kale_block5 = '''
    train_df = train_df.drop(['Ticket'], axis=1)
    test_df = test_df.drop(['Ticket'], axis=1)
    '''

    _kale_block6 = '''
    ports = {"S": 0, "C": 1, "Q": 2}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].map(ports)
    '''

    # The last two Age rules both assign bucket 6, as in the original.
    _kale_block7 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age'] = dataset['Age'].astype(int)
        dataset.loc[ dataset['Age'] <= 11, 'Age'] = 0
        dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1
        dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2
        dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3
        dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4
        dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5
        dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6
        dataset.loc[ dataset['Age'] > 66, 'Age'] = 6

    # let's see how it's distributed
    train_df['Age'].value_counts()
    '''

    _kale_block8 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset.loc[ dataset['Fare'] <= 7.91, 'Fare'] = 0
        dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
        dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare'] = 2
        dataset.loc[(dataset['Fare'] > 31) & (dataset['Fare'] <= 99), 'Fare'] = 3
        dataset.loc[(dataset['Fare'] > 99) & (dataset['Fare'] <= 250), 'Fare'] = 4
        dataset.loc[ dataset['Fare'] > 250, 'Fare'] = 5
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    _kale_block9 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age_Class']= dataset['Age']* dataset['Pclass']
    '''

    # This block reuses the `data` list bound by the previous block.
    _kale_block10 = '''
    for dataset in data:
        dataset['Fare_Per_Person'] = dataset['Fare']/(dataset['relatives']+1)
        dataset['Fare_Per_Person'] = dataset['Fare_Per_Person'].astype(int)
    # Let's take a last look at the training set, before we start training the models.
    train_df.head(10)
    '''

    _kale_block11 = '''
    train_labels = train_df[PREDICTION_LABEL]
    train_df = train_df.drop(PREDICTION_LABEL, axis=1)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(train_df, "train_df")
    _kale_marshal.save(train_labels, "train_labels")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # Execute the blocks in order and collect the resulting artifact.
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )

    _kale_html_artifact = _kale_run_code((
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
        _kale_block4,
        _kale_block5,
        _kale_block6,
        _kale_block7,
        _kale_block8,
        _kale_block9,
        _kale_block10,
        _kale_block11,
        _kale_data_saving_block,
    ))
    with open("/featureengineering.html", "w") as _kale_report:
        _kale_report.write(_kale_html_artifact)
    _kale_update_uimetadata('featureengineering')

    _kale_mlmdutils.call("mark_execution_complete")
def maskandextract():
    """Run the ``maskandextract`` pipeline step.

    The step executes a sequence of code strings in a Jupyter kernel:

    1. load ``specie_mask``, ``test_sp``, ``variable_mask_specie`` and
       ``variable_test_sp`` from the Kale marshal directory;
    2. push the R variable names to R through the rpy2 line magics and call
       the R functions ``occs_filter_by_mask``, ``clean_dup_by_year`` and
       ``extract_by_year``, fetching the result back as ``e_test``;
    3. save ``e_test``, ``specie_mask``, ``variable_e_test`` and
       ``variable_mask_specie`` to the marshal directory.

    The HTML returned by the kernel run is written to
    ``/maskandextract.html`` and registered through ``update_uimetadata``.
    Returns ``None``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    # NOTE(review): line breaks inside the code strings below were
    # reconstructed from a whitespace-collapsed source; a lone "#" is assumed
    # to be an empty comment line. Confirm against the generating notebook.
    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    specie_mask = _kale_marshal.load("specie_mask")
    test_sp = _kale_marshal.load("test_sp")
    variable_mask_specie = _kale_marshal.load("variable_mask_specie")
    variable_test_sp = _kale_marshal.load("variable_test_sp")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob
    from IPython import get_ipython
    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    string_libraries = """R library(hsi);library(raster)"""
    ipython.magic(string_libraries)
    print(test_sp)
    print(specie_mask)
    ipython.magic("Rpush " + variable_test_sp)
    ipython.magic("Rpush " + variable_mask_specie)
    #
    ##assignment statements to build string
    variable_test_sp_mask = "test_sp_mask"
    string1 = "R " + variable_test_sp_mask + " <- occs_filter_by_mask("
    string_filter = "".join([string1, variable_test_sp, ",", variable_mask_specie, ")"])
    ##(end)assignment statements to build string
    ipython.magic(string_filter)
    ##assignment statements to build string
    variable_test_sp_clean = "test_sp_clean"
    string1 = "R " + variable_test_sp_clean + " <- clean_dup_by_year(this_species = "
    string2 = ", threshold = res("
    string3 = ")[1])"
    string_clean_test = "".join([string1, variable_test_sp_mask, string2, variable_mask_specie, string3])
    ##(end)assignment statements to build string
    ipython.magic(string_clean_test)
    ##assignment statements to build string
    variable_e_test = "e_test"
    string1 = "R " + variable_e_test + " <- extract_by_year(this_species="
    string2 = ",layers_pattern=\\"_mar\\")"
    string_extract = "".join([string1, variable_test_sp_clean, string2])
    ##(end)assignment statements to build string
    ipython.magic(string_extract)
    e_test = ipython.magic("Rget " + variable_e_test)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(e_test, "e_test")
    _kale_marshal.save(specie_mask, "specie_mask")
    _kale_marshal.save(variable_e_test, "variable_e_test")
    _kale_marshal.save(variable_mask_specie, "variable_mask_specie")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )
    _kale_blocks = (
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_data_saving_block,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    # Pin the encoding so the artifact does not depend on the container locale
    # (printed notebook output may contain non-ASCII characters).
    with open("/maskandextract.html", "w", encoding="utf-8") as html_file:
        html_file.write(_kale_html_artifact)
    _kale_update_uimetadata('maskandextract')

    _kale_mlmdutils.call("mark_execution_complete")
def bestmodel():
    """Run the ``bestmodel`` pipeline step.

    The step executes a sequence of code strings in a Jupyter kernel:

    1. load ``e_test`` and ``variable_e_test`` from the Kale marshal
       directory;
    2. push the R variable named by ``variable_e_test`` to R through the rpy2
       line magics, call the R function ``find_best_model`` on it and fetch
       the result back as ``best_model_2004``;
    3. save ``best_model_2004`` and ``variable_best_model_2004`` to the
       marshal directory.

    The HTML returned by the kernel run is written to ``/bestmodel.html`` and
    registered through ``update_uimetadata``. Returns ``None``.
    """
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    # NOTE(review): line breaks inside the code strings below were
    # reconstructed from a whitespace-collapsed source; a lone "#" is assumed
    # to be an empty comment line. Confirm against the generating notebook.
    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    e_test = _kale_marshal.load("e_test")
    variable_e_test = _kale_marshal.load("variable_e_test")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob
    from IPython import get_ipython
    ipython = get_ipython()
    '''

    # The find_best_model arguments (thresholds, iteration count, n_cores = 24)
    # are hard-coded in the R call string built below.
    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    print(e_test)
    ipython.magic("Rpush " + variable_e_test)
    #
    string_libraries = """R library(hsi)"""
    ipython.magic(string_libraries)
    ##assignment statements to build string
    variable_best_model_2004 = "best_model_2004"
    string1 = "R " + variable_best_model_2004 + " <- find_best_model(this_species ="
    string2 = ", cor_threshold = 0.8, ellipsoid_level = 0.975,nvars_to_fit = 3,E = 0.05,RandomPercent = 70,NoOfIteration = 1000,parallel = TRUE,n_cores = 24,plot3d = FALSE)"
    string_best_model = "".join([string1, variable_e_test, string2])
    ##(end)assignment statements to build string
    ipython.magic(string_best_model)
    best_model_2004 = ipython.magic("Rget " + variable_best_model_2004)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(best_model_2004, "best_model_2004")
    _kale_marshal.save(variable_best_model_2004, "variable_best_model_2004")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )
    _kale_blocks = (
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_data_saving_block,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    # Pin the encoding so the artifact does not depend on the container locale
    # (printed notebook output may contain non-ASCII characters).
    with open("/bestmodel.html", "w", encoding="utf-8") as html_file:
        html_file.write(_kale_html_artifact)
    _kale_update_uimetadata('bestmodel')

    _kale_mlmdutils.call("mark_execution_complete")
def temporalprojection(date_of_processing: str, specie: str):
    """Run the ``temporalprojection`` pipeline step.

    Args:
        date_of_processing: Pipeline parameter; inside the kernel it is joined
            onto ``/shared_volume/new_model_parallel`` to form ``save_dir``.
        specie: Pipeline parameter; passed as ``sp_name`` to the R function
            ``temporal_projection``.

    The step executes a sequence of code strings in a Jupyter kernel:

    1. define the two pipeline parameters as kernel variables;
    2. load ``best_model_2004``, ``specie_mask``,
       ``variable_best_model_2004`` and ``variable_mask_specie`` from the
       Kale marshal directory;
    3. push the R variable names to R through the rpy2 line magics, create
       ``save_dir`` if it does not exist and call the R function
       ``temporal_projection``;
    4. save ``save_dir`` to the marshal directory.

    The HTML returned by the kernel run is written to
    ``/temporalprojection.html`` and registered through
    ``update_uimetadata``. Returns ``None``.
    """
    # Emit each parameter as a Python string literal via repr(): wrapping the
    # raw value in double quotes breaks (or injects code into) the generated
    # source when the value contains a quote, a backslash or a newline.
    # str() keeps the previous "always a string in the kernel" behaviour.
    _kale_pipeline_parameters_block = '''
    date_of_processing = {!r}
    specie = {!r}
    '''.format(str(date_of_processing), str(specie))

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    # NOTE(review): line breaks inside the code strings below were
    # reconstructed from a whitespace-collapsed source; a lone "#" is assumed
    # to be an empty comment line. Confirm against the generating notebook.
    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    best_model_2004 = _kale_marshal.load("best_model_2004")
    specie_mask = _kale_marshal.load("specie_mask")
    variable_best_model_2004 = _kale_marshal.load("variable_best_model_2004")
    variable_mask_specie = _kale_marshal.load("variable_mask_specie")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob
    from IPython import get_ipython
    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    string_libraries = """R library(hsi);library(raster)"""
    ipython.magic(string_libraries)
    print(best_model_2004)
    print(specie_mask)
    ipython.magic("Rpush " + variable_best_model_2004)
    ipython.magic("Rpush " + variable_mask_specie)
    #
    dir_results = "/shared_volume/new_model_parallel"
    save_dir = os.path.join(dir_results, date_of_processing)
    ##assignment statements to build string
    string1 = "R temporal_projection(this_species = "
    string2 = ",save_dir = "
    string3 = "sp_mask = "
    string4 = ",crs_model = NULL,sp_name ="
    string5 = ",plot3d = FALSE)"
    string_temporal_proj = "".join([string1, variable_best_model_2004, string2, "\\"", save_dir, "\\",", string3, variable_mask_specie, string4, "\\"", specie, "\\"", string5])
    ##(end)assignment statements to build string
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    ipython.magic(string_temporal_proj)
    #temporal_projection = ipython.magic("Rget temporal_projection")
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(save_dir, "save_dir")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_data_saving_block,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    # Pin the encoding so the artifact does not depend on the container locale
    # (printed notebook output may contain non-ASCII characters).
    with open("/temporalprojection.html", "w", encoding="utf-8") as html_file:
        html_file.write(_kale_html_artifact)
    _kale_update_uimetadata('temporalprojection')

    _kale_mlmdutils.call("mark_execution_complete")
def readdatainput(dir_mask_specie: str, dir_specie: str,
                  file_mask_specie: str, file_specie: str):
    """Run the ``readdatainput`` pipeline step.

    Args:
        dir_mask_specie: Pipeline parameter; joined with ``input_dir_data``
            and ``file_mask_specie`` to form the path given to
            ``raster::raster``.
        dir_specie: Pipeline parameter; joined with ``input_dir_data`` to
            form the first argument of ``rgdal::readOGR``.
        file_mask_specie: Pipeline parameter; last component of the raster
            path.
        file_specie: Pipeline parameter; second argument of
            ``rgdal::readOGR``.

    The step executes a sequence of code strings in a Jupyter kernel:

    1. define the four pipeline parameters as kernel variables;
    2. load ``input_dir_data`` from the Kale marshal directory;
    3. through the rpy2 line magics, call ``rgdal::readOGR`` and
       ``raster::raster`` in R and fetch the results back as ``specie_loc``
       and ``specie_mask``;
    4. save ``input_dir_data``, ``specie_loc``, ``specie_mask``,
       ``variable_mask_specie`` and ``variable_specie_loc`` to the marshal
       directory.

    The HTML returned by the kernel run is written to ``/readdatainput.html``
    and registered through ``update_uimetadata``. Returns ``None``.
    """
    # Emit each parameter as a Python string literal via repr(): wrapping the
    # raw value in double quotes breaks (or injects code into) the generated
    # source when the value contains a quote, a backslash or a newline.
    # str() keeps the previous "always a string in the kernel" behaviour.
    _kale_pipeline_parameters_block = '''
    dir_mask_specie = {!r}
    dir_specie = {!r}
    file_mask_specie = {!r}
    file_specie = {!r}
    '''.format(str(dir_mask_specie), str(dir_specie),
               str(file_mask_specie), str(file_specie))

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    # NOTE(review): line breaks inside the code strings below were
    # reconstructed from a whitespace-collapsed source; a lone "#" is assumed
    # to be an empty comment line. Confirm against the generating notebook.
    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    input_dir_data = _kale_marshal.load("input_dir_data")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob
    from IPython import get_ipython
    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    #
    string_libraries = """R library(rgdal); library(raster)"""
    ipython.magic(string_libraries)
    ##assignment statements to build string
    variable_specie_loc = "specie_loc"
    variable_mask_specie = "specie_mask"
    string1 = "R " + variable_specie_loc + " <- rgdal::readOGR("
    string2 = os.path.join(input_dir_data, dir_specie)
    string3 = variable_mask_specie + " <- raster::raster("
    string4 = os.path.join(input_dir_data, dir_mask_specie, file_mask_specie)
    string_data_input = "".join([string1, "\\"", string2, "\\",", "\\"", file_specie, "\\"",");", string3, "\\"", string4, "\\"", ")"])
    ##(end) assignment statements to build string
    ipython.magic(string_data_input)
    specie_loc = ipython.magic("Rget " + variable_specie_loc)
    specie_mask = ipython.magic("Rget " + variable_mask_specie)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(input_dir_data, "input_dir_data")
    _kale_marshal.save(specie_loc, "specie_loc")
    _kale_marshal.save(specie_mask, "specie_mask")
    _kale_marshal.save(variable_mask_specie, "variable_mask_specie")
    _kale_marshal.save(variable_specie_loc, "variable_specie_loc")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import (
        update_uimetadata as _kale_update_uimetadata,
    )
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_data_saving_block,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    # Pin the encoding so the artifact does not depend on the container locale
    # (printed notebook output may contain non-ASCII characters).
    with open("/readdatainput.html", "w", encoding="utf-8") as html_file:
        html_file.write(_kale_html_artifact)
    _kale_update_uimetadata('readdatainput')

    _kale_mlmdutils.call("mark_execution_complete")