Example #1
def final_auto_snapshot():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    _kale_pipeline_parameters = {}

    @_kale_marshal([], [], _kale_pipeline_parameters, "/marshal")
    def _no_op():
        pass

    _no_op()

    _kale_artifacts = {}

    _kale_link_artifacts(_kale_artifacts)
    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _rok_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test",
        "final_auto_snapshot",
        "",
        before=False)
    _kale_mlmdutils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
Example #2
def step2(a: int, c: int):
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step2",
        "",
        before=True)

    from kale.marshal.decorator import marshal

    pipeline_parameters = {"a": a, "c": c}

    @marshal(['c', 'a', 'data'], ['res'], pipeline_parameters, "/marshal")
    def step2(var1, var2, data):
        print(var1 + var2)
        return 'Test'

    step2()
    _rok_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step2",
        "",
        before=False)
    _kale_mlmdutils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
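# Note: the @marshal decorator above is generated by Kale; conceptually it loads
# the listed input names from the marshal directory (or takes them from the
# pipeline parameters), calls the wrapped function, and saves the returned
# values under the listed output names. The toy_marshal sketch below is a
# self-contained illustration of that idea only -- it is NOT Kale's actual
# kale.marshal.decorator implementation, and every name in it is hypothetical.
import functools
import os
import pickle


def toy_marshal(load_names, save_names, parameters, data_dir):
    # Hypothetical stand-in for the generated @marshal decorator.
    def decorator(func):
        @functools.wraps(func)
        def wrapper():
            args = []
            for name in load_names:
                if name in parameters:
                    # pipeline parameters are passed through directly
                    args.append(parameters[name])
                else:
                    # everything else is unpickled from the marshal directory
                    with open(os.path.join(data_dir, name), "rb") as f:
                        args.append(pickle.load(f))
            result = func(*args)
            if save_names:
                outputs = result if isinstance(result, tuple) else (result,)
                for name, value in zip(save_names, outputs):
                    with open(os.path.join(data_dir, name), "wb") as f:
                        pickle.dump(value, f)
            return result
        return wrapper
    return decorator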
Example #3
def results():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    acc_decision_tree = _kale_marshal.load("acc_decision_tree")
    acc_gaussian = _kale_marshal.load("acc_gaussian")
    acc_linear_svc = _kale_marshal.load("acc_linear_svc")
    acc_log = _kale_marshal.load("acc_log")
    acc_random_forest = _kale_marshal.load("acc_random_forest")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    results = pd.DataFrame({
        'Model': ['Support Vector Machines', 'Logistic Regression',
                  'Random Forest', 'Naive Bayes', 'Decision Tree'],
        'Score': [acc_linear_svc, acc_log,
                  acc_random_forest, acc_gaussian, acc_decision_tree]})
    result_df = results.sort_values(by='Score', ascending=False)
    result_df = result_df.set_index('Score')
    print(result_df)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/results.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('results')

    _kale_mlmdutils.call("mark_execution_complete")
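# Note: the steps above collect their notebook cells as source strings and hand
# them to kale.common.jputils.run_code, which (as the generated comment says)
# runs them inside a Jupyter kernel and returns an HTML artifact. The
# toy_run_blocks function below is only a simplified, self-contained stand-in
# for that idea: it executes the blocks sequentially in one shared namespace so
# that names defined by earlier blocks stay visible to later ones.
import textwrap


def toy_run_blocks(blocks):
    # Hypothetical stand-in for run_code (no Jupyter kernel, no HTML output).
    namespace = {}
    for block in blocks:
        exec(textwrap.dedent(block), namespace)
    return namespace


# Tiny usage example: the second block reads a variable set by the first.
_ns = toy_run_blocks(("x = 21\n", "y = 2 * x\n"))
print(_ns["y"])  # 42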
Example #4
def test():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    block1 = '''
    v1 = "Hello"
    '''

    block2 = '''
    print(v1)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("")
    _kale_marshal_utils.save(v1, "v1")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (block1, block2, data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/test.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
Example #5
def step1():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step1",
        "",
        before=True)

    from kale.marshal.decorator import marshal

    pipeline_parameters = {}

    @marshal([], ['data'], pipeline_parameters, "/marshal")
    def step1():
        return 10

    step1()
    _rok_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step1",
        "",
        before=False)
    _kale_mlmdutils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
Example #6
def step3(b: str):
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step3",
        "",
        before=True)

    from kale.marshal.decorator import marshal

    pipeline_parameters = {"b": b}

    @marshal(['b', 'data'], [], pipeline_parameters, "/marshal")
    def step3(st, st2):
        print(st)

    step3()
    _rok_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test-test",
        "step3",
        "",
        before=False)
    _kale_mlmdutils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
Example #7
def randomforest():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    train_df = _kale_marshal_utils.load("train_df")
    train_labels = _kale_marshal_utils.load("train_labels")
    # -----------------------DATA LOADING END----------------------------------
    '''

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    random_forest = RandomForestClassifier(n_estimators=100)
    random_forest.fit(train_df, train_labels)
    acc_random_forest = round(random_forest.score(train_df, train_labels) * 100, 2)
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(acc_random_forest, "acc_random_forest")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (data_loading_block,
              block1,
              block2,
              data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/randomforest.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('randomforest')

    _kale_mlmdutils.call("mark_execution_complete")
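# Note: acc_random_forest above is the score on the training data itself, so it
# is an optimistic estimate. A cross-validated estimate is a common alternative;
# the sketch below uses standard scikit-learn only, with random toy data
# standing in for train_df / train_labels (the toy data is an assumption made
# purely so the snippet runs on its own).
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

_X = np.random.rand(100, 4)               # toy stand-in for train_df
_y = np.random.randint(0, 2, size=100)    # toy stand-in for train_labels
_rf = RandomForestClassifier(n_estimators=100)
_scores = cross_val_score(_rf, _X, _y, cv=5, scoring="accuracy")
print("mean CV accuracy:", round(_scores.mean() * 100, 2))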
Example #8
def logisticregression():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    train_df = _kale_marshal.load("train_df")
    train_labels = _kale_marshal.load("train_labels")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    logreg = LogisticRegression(solver='lbfgs', max_iter=110)
    logreg.fit(train_df, train_labels)
    acc_log = round(logreg.score(train_df, train_labels) * 100, 2)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(acc_log, "acc_log")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block,
                    _kale_block1,
                    _kale_block2,
                    _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/logisticregression.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('logisticregression')

    _kale_mlmdutils.call("mark_execution_complete")
Example #9
def uploadtos3(date_of_processing: str):
    _kale_pipeline_parameters_block = '''
    date_of_processing = "{}"
    '''.format(date_of_processing)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    save_dir = _kale_marshal.load("save_dir")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    dir_to_upload = glob.glob(save_dir + '*')[0]

    bucket_results = "s3://hsi-kale-results"


    bucket_path_uploading = os.path.join(bucket_results, date_of_processing)

    cmd_subprocess = ["aws", "s3", "cp",
                      dir_to_upload,
                      bucket_path_uploading,
                      "--recursive"]

    subprocess.run(cmd_subprocess)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/uploadtos3.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('uploadtos3')

    _kale_mlmdutils.call("mark_execution_complete")
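# Note: parametrized steps like uploadtos3 inject their pipeline parameters by
# formatting a small code block that is prepended to the other blocks before
# execution. A minimal illustration with a made-up value (the date below is an
# assumption, not taken from any real run):
_example_block = '''
date_of_processing = "{}"
'''.format("2021-01-31")
print(_example_block)  # the kernel receives: date_of_processing = "2021-01-31"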
Example #10
def integral5(density_p: int):
    _kale_pipeline_parameters_block = '''
    density_p = {}
    '''.format(density_p)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    import math
    '''

    _kale_block2 = '''
    def compute_error(obj,approx):
        \'\'\'
        Relative or absolute error between obj and approx.
        \'\'\'
        if math.fabs(obj) > np.nextafter(0,1):
            Err = math.fabs(obj-approx)/math.fabs(obj)
        else:
            Err = math.fabs(obj-approx)
        return Err
    '''

    _kale_block3 = '''
    f = lambda x,y,z:(x+2*y+3*z)**2
    a1 = 0
    b1 = 1
    a2 = -1/2
    b2 = 0
    a3 = 0
    b3 = 1/3
    x_p = np.random.uniform(a1,b1,density_p)
    y_p = np.random.uniform(a2,b2,density_p)
    z_p = np.random.uniform(a3,b3,density_p)
    obj = 1/12
    vol = (b1-a1)*(b2-a2)*(b3-a3)
    ex_5 = vol*np.mean(f(x_p,y_p,z_p))
    print("error relativo: {:0.4e}".format(compute_error(obj, ex_5)))
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/integral5.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('integral5')

    _kale_mlmdutils.call("mark_execution_complete")
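# Note: integral5 estimates a triple integral by Monte Carlo; the reference
# value obj = 1/12 is the exact integral of (x + 2y + 3z)^2 over the box
# [0, 1] x [-1/2, 0] x [0, 1/3]. A quick symbolic check of that constant
# (assumes sympy is installed; not part of the generated step):
import sympy as sp

_x, _y, _z = sp.symbols("x y z")
_exact = sp.integrate((_x + 2 * _y + 3 * _z) ** 2,
                      (_x, 0, 1),
                      (_y, sp.Rational(-1, 2), 0),
                      (_z, 0, sp.Rational(1, 3)))
print(_exact)  # 1/12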
Example #11
def loaddata():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    block2 = '''
    path = "data/"

    PREDICTION_LABEL = 'Survived'

    test_df = pd.read_csv(path + "test.csv")
    train_df = pd.read_csv(path + "train.csv")
    '''

    data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    _kale_marshal_utils.save(PREDICTION_LABEL, "PREDICTION_LABEL")
    _kale_marshal_utils.save(test_df, "test_df")
    _kale_marshal_utils.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    blocks = (
        block1,
        block2,
        data_saving_block)
    html_artifact = _kale_run_code(blocks)
    with open("/loaddata.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('loaddata')

    _kale_mlmdutils.call("mark_execution_complete")
Example #12
def final_auto_snapshot():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common import rokutils as _kale_rokutils
    _kale_mlmdutils.call("link_input_rok_artifacts")
    _rok_snapshot_task = _kale_rokutils.snapshot_pipeline_step(
        "test", "final_auto_snapshot", "/path/to/nb", before=False)
    _kale_mlmdutils.call("submit_output_rok_artifact", _rok_snapshot_task)

    _kale_mlmdutils.call("mark_execution_complete")
Example #13
def integral6(density_p: int):
    _kale_pipeline_parameters_block = '''
    density_p = {}
    '''.format(density_p)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    import math
    '''

    _kale_block2 = '''
    def compute_error(obj,approx):
        \'\'\'
        Relative or absolute error between obj and approx.
        \'\'\'
        if math.fabs(obj) > np.nextafter(0,1):
            Err = math.fabs(obj-approx)/math.fabs(obj)
        else:
            Err = math.fabs(obj-approx)
        return Err
    '''

    _kale_block3 = '''
    f = lambda x: 4/(1+x**2)
    x_p = np.random.uniform(0,1,density_p)
    obj = math.pi
    a = 0
    b = 1
    vol = b-a
    f_bar = np.mean(f(x_p))
    ex_6 = vol*f_bar
    print("error relativo: {:0.4e}".format(compute_error(obj,ex_6 )))
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_pipeline_parameters_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/integral6.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('integral6')

    _kale_mlmdutils.call("mark_execution_complete")
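# Note: integral6 uses the same estimator, vol * mean(f(samples)), for
# integrating 4 / (1 + x^2) over [0, 1], whose exact value is pi. The relative
# error of such a Monte Carlo estimate shrinks roughly like 1/sqrt(N); the
# standalone check below illustrates that (the seed and sample sizes are
# arbitrary choices, not part of the generated step):
import math
import numpy as np

_f = lambda x: 4 / (1 + x ** 2)
_rng = np.random.default_rng(0)
for _n in (10 ** 2, 10 ** 4, 10 ** 6):
    _x_p = _rng.uniform(0, 1, _n)
    _estimate = np.mean(_f(_x_p))        # vol = 1, so the mean is the estimate
    print(_n, "{:0.4e}".format(abs(math.pi - _estimate) / math.pi))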
Example #14
def downloadfroms3():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    bucket_with_data = "hsi-kale"

    input_dir_data = "/shared_volume/input_data"

    if not os.path.exists(input_dir_data):
        os.makedirs(input_dir_data)

        
    cmd_subprocess = ["aws", "s3", "cp",
                      "s3://" + bucket_with_data,
                      input_dir_data,
                      "--recursive"]

    subprocess.run(cmd_subprocess)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(input_dir_data, "input_dir_data")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_block1, _kale_block2, _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/downloadfroms3.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('downloadfroms3')

    _kale_mlmdutils.call("mark_execution_complete")
Example #15
def create_matrix(d1: int, d2: int):
    _kale_pipeline_parameters_block = '''
    d1 = {}
    d2 = {}
    '''.format(d1, d2)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    import numpy as np
    '''

    _kale_block2 = '''
    rnd_matrix = np.random.rand(d1, d2)
    '''

    _kale_block3 = '''
    from kale.common import kfputils as _kale_kfputils
    _kale_kfp_metrics = {
        "d1": d1,
        "d2": d2
    }
    _kale_kfputils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(rnd_matrix, "rnd_matrix")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_pipeline_parameters_block,
                    _kale_block1,
                    _kale_block2,
                    _kale_block3,
                    _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/create_matrix.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('create_matrix')

    _kale_mlmdutils.call("mark_execution_complete")
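# Note: create_matrix reports d1 and d2 via generate_mlpipeline_metrics, which
# presumably surfaces them as Kubeflow Pipelines run metrics. The sketch below
# writes the documented KFP v1 metrics layout by hand; the schema follows KFP's
# convention (normally written to /mlpipeline-metrics.json, here /tmp so the
# sketch runs unprivileged) and is an assumption about what such a helper
# produces, not Kale's internals.
import json

_metrics = {
    "metrics": [
        {"name": "d1", "numberValue": 5, "format": "RAW"},
        {"name": "d2", "numberValue": 6, "format": "RAW"},
    ]
}
with open("/tmp/mlpipeline-metrics.json", "w") as _f:
    json.dump(_metrics, _f)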
Example #16
def step3():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    pipeline_parameters = {}

    @marshal(['_a', '_c'], [], pipeline_parameters, "/marshal")
    def step3(a, c):
        d = c + a
        print(d)

    step3()
    _kale_mlmdutils.call("mark_execution_complete")
Example #17
def step2():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    pipeline_parameters = {}

    @marshal(['_b', '_a'], ['_c'], pipeline_parameters, "/marshal")
    def step2(a, b):
        c = a + b
        print(c)
        return c

    step2()
    _kale_mlmdutils.call("mark_execution_complete")
Example #18
def step1():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal

    pipeline_parameters = {}

    @marshal([], ['_b', '_a'], pipeline_parameters, "/marshal")
    def step1():
        a = 1
        b = 2
        return a, b

    step1()
    _kale_mlmdutils.call("mark_execution_complete")
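# Note: examples #16-#18 split one tiny computation across three steps whose
# intermediate values ('_a', '_b', '_c') travel through the marshal directory.
# Stripped of the marshalling, the data flow is simply the in-process version
# below (illustration only):
def _flow_step1():
    a = 1
    b = 2
    return a, b


def _flow_step2(a, b):
    c = a + b
    print(c)  # 3
    return c


def _flow_step3(a, c):
    print(c + a)  # 4


_a, _b = _flow_step1()
_c = _flow_step2(_a, _b)
_flow_step3(_a, _c)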
Example #19
def sum_matrix():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    rnd_matrix = _kale_marshal.load("rnd_matrix")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    '''

    _kale_block2 = '''
    sum_result = rnd_matrix.sum()
    '''

    _kale_block3 = '''
    from kale.common import kfputils as _kale_kfputils
    _kale_kfp_metrics = {
        "sum-result": sum_result
    }
    _kale_kfputils.generate_mlpipeline_metrics(_kale_kfp_metrics)
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
        _kale_block3,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/sum_matrix.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('sum_matrix')

    _kale_mlmdutils.call("mark_execution_complete")
Example #20
def test():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_block1 = '''
    print("hello")
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_block1, )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/test.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
Example #21
def step3():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    _kale_pipeline_parameters = {}

    @_kale_marshal(['_a', '_c'], [], _kale_pipeline_parameters, "/marshal")
    def step3(a, c):
        d = c + a
        print(d)

    step3()

    _kale_artifacts = {}

    _kale_link_artifacts(_kale_artifacts)
    _kale_mlmdutils.call("mark_execution_complete")
Example #22
def step1():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    _kale_pipeline_parameters = {}

    @_kale_marshal([], ['_b', '_a'], _kale_pipeline_parameters, "/marshal")
    def step1():
        a = 1
        b = 2
        return a, b

    step1()

    _kale_artifacts = {}

    _kale_link_artifacts(_kale_artifacts)
    _kale_mlmdutils.call("mark_execution_complete")
Example #23
def step2():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    from kale.common.runutils import ttl as _kale_ttl
    from kale.marshal.decorator import marshal as _kale_marshal
    from kale.common.runutils import link_artifacts as _kale_link_artifacts

    _kale_pipeline_parameters = {}

    @_kale_ttl(5)
    @_kale_marshal(['_b', '_a'], ['_c'], _kale_pipeline_parameters, "/marshal")
    def step2(a, b):
        c = a + b
        print(c)
        return c

    step2()

    _kale_artifacts = {}

    _kale_link_artifacts(_kale_artifacts)
    _kale_mlmdutils.call("mark_execution_complete")
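# Note: step2 in example #23 is additionally wrapped in @_kale_ttl(5), which
# presumably bounds the step's execution time at 5 seconds. The toy_ttl sketch
# below shows one conventional way such a time-to-live guard can be built with
# signal.alarm; it is an assumption about the behaviour, not Kale's runutils
# implementation, and it only works on POSIX systems.
import functools
import signal


def toy_ttl(seconds):
    # Hypothetical time-to-live decorator: raise if the call overruns.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            def _on_timeout(signum, frame):
                raise TimeoutError("step exceeded %ss time-to-live" % seconds)
            old_handler = signal.signal(signal.SIGALRM, _on_timeout)
            signal.alarm(seconds)
            try:
                return func(*args, **kwargs)
            finally:
                signal.alarm(0)
                signal.signal(signal.SIGALRM, old_handler)
        return wrapper
    return decorator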
Example #24
def test():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale.marshal import utils as _kale_marshal_utils
    _kale_marshal_utils.set_kale_data_directory("/marshal")
    v1 = _kale_marshal_utils.load("v1")
    # -----------------------DATA LOADING END----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block, )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/test.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('test')

    _kale_mlmdutils.call("mark_execution_complete")
Example #25
def datapreprocessing():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    test_df = _kale_marshal.load("test_df")
    train_df = _kale_marshal.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['relatives'] = dataset['SibSp'] + dataset['Parch']
        dataset.loc[dataset['relatives'] > 0, 'not_alone'] = 0
        dataset.loc[dataset['relatives'] == 0, 'not_alone'] = 1
        dataset['not_alone'] = dataset['not_alone'].astype(int)
    train_df['not_alone'].value_counts()
    '''

    _kale_block3 = '''
    # PassengerId does not contribute to a person's survival probability
    train_df = train_df.drop(['PassengerId'], axis=1)
    '''

    _kale_block4 = '''
    import re
    deck = {"A": 1, "B": 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7, "U": 8}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Cabin'] = dataset['Cabin'].fillna("U0")
        dataset['Deck'] = dataset['Cabin'].map(lambda x: re.compile("([a-zA-Z]+)").search(x).group())
        dataset['Deck'] = dataset['Deck'].map(deck)
        dataset['Deck'] = dataset['Deck'].fillna(0)
        dataset['Deck'] = dataset['Deck'].astype(int)
    # we can now drop the cabin feature
    train_df = train_df.drop(['Cabin'], axis=1)
    test_df = test_df.drop(['Cabin'], axis=1)
    '''

    _kale_block5 = '''
    data = [train_df, test_df]

    for dataset in data:
        mean = train_df["Age"].mean()
        std = test_df["Age"].std()
        is_null = dataset["Age"].isnull().sum()
        # compute random numbers between the mean, std and is_null
        rand_age = np.random.randint(mean - std, mean + std, size = is_null)
        # fill NaN values in Age column with random values generated
        age_slice = dataset["Age"].copy()
        age_slice[np.isnan(age_slice)] = rand_age
        dataset["Age"] = age_slice
        dataset["Age"] = train_df["Age"].astype(int)
    train_df["Age"].isnull().sum()
    '''

    _kale_block6 = '''
    train_df['Embarked'].describe()
    '''

    _kale_block7 = '''
    # fill with most common value
    common_value = 'S'
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].fillna(common_value)
    '''

    _kale_block8 = '''
    train_df.info()
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(test_df, "test_df")
    _kale_marshal.save(train_df, "train_df")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block, _kale_block1, _kale_block2,
                    _kale_block3, _kale_block4, _kale_block5, _kale_block6,
                    _kale_block7, _kale_block8, _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/datapreprocessing.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('datapreprocessing')

    _kale_mlmdutils.call("mark_execution_complete")
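# Note: the Age block above fills missing ages with random draws around the
# training mean. A simpler, deterministic alternative is median imputation;
# the sketch below uses plain pandas on a toy frame (the toy values are an
# assumption so the snippet runs standalone):
import numpy as np
import pandas as pd

_df = pd.DataFrame({"Age": [22.0, np.nan, 38.0, np.nan, 26.0]})
_df["Age"] = _df["Age"].fillna(_df["Age"].median()).astype(int)
print(_df["Age"].tolist())  # [22, 26, 38, 26, 26]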
Example #26
def featureengineering():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    PREDICTION_LABEL = _kale_marshal.load("PREDICTION_LABEL")
    test_df = _kale_marshal.load("test_df")
    train_df = _kale_marshal.load("train_df")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import numpy as np
    import pandas as pd
    import seaborn as sns
    from matplotlib import pyplot as plt
    from matplotlib import style

    from sklearn import linear_model
    from sklearn.linear_model import LogisticRegression
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.linear_model import Perceptron
    from sklearn.linear_model import SGDClassifier
    from sklearn.tree import DecisionTreeClassifier
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.svm import SVC
    from sklearn.naive_bayes import GaussianNB
    '''

    _kale_block2 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset['Fare'] = dataset['Fare'].fillna(0)
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    _kale_block3 = '''
    data = [train_df, test_df]
    titles = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}

    for dataset in data:
        # extract titles
        dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)
        # replace titles with a more common title or as Rare
        dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col','Don', 'Dr',\\
                                                'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')
        dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')
        dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')
        # convert titles into numbers
        dataset['Title'] = dataset['Title'].map(titles)
        # filling NaN with 0, to get safe
        dataset['Title'] = dataset['Title'].fillna(0)
    train_df = train_df.drop(['Name'], axis=1)
    test_df = test_df.drop(['Name'], axis=1)
    '''

    _kale_block4 = '''
    genders = {"male": 0, "female": 1}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Sex'] = dataset['Sex'].map(genders)
    '''

    _kale_block5 = '''
    train_df = train_df.drop(['Ticket'], axis=1)
    test_df = test_df.drop(['Ticket'], axis=1)
    '''

    _kale_block6 = '''
    ports = {"S": 0, "C": 1, "Q": 2}
    data = [train_df, test_df]

    for dataset in data:
        dataset['Embarked'] = dataset['Embarked'].map(ports)
    '''

    _kale_block7 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age'] = dataset['Age'].astype(int)
        dataset.loc[ dataset['Age'] <= 11, 'Age'] = 0
        dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1
        dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2
        dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3
        dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4
        dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5
        dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6
        dataset.loc[ dataset['Age'] > 66, 'Age'] = 6

    # let's see how it's distributed
    train_df['Age'].value_counts()
    '''

    _kale_block8 = '''
    data = [train_df, test_df]

    for dataset in data:
        dataset.loc[ dataset['Fare'] <= 7.91, 'Fare'] = 0
        dataset.loc[(dataset['Fare'] > 7.91) & (dataset['Fare'] <= 14.454), 'Fare'] = 1
        dataset.loc[(dataset['Fare'] > 14.454) & (dataset['Fare'] <= 31), 'Fare']   = 2
        dataset.loc[(dataset['Fare'] > 31) & (dataset['Fare'] <= 99), 'Fare']   = 3
        dataset.loc[(dataset['Fare'] > 99) & (dataset['Fare'] <= 250), 'Fare']   = 4
        dataset.loc[ dataset['Fare'] > 250, 'Fare'] = 5
        dataset['Fare'] = dataset['Fare'].astype(int)
    '''

    _kale_block9 = '''
    data = [train_df, test_df]
    for dataset in data:
        dataset['Age_Class']= dataset['Age']* dataset['Pclass']
    '''

    _kale_block10 = '''
    for dataset in data:
        dataset['Fare_Per_Person'] = dataset['Fare']/(dataset['relatives']+1)
        dataset['Fare_Per_Person'] = dataset['Fare_Per_Person'].astype(int)
    # Let's take a last look at the training set, before we start training the models.
    train_df.head(10)
    '''

    _kale_block11 = '''
    train_labels = train_df[PREDICTION_LABEL]
    train_df = train_df.drop(PREDICTION_LABEL, axis=1)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/marshal")
    _kale_marshal.save(train_df, "train_df")
    _kale_marshal.save(train_labels, "train_labels")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block, _kale_block1, _kale_block2,
                    _kale_block3, _kale_block4, _kale_block5, _kale_block6,
                    _kale_block7, _kale_block8, _kale_block9, _kale_block10,
                    _kale_block11, _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/featureengineering.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('featureengineering')

    _kale_mlmdutils.call("mark_execution_complete")
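# Note: the chained .loc assignments above implement fixed-edge binning of Age
# (and Fare). The same Age binning can be written with pd.cut; the sketch below
# uses the edges from the block above, merging the two top bands since both map
# to 6 (the toy ages are an assumption so the snippet runs standalone):
import pandas as pd

_ages = pd.Series([5, 15, 20, 25, 30, 38, 50, 70])
_bins = [-1, 11, 18, 22, 27, 33, 40, 200]
_binned = pd.cut(_ages, bins=_bins, labels=range(7)).astype(int)
print(_binned.tolist())  # [0, 1, 2, 3, 4, 5, 6, 6]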
Example #27
def maskandextract():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    specie_mask = _kale_marshal.load("specie_mask")
    test_sp = _kale_marshal.load("test_sp")
    variable_mask_specie = _kale_marshal.load("variable_mask_specie")
    variable_test_sp = _kale_marshal.load("variable_test_sp")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")


    string_libraries = """R library(hsi);library(raster)"""

    ipython.magic(string_libraries)

    print(test_sp)
    print(specie_mask)
    ipython.magic("Rpush " + variable_test_sp)
    ipython.magic("Rpush " + variable_mask_specie)
    #

    ##assignment statements to build string
    variable_test_sp_mask = "test_sp_mask"

    string1 = "R " + variable_test_sp_mask + " <- occs_filter_by_mask("

    string_filter = "".join([string1, variable_test_sp, ",",
                             variable_mask_specie,
                             ")"])

    ##(end)assignment statements to build string

    ipython.magic(string_filter)

    ##assignment statements to build string

    variable_test_sp_clean = "test_sp_clean"

    string1 = "R " + variable_test_sp_clean + " <- clean_dup_by_year(this_species = "

    string2 = ", threshold = res("

    string3 = ")[1])"

    string_clean_test = "".join([string1, variable_test_sp_mask,
                                 string2, variable_mask_specie,
                                 string3])

    ##(end)assignment statements to build string

    ipython.magic(string_clean_test)

    ##assignment statements to build string
    variable_e_test = "e_test"

    string1 = "R " + variable_e_test + " <- extract_by_year(this_species="

    string2 = ",layers_pattern=\\"_mar\\")"

    string_extract = "".join([string1, variable_test_sp_clean, string2])

    ##(end)assignment statements to build string

    ipython.magic(string_extract)

    e_test = ipython.magic("Rget " + variable_e_test)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(e_test, "e_test")
    _kale_marshal.save(specie_mask, "specie_mask")
    _kale_marshal.save(variable_e_test, "variable_e_test")
    _kale_marshal.save(variable_mask_specie, "variable_mask_specie")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block, _kale_block1, _kale_block2,
                    _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/maskandextract.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('maskandextract')

    _kale_mlmdutils.call("mark_execution_complete")
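# Note: maskandextract (and the following HSI steps) drive R through IPython's
# rpy2 magics: "load_ext rpy2.ipython" plus "Rpush"/"Rget" to move objects
# between Python and R. Outside a notebook the same round trip can be done with
# rpy2.robjects directly, as sketched below (assumes rpy2 and R are installed;
# the hsi/raster calls themselves are omitted):
import rpy2.robjects as ro

ro.globalenv["x"] = ro.IntVector([21])   # push a Python value into R
ro.r("y <- x * 2")                       # evaluate R code
print(list(ro.globalenv["y"]))           # pull the result back: [42.0]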
Example #28
def bestmodel():
    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    e_test = _kale_marshal.load("e_test")
    variable_e_test = _kale_marshal.load("variable_e_test")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    print(e_test)

    ipython.magic("Rpush " + variable_e_test)
    #
    string_libraries = """R library(hsi)"""

    ipython.magic(string_libraries)


    ##assignment statements to build string

    variable_best_model_2004 = "best_model_2004"

    string1 = "R " + variable_best_model_2004 + " <- find_best_model(this_species ="

    string2 = ", cor_threshold = 0.8, ellipsoid_level = 0.975,nvars_to_fit = 3,E = 0.05,RandomPercent = 70,NoOfIteration = 1000,parallel = TRUE,n_cores = 24,plot3d = FALSE)"

    string_best_model = "".join([string1, variable_e_test, string2])

    ##(end)assignment statements to build string


    ipython.magic(string_best_model)

    best_model_2004 = ipython.magic("Rget " + variable_best_model_2004)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(best_model_2004, "best_model_2004")
    _kale_marshal.save(variable_best_model_2004, "variable_best_model_2004")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_data_loading_block, _kale_block1, _kale_block2,
                    _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/bestmodel.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('bestmodel')

    _kale_mlmdutils.call("mark_execution_complete")
Example #29
def temporalprojection(date_of_processing: str, specie: str):
    _kale_pipeline_parameters_block = '''
    date_of_processing = "{}"
    specie = "{}"
    '''.format(date_of_processing, specie)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    best_model_2004 = _kale_marshal.load("best_model_2004")
    specie_mask = _kale_marshal.load("specie_mask")
    variable_best_model_2004 = _kale_marshal.load("variable_best_model_2004")
    variable_mask_specie = _kale_marshal.load("variable_mask_specie")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")

    string_libraries = """R library(hsi);library(raster)"""

    ipython.magic(string_libraries)

    print(best_model_2004)
    print(specie_mask)
    ipython.magic("Rpush " + variable_best_model_2004)
    ipython.magic("Rpush " + variable_mask_specie)
    #

    dir_results = "/shared_volume/new_model_parallel"

    save_dir = os.path.join(dir_results, date_of_processing)

    ##assignment statements to build string

    string1 = "R temporal_projection(this_species = "

    string2 = ",save_dir = "

    string3 = "sp_mask = "

    string4 = ",crs_model = NULL,sp_name ="

    string5 = ",plot3d = FALSE)"

    string_temporal_proj = "".join([string1, variable_best_model_2004,
                                    string2, "\\"", save_dir, "\\",",
                                    string3, variable_mask_specie,
                                    string4, "\\"", specie, "\\"", string5])


    ##(end)assignment statements to build string


    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
        
    ipython.magic(string_temporal_proj)

    #temporal_projection = ipython.magic("Rget temporal_projection")
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(save_dir, "save_dir")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_pipeline_parameters_block, _kale_data_loading_block,
                    _kale_block1, _kale_block2, _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/temporalprojection.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('temporalprojection')

    _kale_mlmdutils.call("mark_execution_complete")
Example #30
def readdatainput(dir_mask_specie: str, dir_specie: str, file_mask_specie: str,
                  file_specie: str):
    _kale_pipeline_parameters_block = '''
    dir_mask_specie = "{}"
    dir_specie = "{}"
    file_mask_specie = "{}"
    file_specie = "{}"
    '''.format(dir_mask_specie, dir_specie, file_mask_specie, file_specie)

    from kale.common import mlmdutils as _kale_mlmdutils
    _kale_mlmdutils.init_metadata()

    _kale_data_loading_block = '''
    # -----------------------DATA LOADING START--------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    input_dir_data = _kale_marshal.load("input_dir_data")
    # -----------------------DATA LOADING END----------------------------------
    '''

    _kale_block1 = '''
    import os
    import subprocess
    import glob

    from IPython import get_ipython

    ipython = get_ipython()
    '''

    _kale_block2 = '''
    #
    ipython.magic("load_ext rpy2.ipython")
    #

    string_libraries = """R library(rgdal); library(raster)"""

    ipython.magic(string_libraries)

    ##assignment statements to build string

    variable_specie_loc = "specie_loc"

    variable_mask_specie = "specie_mask"

    string1 = "R " + variable_specie_loc + " <- rgdal::readOGR("

    string2 = os.path.join(input_dir_data, dir_specie)

    string3 = variable_mask_specie + " <- raster::raster("

    string4 = os.path.join(input_dir_data, dir_mask_specie, file_mask_specie)

    string_data_input = "".join([string1, "\\"", string2, "\\",", 
                                 "\\"", file_specie, "\\"",");",
                                 string3, "\\"", string4, "\\"", ")"])

    ##(end) assignment statements to build string

    ipython.magic(string_data_input)

    specie_loc = ipython.magic("Rget " + variable_specie_loc)
    specie_mask = ipython.magic("Rget " + variable_mask_specie)
    '''

    _kale_data_saving_block = '''
    # -----------------------DATA SAVING START---------------------------------
    from kale import marshal as _kale_marshal
    _kale_marshal.set_data_dir("/shared_volume/kube_sipecam_playground/hsi/notebooks/.hsi_using_r2py.ipynb.kale.marshal.dir")
    _kale_marshal.save(input_dir_data, "input_dir_data")
    _kale_marshal.save(specie_loc, "specie_loc")
    _kale_marshal.save(specie_mask, "specie_mask")
    _kale_marshal.save(variable_mask_specie, "variable_mask_specie")
    _kale_marshal.save(variable_specie_loc, "variable_specie_loc")
    # -----------------------DATA SAVING END-----------------------------------
    '''

    # run the code blocks inside a jupyter kernel
    from kale.common.jputils import run_code as _kale_run_code
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata
    _kale_blocks = (_kale_pipeline_parameters_block, _kale_data_loading_block,
                    _kale_block1, _kale_block2, _kale_data_saving_block)
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/readdatainput.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('readdatainput')

    _kale_mlmdutils.call("mark_execution_complete")