Пример #1
0
def svm_model_workflow_with_eval():
    """Return the SVM workflow extended with a classifier evaluation step.

    The base definition is obtained from ``svm_workflow`` using the name
    'svm_workflow_with_classifier'. A 'classifier_model_eval' task is then
    appended to its task list; that task reads the ``targets`` and
    ``predicted`` outputs of 'svm_model_test' plus the workflow-level
    'performance_metrics' input.

    Returns:
        The workflow definition produced by ``svm_workflow`` with the
        evaluation task appended.
    """
    workflow = svm_workflow('svm_workflow_with_classifier')

    # Evaluation task: every input is a reference expression, either to the
    # output of the model test task or to a workflow input.
    eval_task = WorkflowTaskDef('classifier_model_eval')
    eval_bindings = (
        ('targets', '${svm_model_test.output.targets}'),
        ('predicted', '${svm_model_test.output.predicted}'),
        ('performance_metrics', '${workflow.input.performance_metrics}'),
    )
    for key, expression in eval_bindings:
        eval_task.inputParameters[key] = expression

    workflow.tasks.append(eval_task)
    return workflow
Пример #2
0
def test_workflow_decision_creation():
    """Check that a coin-toss decision workflow can be created.

    The workflow first runs 'sample_heads_or_tails' and then a decision task
    that routes to 'sample_task_addition' when the toss result is 'heads' and
    to 'sample_task_multiplication' when it is 'tails'. The test passes when
    ``cw.create_workflow`` answers with status code 204.

    ``cw`` is not defined in this function; presumably it is a module-level
    client -- confirm against the surrounding test module.
    """
    # Coin toss task; its output drives the decision below.
    coin_toss = WorkflowTaskDef('sample_heads_or_tails',
                                'sample_heads_or_tails',
                                'This task generates randoms', {})

    # Both candidate branches receive the same two workflow inputs.
    addition = WorkflowTaskDef('sample_task_addition')
    addition.inputParameters = dict(foo="${workflow.input.foo}",
                                    bar="${workflow.input.bar}")

    multiplication = WorkflowTaskDef('sample_task_multiplication')
    multiplication.inputParameters = dict(foo='${workflow.input.foo}',
                                          bar='${workflow.input.bar}')

    # Decision task: 'coin' is bound to the toss result and selected as the
    # case value; each outcome maps to a single-task branch.
    chooser = DecisionTaskDef(
        'decision', description='Decides where to go depending on coin toss')
    chooser.inputParameters['coin'] = '${sample_heads_or_tails.output.result}'
    chooser.caseValueParam = 'coin'
    for outcome, branch_task in (('heads', addition),
                                 ('tails', multiplication)):
        chooser.decisionCases[outcome] = [branch_task]

    # Workflow definition
    workflow = WorkflowDef('heads_or_tails_workflow')
    workflow.description = (
        ' This workflow will toss a coin if heads it does '
        '((foo + bar) else (foo * bar)) ')
    workflow.inputParameters = ['foo', 'bar']
    workflow.version = 2
    workflow.tasks = [coin_toss, chooser]

    response = cw.create_workflow(workflow)
    assert response.status_code == 204
Пример #3
0
def main():
    """Register the sample tasks and a fork/join workflow.

    Creates a ``Condu`` client for 'http://localhost:8080/api', registers the
    task definitions returned by ``sample_task_definitions()`` and then
    creates the 'sample_condu3_workflow_fork' workflow (version 1):

    1. a fork that runs 'sample_task_addition' and
       'sample_task_multiplication' as two separate branches,
    2. a join that waits for both branch tasks,
    3. 'sample_task_division', which consumes the two branch results.

    The workflow output 'result' is the ``result`` output of the division
    task.
    """
    cw = Condu('http://localhost:8080/api')
    # creating the definitions of each task
    cw.create_tasks(sample_task_definitions())

    # Branch tasks of the fork; both read the same workflow inputs.
    wt_add = WorkflowTaskDef('sample_task_addition')
    wt_add.inputParameters = {
        'foo': "${workflow.input.foo}",
        'bar': "${workflow.input.bar}"
    }

    wt_mult = WorkflowTaskDef('sample_task_multiplication')
    wt_mult.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}'
    }

    # Division runs after the join and reads the output of both branches.
    wt_div = WorkflowTaskDef('sample_task_division')
    wt_div.inputParameters = {
        'add_res': '${sample_task_addition.output.result}',
        'mult_res': '${sample_task_multiplication.output.result}'
    }

    wt_fork = ForkTaskDef('fork_addition_and_multiplication')
    wt_fork.forkTasks = [[wt_add], [wt_mult]]

    wt_join = JoinTaskDef('join_addition_and_multiplication')

    # The join must wait on the tasks that end each fork branch. It
    # previously listed the fork task and the join task itself, so it never
    # referenced the addition/multiplication results that the division step
    # depends on.
    wt_join.joinOn = ['sample_task_addition', 'sample_task_multiplication']

    # Creating the definition for the workflow
    sample_workflow = WorkflowDef('sample_condu3_workflow_fork')
    sample_workflow.description = 'Calculate ((foo + bar) / (foo * bar)) in parallel'
    sample_workflow.inputParameters = ['foo', 'bar']
    sample_workflow.version = 1
    # "${sample_task_division.output.result}" is equal to wt_div.get_path('result')
    sample_workflow.outputParameters = {'result': wt_div.get_path('result')}
    sample_workflow.tasks = [wt_fork, wt_join, wt_div]
    cw.create_workflow(sample_workflow)
Пример #4
0
def test_workflow_fork_join_creation():
    """Check that a fork/join workflow can be created.

    The workflow forks into 'sample_task_addition' and
    'sample_task_multiplication', joins on both of them and finally runs
    'sample_task_division' on their results. The test passes when
    ``cw.create_workflow`` answers with status code 204.

    ``cw`` is not defined in this function; presumably it is a module-level
    client -- confirm against the surrounding test module.
    """
    # Branch tasks of the fork; both read the same workflow inputs.
    addition = WorkflowTaskDef('sample_task_addition', 'sample_task_addition',
                               'addition_task', {})
    addition.inputParameters = dict(foo="${workflow.input.foo}",
                                    bar="${workflow.input.bar}")

    multiplication = WorkflowTaskDef('sample_task_multiplication',
                                     'sample_task_multiplication',
                                     'multiplication task', {})
    multiplication.inputParameters = dict(foo='${workflow.input.foo}',
                                          bar='${workflow.input.bar}')

    # Division consumes the output of both branches.
    division = WorkflowTaskDef('sample_task_division', 'sample_task_division',
                               'division task', {})
    division.inputParameters = dict(
        add_res='${sample_task_addition.output.result}',
        mult_res='${sample_task_multiplication.output.result}')

    # Fork with one single-task branch per operation.
    fork = ForkTaskDef('fork_addition_and_multiplication',
                       'addition_and_multiplication',
                       'A simple but complex fork', {})
    fork.forkTasks = [[addition], [multiplication]]

    # Join waiting for the task of each branch.
    join = JoinTaskDef('join_addition_and_multiplication',
                       'join_addition_and_multiplication',
                       'A simple but complex join', {})
    join.joinOn = ['sample_task_addition', 'sample_task_multiplication']

    # Workflow definition
    workflow = WorkflowDef('sample_condu3_workflow_fork')
    workflow.description = 'Calculate ((foo + bar) / (foo * bar)) in parallel'
    workflow.inputParameters = ['foo', 'bar']
    workflow.version = 1
    workflow.outputParameters = dict(
        result="${sample_task_division.output.result}")
    workflow.tasks = [fork, join, division]

    response = cw.create_workflow(workflow)
    assert response.status_code == 204
Пример #5
0
def sample_workflow_def(name: str, version: int, cw):
    """Register the sample tasks and a three-step arithmetic workflow.

    Args:
        name: Name given to the workflow definition.
        version: Version assigned to the workflow definition.
        cw: Client object; only its ``create_tasks`` and ``create_workflow``
            methods are used here.

    The workflow lists addition, multiplication and division tasks in that
    order; its 'result' output references the division task's ``result``.
    Nothing is returned.
    """
    # Task definitions are registered before the workflow that uses them.
    cw.create_tasks(sample_task_definitions())

    # Workflow header
    workflow = WorkflowDef(name)
    workflow.description = (
        ' This workflow will make the the calculation of '
        '((foo + bar) / (foo * bar)) ')
    workflow.inputParameters = ['foo', 'bar']
    workflow.version = version
    workflow.outputParameters = dict(
        result="${sample_task_division.output.result}")

    # Addition and multiplication both read the workflow inputs.
    addition = WorkflowTaskDef('sample_task_addition')
    addition.inputParameters = dict(foo="${workflow.input.foo}",
                                    bar="${workflow.input.bar}")

    multiplication = WorkflowTaskDef('sample_task_multiplication')
    multiplication.inputParameters = dict(foo='${workflow.input.foo}',
                                          bar='${workflow.input.bar}')

    # Division reads the outputs of the two previous tasks.
    division = WorkflowTaskDef('sample_task_division')
    division.inputParameters = dict(
        add_res='${sample_task_addition.output.result}',
        mult_res='${sample_task_multiplication.output.result}')

    workflow.tasks = [addition, multiplication, division]
    cw.create_workflow(workflow)
Пример #6
0
def main():
    """Register the sample tasks and a coin-toss decision workflow.

    Creates a ``Condu`` client for 'http://localhost:8080/api', registers the
    task definitions returned by ``sample_task_definitions()`` and creates
    the 'heads_or_tails_workflow' workflow (version 2). The workflow runs
    'sample_heads_or_tails' and then a decision task that routes to
    'sample_task_addition' on 'heads' or 'sample_task_multiplication' on
    'tails'.
    """
    client = Condu('http://localhost:8080/api')
    # Task definitions are registered before the workflow that uses them.
    client.create_tasks(sample_task_definitions())

    # Coin toss task; its output drives the decision below.
    coin_toss = WorkflowTaskDef('sample_heads_or_tails')

    # Both candidate branches receive the same two workflow inputs.
    addition = WorkflowTaskDef('sample_task_addition')
    addition.inputParameters = dict(foo="${workflow.input.foo}",
                                    bar="${workflow.input.bar}")

    multiplication = WorkflowTaskDef('sample_task_multiplication')
    multiplication.inputParameters = dict(foo='${workflow.input.foo}',
                                          bar='${workflow.input.bar}')

    # Decision task: 'coin' is bound to the toss result and selected as the
    # case value; each outcome maps to a single-task branch.
    chooser = DecisionTaskDef(
        'decision', description='Decides where to go depending on coin toss')
    chooser.inputParameters['coin'] = '${sample_heads_or_tails.output.result}'
    chooser.caseValueParam = 'coin'
    for outcome, branch_task in (('heads', addition),
                                 ('tails', multiplication)):
        chooser.decisionCases[outcome] = [branch_task]

    # Workflow definition
    workflow = WorkflowDef('heads_or_tails_workflow')
    workflow.description = (
        ' This workflow will toss a coin if heads it does '
        '((foo + bar) else (foo * bar)) ')
    workflow.inputParameters = ['foo', 'bar']
    workflow.version = 2
    workflow.tasks = [coin_toss, chooser]
    client.create_workflow(workflow)
Пример #7
0
def svm_workflow(workflow_name='svm_model_workflow'):
    """Build the workflow definition for training and testing an SVM model.

    Args:
        workflow_name: Name given to the resulting workflow definition.

    Returns:
        A ``WorkflowDef`` (version 1) whose task list is, in order:
        split_dataset, relieff_ranker_attribute_selection, a fork
        ([one_hot_attribute_encoding_train, feature_scaling_train] and
        [raw_attribute_selection]), a join on the encoding-train and raw
        selection tasks, one_hot_attribute_encoding_test, a join on
        feature_scaling_train, a fork ([svm_model_creation] and
        [feature_scaling_test]), a join on those two, and svm_model_test.
    """

    def wire(task, bindings):
        # Assign every (key, expression) pair to the task's input parameters,
        # in the order given, and hand the task back.
        for key, expression in bindings:
            task.inputParameters[key] = expression
        return task

    # ---- Task definitions -------------------------------------------------
    # Every value is a reference expression pointing either at a workflow
    # input or at the output of an earlier task.

    # Split the data set
    split_task = wire(WorkflowTaskDef('split_dataset'), (
        ('dataset_uri', '${workflow.input.dataset_uri}'),
        ('split_condition', '${workflow.input.split_condition}'),
    ))

    # Attribute selection on the train split
    relieff_task = wire(WorkflowTaskDef('relieff_ranker_attribute_selection'), (
        ('dataset_uri', '${split_dataset.output.train_uri}'),
        ('class_label', '${workflow.input.class_label}'),
        ('num_instances_to_sample', '${workflow.input.num_instances_to_sample}'),
        ('seed', '${workflow.input.seed}'),
        ('num_neighbours', '${workflow.input.num_neighbours}'),
        ('weight_by_distance', '${workflow.input.weight_by_distance}'),
        ('sigma', '${workflow.input.sigma}'),
        ('threshold', '${workflow.input.threshold}'),
        ('num_attr_to_select', '${workflow.input.num_attr_to_select}'),
    ))

    # One-hot encoding of the train split
    encode_train_task = wire(WorkflowTaskDef('one_hot_attribute_encoding_train'), (
        ('dataset_uri', '${relieff_ranker_attribute_selection.output.dataset_uri}'),
        ('attributes_to_encode', '${workflow.input.attributes_to_encode}'),
        ('attributes_to_ignore', '${workflow.input.attributes_to_ignore}'),
    ))

    # Raw attribute selection on the test split, using the selected columns
    raw_select_task = wire(WorkflowTaskDef('raw_attribute_selection'), (
        ('dataset_uri', '${split_dataset.output.test_uri}'),
        ('attributes', '${relieff_ranker_attribute_selection.output.selected_columns}'),
    ))

    # Scaling of the train split
    scale_train_task = wire(WorkflowTaskDef('feature_scaling_train'), (
        ('dataset_uri', '${one_hot_attribute_encoding_train.output.dataset_uri}'),
        ('attributes_to_scale', '${workflow.input.attributes_to_scale}'),
        ('mini', '${workflow.input.mini}'),
        ('maxi', '${workflow.input.maxi}'),
        ('attributes_to_ignore', '${workflow.input.attributes_to_ignore}'),
    ))

    # One-hot encoding of the test split, aligned with the train columns
    encode_test_task = wire(WorkflowTaskDef('one_hot_attribute_encoding_test'), (
        ('dataset_uri', '${raw_attribute_selection.output.dataset_uri}'),
        ('train_cols', '${one_hot_attribute_encoding_train.output.dataset_columns}'),
        ('attributes_to_ignore', '${workflow.input.attributes_to_ignore}'),
        ('attributes_to_encode', '${workflow.input.attributes_to_encode}'),
    ))

    # Scaling of the test split with the scaler produced on the train split
    scale_test_task = wire(WorkflowTaskDef('feature_scaling_test'), (
        ('dataset_uri', '${one_hot_attribute_encoding_test.output.dataset_uri}'),
        ('scaler_uri', '${feature_scaling_train.output.scaler_uri}'),
        ('attributes_to_scale', '${workflow.input.attributes_to_scale}'),
        ('attributes_to_ignore', '${workflow.input.attributes_to_ignore}'),
    ))

    # SVM model creation
    create_model_task = wire(WorkflowTaskDef('svm_model_creation'), (
        ('dataset_uri', '${feature_scaling_train.output.dataset_uri}'),
        ('class_label', '${workflow.input.class_label}'),
        ('C', '${workflow.input.C}'),
        ('kernel', '${workflow.input.kernel}'),
    ))

    # SVM model test
    test_model_task = wire(WorkflowTaskDef('svm_model_test'), (
        ('dataset_uri', '${feature_scaling_test.output.dataset_uri}'),
        ('class_label', '${workflow.input.class_label}'),
        ('model_uri', '${svm_model_creation.output.model_uri}'),
    ))

    # ---- Workflow definition ----------------------------------------------
    workflow = WorkflowDef(workflow_name)
    workflow.description = (
        'This workflow will take a dataset, and return a model with its '
        'respective performance ')
    workflow.inputParameters = [
        'dataset_uri',
        'class_label',
        'attributes_to_scale',
        'attributes_to_encode',
        'attributes_to_ignore',
        'split_condition',
        'num_instances_to_sample',
        'seed',
        'num_neighbours',
        'weight_by_distance',
        'sigma',
        'threshold',
        'num_attr_to_select',
        'mini',
        'maxi',
        'C',
        'kernel',
        'performance_metrics',
    ]
    workflow.version = 1

    # Sequential head: split, then ranked attribute selection.
    workflow.tasks.append(split_task)
    workflow.tasks.append(relieff_task)

    # First fork, two parallel branches: (encode train -> scale train) and
    # (raw attribute selection).
    one_hot_fork = ForkTaskDef('1_hot_fork', '1_hot_fork', '1_hot_fork', {})
    one_hot_fork.forkTasks = [[encode_train_task, scale_train_task],
                              [raw_select_task]]
    workflow.tasks.append(one_hot_fork)

    # This join waits only for the encoding-train and raw selection tasks;
    # feature_scaling_train is joined separately further down.
    one_hot_join = JoinTaskDef('1_hot_join')
    one_hot_join.joinOn = ['one_hot_attribute_encoding_train',
                           'raw_attribute_selection']
    workflow.tasks.append(one_hot_join)

    # Encoding of the test split
    workflow.tasks.append(encode_test_task)

    # Join for feature_scaling_train
    scaling_train_join = JoinTaskDef('2_scaling_train_join')
    scaling_train_join.joinOn = ['feature_scaling_train']
    workflow.tasks.append(scaling_train_join)

    # NOTE(review): from here on tasks are added through append_task rather
    # than tasks.append, as in the original; presumably equivalent -- confirm.

    # Second fork: model creation alongside scaling of the test split.
    build_and_scale_fork = ForkTaskDef('3_build_and_test_scaling_fork')
    build_and_scale_fork.forkTasks = [[create_model_task], [scale_test_task]]
    workflow.append_task(build_and_scale_fork)

    # Join for those two tasks
    build_and_scale_join = JoinTaskDef('4_build_and_test_scaling_join')
    build_and_scale_join.joinOn = ['svm_model_creation',
                                   'feature_scaling_test']
    workflow.append_task(build_and_scale_join)

    # The model test runs last.
    workflow.append_task(test_model_task)
    return workflow
Пример #8
0
def cross_workflow():
    """Build the cross-validation SVM workflow definition.

    The workflow ('cross_validation_svm_workflow', version 1) is made of four
    tasks, in order:

    1. 'cv_folds_definition', fed from the workflow inputs,
    2. a dynamic fork 'dynamic_cross' whose task names and task inputs are
       taken from the outputs of 'cv_folds_definition',
    3. a join '1_join',
    4. 'multiple_model_evals', which receives the output of '1_join' as
       ``model_performances``.

    Returns:
        The assembled ``WorkflowDef``.
    """
    # Fold definition: every input is forwarded from the workflow inputs.
    wt_cv_folds_def = WorkflowTaskDef('cv_folds_definition')
    wt_cv_folds_def.inputParameters[
        'dataset_uri'] = '${workflow.input.dataset_uri}'
    wt_cv_folds_def.inputParameters[
        'num_folds'] = '${workflow.input.num_folds}'
    wt_cv_folds_def.inputParameters[
        'stratified'] = '${workflow.input.stratified}'
    wt_cv_folds_def.inputParameters[
        'randomize'] = '${workflow.input.randomize}'
    wt_cv_folds_def.inputParameters['seed'] = '${workflow.input.seed}'
    wt_cv_folds_def.inputParameters[
        'class_label'] = '${workflow.input.class_label}'

    # Dynamic fork: the two *Param attributes name the input keys that carry
    # the task names and the per-task inputs produced by the fold definition.
    wt_dynamic = DynamicForkTaskDef('dynamic_cross')
    wt_dynamic.inputParameters[
        'dynamicTaskNames'] = '${cv_folds_definition.output.dynamicTaskNames}'
    wt_dynamic.inputParameters[
        'dynamicTaskInputs'] = '${cv_folds_definition.output.dynamicTaskInputs}'
    wt_dynamic.dynamicForkTasksParam = 'dynamicTaskNames'
    wt_dynamic.dynamicForkTasksInputParamName = 'dynamicTaskInputs'

    # No joinOn is set on this join.
    wt_join = JoinTaskDef('1_join', '1_join', '1_join', {})

    # Evaluation over everything the join outputs.
    wt_multiple_models = WorkflowTaskDef('multiple_model_evals')
    wt_multiple_models.inputParameters[
        'model_performances'] = '${1_join.output}'

    cross_svm_model_workflow = WorkflowDef('cross_validation_svm_workflow')
    cross_svm_model_workflow.description = 'cross_validation_svm_workflow'
    # 'seed' was previously listed twice; each input name appears once now.
    cross_svm_model_workflow.inputParameters = [
        'dataset_uri', 'class_label', 'attributes_to_scale',
        'attributes_to_encode', 'attributes_to_ignore', 'split_condition',
        'num_instances_to_sample', 'seed', 'num_neighbours',
        'weight_by_distance', 'sigma', 'threshold', 'num_attr_to_select',
        'mini', 'maxi', 'C', 'kernel', 'num_folds', 'stratified', 'randomize',
        'performance_metrics'
    ]
    cross_svm_model_workflow.version = 1
    cross_svm_model_workflow.tasks.append(wt_cv_folds_def)
    cross_svm_model_workflow.tasks.append(wt_dynamic)
    cross_svm_model_workflow.tasks.append(wt_join)
    cross_svm_model_workflow.tasks.append(wt_multiple_models)
    return cross_svm_model_workflow