def svm_model_workflow_with_eval():
    """Extend the base SVM workflow with a classifier-model-evaluation task.

    Builds the 'svm_workflow_with_classifier' workflow via svm_workflow(),
    appends a 'classifier_model_eval' task that compares the test targets
    against the predictions, and returns the resulting workflow definition.
    """
    workflow = svm_workflow('svm_workflow_with_classifier')

    # ----- CLASSIFIER MODEL EVAL ------
    eval_task = WorkflowTaskDef('classifier_model_eval')
    eval_task.inputParameters.update({
        'targets': '${svm_model_test.output.targets}',
        'predicted': '${svm_model_test.output.predicted}',
        'performance_metrics': '${workflow.input.performance_metrics}',
    })

    workflow.tasks.append(eval_task)
    return workflow
def test_workflow_decision_creation():
    """Build the heads-or-tails decision workflow and assert the server accepts it."""
    # Coin-toss task plus the two arithmetic branch tasks.
    wt_toss = WorkflowTaskDef('sample_heads_or_tails', 'sample_heads_or_tails',
                              'This task generates randoms', {})

    branch_inputs = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }
    wt_sum = WorkflowTaskDef('sample_task_addition')
    wt_sum.inputParameters = dict(branch_inputs)
    wt_prod = WorkflowTaskDef('sample_task_multiplication')
    wt_prod.inputParameters = dict(branch_inputs)

    # Decision task: routes to one branch depending on the coin-toss result.
    wt_decision = DecisionTaskDef(
        'decision', description='Decides where to go depending on coin toss')
    wt_decision.inputParameters['coin'] = '${sample_heads_or_tails.output.result}'
    wt_decision.caseValueParam = 'coin'
    wt_decision.decisionCases['heads'] = [wt_sum]
    wt_decision.decisionCases['tails'] = [wt_prod]

    # Workflow definition.
    sample_workflow = WorkflowDef('heads_or_tails_workflow')
    sample_workflow.description = ' This workflow will toss a coin if heads it does ((foo + bar) else (foo * bar)) '
    sample_workflow.inputParameters = ['foo', 'bar']
    sample_workflow.version = 2
    sample_workflow.tasks = [wt_toss, wt_decision]

    assert cw.create_workflow(sample_workflow).status_code == 204
def main():
    """Register the sample tasks and create the parallel fork/join workflow.

    The workflow computes ((foo + bar) / (foo * bar)): addition and
    multiplication run in parallel inside a fork, a join waits for both
    branches, and a final division task combines their results.
    """
    cw = Condu('http://localhost:8080/api')
    # creating the definitions of each task
    cw.create_tasks(sample_task_definitions())

    # The two parallel branch tasks (addition and multiplication).
    wt_add = WorkflowTaskDef('sample_task_addition')
    wt_add.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}'
    }
    wt_mult = WorkflowTaskDef('sample_task_multiplication')
    wt_mult.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}'
    }

    # Division runs after the join and consumes both branch outputs.
    wt_div = WorkflowTaskDef('sample_task_division')
    wt_div.inputParameters = {
        'add_res': '${sample_task_addition.output.result}',
        'mult_res': '${sample_task_multiplication.output.result}'
    }

    wt_fork = ForkTaskDef('fork_addition_and_multiplication')
    wt_fork.forkTasks = [[wt_add], [wt_mult]]

    wt_join = JoinTaskDef('join_addition_and_multiplication')
    # BUGFIX: joinOn must name the terminal task of each fork branch; the
    # original waited on the fork task and on the join itself, which would
    # never be completed branch tasks. This now matches
    # test_workflow_fork_join_creation for the same workflow.
    wt_join.joinOn = ['sample_task_addition', 'sample_task_multiplication']

    # Creating the definition for the workflow
    sample_workflow = WorkflowDef('sample_condu3_workflow_fork')
    sample_workflow.description = 'Calculate ((foo + bar) / (foo * bar)) in parallel'
    sample_workflow.inputParameters = ['foo', 'bar']
    sample_workflow.version = 1
    # "${sample_task_division.output.result}" is equal to wt_div.get_path('result')
    sample_workflow.outputParameters = {'result': wt_div.get_path('result')}
    sample_workflow.tasks = [wt_fork, wt_join, wt_div]

    cw.create_workflow(sample_workflow)
def test_workflow_fork_join_creation():
    """Create the parallel ((foo + bar) / (foo * bar)) workflow and verify acceptance."""
    # The two fork branches share the same workflow inputs.
    shared_inputs = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }
    wt_add = WorkflowTaskDef('sample_task_addition', 'sample_task_addition',
                             'addition_task', {})
    wt_add.inputParameters = dict(shared_inputs)
    wt_mult = WorkflowTaskDef('sample_task_multiplication',
                              'sample_task_multiplication',
                              'multiplication task', {})
    wt_mult.inputParameters = dict(shared_inputs)

    # Division consumes both branch results after the join.
    wt_div = WorkflowTaskDef('sample_task_division', 'sample_task_division',
                             'division task', {})
    wt_div.inputParameters = {
        'add_res': '${sample_task_addition.output.result}',
        'mult_res': '${sample_task_multiplication.output.result}',
    }

    wt_fork = ForkTaskDef('fork_addition_and_multiplication',
                          'addition_and_multiplication',
                          'A simple but complex fork', {})
    wt_fork.forkTasks = [[wt_add], [wt_mult]]

    wt_join = JoinTaskDef('join_addition_and_multiplication',
                          'join_addition_and_multiplication',
                          'A simple but complex join', {})
    wt_join.joinOn = ['sample_task_addition', 'sample_task_multiplication']

    # Workflow definition.
    sample_workflow = WorkflowDef('sample_condu3_workflow_fork')
    sample_workflow.description = 'Calculate ((foo + bar) / (foo * bar)) in parallel'
    sample_workflow.inputParameters = ['foo', 'bar']
    sample_workflow.version = 1
    sample_workflow.outputParameters = {
        'result': '${sample_task_division.output.result}'
    }
    sample_workflow.tasks = [wt_fork, wt_join, wt_div]

    assert cw.create_workflow(sample_workflow).status_code == 204
def sample_workflow_def(name: str, version: int, cw):
    """Register the sample tasks and create a sequential ((foo+bar)/(foo*bar)) workflow.

    :param name: name for the workflow definition.
    :param version: workflow version number.
    :param cw: Condu client used to register tasks and the workflow.
    """
    # Register the task definitions first.
    cw.create_tasks(sample_task_definitions())

    # Workflow definition.
    workflow = WorkflowDef(name)
    workflow.description = ' This workflow will make the the calculation of ((foo + bar) / (foo * bar)) '
    workflow.inputParameters = ['foo', 'bar']
    workflow.version = version
    workflow.outputParameters = {'result': '${sample_task_division.output.result}'}

    # Three chained tasks: add, multiply, then divide the two results.
    addition = WorkflowTaskDef('sample_task_addition')
    addition.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }
    multiplication = WorkflowTaskDef('sample_task_multiplication')
    multiplication.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }
    division = WorkflowTaskDef('sample_task_division')
    division.inputParameters = {
        'add_res': '${sample_task_addition.output.result}',
        'mult_res': '${sample_task_multiplication.output.result}',
    }
    workflow.tasks = [addition, multiplication, division]

    cw.create_workflow(workflow)
def main():
    """Register the sample tasks and create the heads-or-tails decision workflow."""
    cw = Condu('http://localhost:8080/api')
    # Register the task definitions first.
    cw.create_tasks(sample_task_definitions())

    # Coin-toss task plus the two arithmetic branch tasks.
    wt_toss = WorkflowTaskDef('sample_heads_or_tails')
    wt_plus = WorkflowTaskDef('sample_task_addition')
    wt_plus.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }
    wt_times = WorkflowTaskDef('sample_task_multiplication')
    wt_times.inputParameters = {
        'foo': '${workflow.input.foo}',
        'bar': '${workflow.input.bar}',
    }

    # Decision task: routes to one branch depending on the coin-toss result.
    wt_decision = DecisionTaskDef(
        'decision', description='Decides where to go depending on coin toss')
    wt_decision.inputParameters['coin'] = '${sample_heads_or_tails.output.result}'
    wt_decision.caseValueParam = 'coin'
    wt_decision.decisionCases['heads'] = [wt_plus]
    wt_decision.decisionCases['tails'] = [wt_times]

    # Workflow definition.
    sample_workflow = WorkflowDef('heads_or_tails_workflow')
    sample_workflow.description = ' This workflow will toss a coin if heads it does ((foo + bar) else (foo * bar)) '
    sample_workflow.inputParameters = ['foo', 'bar']
    sample_workflow.version = 2
    sample_workflow.tasks = [wt_toss, wt_decision]

    cw.create_workflow(sample_workflow)
def svm_workflow(workflow_name='svm_model_workflow'):
    """Assemble a Conductor workflow definition that trains and tests an SVM.

    Pipeline: split the dataset; rank/select attributes with ReliefF on the
    train split; in parallel, one-hot encode + min-max scale the train split
    and project the test split onto the selected columns; encode and scale
    the test split with the train-fitted encoder/scaler; train the SVM on the
    prepared train data; finally score the model on the prepared test data.

    :param workflow_name: name given to the resulting WorkflowDef.
    :return: the assembled WorkflowDef (not registered with any server here).
    """
    # INPUTS OF WORKFLOW
    # - split_condition(type), attribute_names (type), attributes_to_ignore (type), class_label(type)

    # ---------------------------- SPLIT DATA SET ----------------------------
    # Splits the input dataset into train/test parts (train_uri / test_uri).
    wt_split = WorkflowTaskDef('split_dataset')
    wt_split.inputParameters['dataset_uri'] = '${workflow.input.dataset_uri}'
    wt_split.inputParameters['split_condition'] = '${workflow.input.split_condition}'

    # ------ ATTRIBUTE SELECTION -----------
    # ReliefF-ranked attribute selection, run on the train split only.
    wt_r_ras = WorkflowTaskDef('relieff_ranker_attribute_selection')
    wt_r_ras.inputParameters['dataset_uri'] = '${split_dataset.output.train_uri}'
    wt_r_ras.inputParameters['class_label'] = '${workflow.input.class_label}'
    wt_r_ras.inputParameters['num_instances_to_sample'] = '${workflow.input.num_instances_to_sample}'
    wt_r_ras.inputParameters['seed'] = '${workflow.input.seed}'
    wt_r_ras.inputParameters['num_neighbours'] = '${workflow.input.num_neighbours}'
    wt_r_ras.inputParameters['weight_by_distance'] = '${workflow.input.weight_by_distance}'
    wt_r_ras.inputParameters['sigma'] = '${workflow.input.sigma}'
    wt_r_ras.inputParameters['threshold'] = '${workflow.input.threshold}'
    wt_r_ras.inputParameters['num_attr_to_select'] = '${workflow.input.num_attr_to_select}'

    # ------ ATTRIBUTE ENCODING TRAIN -----------
    # One-hot encodes the selected attributes of the train split.
    wt_haeta = WorkflowTaskDef('one_hot_attribute_encoding_train')
    wt_haeta.inputParameters['dataset_uri'] = '${relieff_ranker_attribute_selection.output.dataset_uri}'
    wt_haeta.inputParameters['attributes_to_encode'] = '${workflow.input.attributes_to_encode}'
    wt_haeta.inputParameters['attributes_to_ignore'] = '${workflow.input.attributes_to_ignore}'

    # ------ RAW ATTRIBUTE SELECTION ------------
    # Projects the test split onto the columns the ReliefF step selected.
    wt_ras = WorkflowTaskDef('raw_attribute_selection')
    wt_ras.inputParameters['dataset_uri'] = '${split_dataset.output.test_uri}'
    wt_ras.inputParameters['attributes'] = '${relieff_ranker_attribute_selection.output.selected_columns}'

    # ------ SCALING TRAIN ----------------------
    # Fits a [mini, maxi] feature scaler on the encoded train split.
    wt_fsta = WorkflowTaskDef('feature_scaling_train')
    wt_fsta.inputParameters['dataset_uri'] = '${one_hot_attribute_encoding_train.output.dataset_uri}'
    wt_fsta.inputParameters['attributes_to_scale'] = '${workflow.input.attributes_to_scale}'
    wt_fsta.inputParameters['mini'] = '${workflow.input.mini}'
    wt_fsta.inputParameters['maxi'] = '${workflow.input.maxi}'
    wt_fsta.inputParameters['attributes_to_ignore'] = '${workflow.input.attributes_to_ignore}'

    # ------ ATTRIBUTE ENCODING TEST -----------
    # Encodes the test split using the train split's column layout.
    wt_haete = WorkflowTaskDef('one_hot_attribute_encoding_test')
    wt_haete.inputParameters['dataset_uri'] = '${raw_attribute_selection.output.dataset_uri}'
    wt_haete.inputParameters['train_cols'] = '${one_hot_attribute_encoding_train.output.dataset_columns}'
    wt_haete.inputParameters['attributes_to_ignore'] = '${workflow.input.attributes_to_ignore}'
    wt_haete.inputParameters['attributes_to_encode'] = '${workflow.input.attributes_to_encode}'

    # ------ SCALING TEST ---------------------
    # Applies the train-fitted scaler to the encoded test split.
    wt_fste = WorkflowTaskDef('feature_scaling_test')
    wt_fste.inputParameters['dataset_uri'] = '${one_hot_attribute_encoding_test.output.dataset_uri}'
    wt_fste.inputParameters['scaler_uri'] = '${feature_scaling_train.output.scaler_uri}'
    wt_fste.inputParameters['attributes_to_scale'] = '${workflow.input.attributes_to_scale}'
    wt_fste.inputParameters['attributes_to_ignore'] = '${workflow.input.attributes_to_ignore}'

    # ------ SVM MODEL CREATION ---------------
    # Trains the SVM (C, kernel) on the scaled train split.
    wt_smc = WorkflowTaskDef('svm_model_creation')
    wt_smc.inputParameters['dataset_uri'] = '${feature_scaling_train.output.dataset_uri}'
    wt_smc.inputParameters['class_label'] = '${workflow.input.class_label}'
    wt_smc.inputParameters['C'] = '${workflow.input.C}'
    wt_smc.inputParameters['kernel'] = '${workflow.input.kernel}'

    # ------ SVM MODEL TEST -------------------
    # Scores the trained model on the scaled test split.
    wt_smt = WorkflowTaskDef('svm_model_test')
    wt_smt.inputParameters['dataset_uri'] = '${feature_scaling_test.output.dataset_uri}'
    wt_smt.inputParameters['class_label'] = '${workflow.input.class_label}'
    wt_smt.inputParameters['model_uri'] = '${svm_model_creation.output.model_uri}'

    # ----- WORKFLOW DEFINITION ---------------
    # Workflow definition
    svm_model_workflow = WorkflowDef(workflow_name)
    svm_model_workflow.description = 'This workflow will take a dataset, and return a model with its respective ' \
                                     'performance '
    svm_model_workflow.inputParameters = [
        'dataset_uri', 'class_label', 'attributes_to_scale',
        'attributes_to_encode', 'attributes_to_ignore', 'split_condition',
        'num_instances_to_sample', 'seed', 'num_neighbours',
        'weight_by_distance', 'sigma', 'threshold', 'num_attr_to_select',
        'mini', 'maxi', 'C', 'kernel', 'performance_metrics'
    ]
    svm_model_workflow.version = 1
    # Split task
    svm_model_workflow.tasks.append(wt_split)
    # Relieff ranked attribute selection task
    svm_model_workflow.tasks.append(wt_r_ras)
    # Fork with 2 parallel branches:
    # [one_hot_attribute_encoding_train, feature_scaling_train] and [raw_attribute_selection]
    wt_1_hot_fork = ForkTaskDef('1_hot_fork', '1_hot_fork', '1_hot_fork', {})
    wt_1_hot_fork.forkTasks = [[wt_haeta, wt_fsta], [wt_ras]]
    svm_model_workflow.tasks.append(wt_1_hot_fork)
    # This join only waits for two of the forked tasks; feature_scaling_train
    # is joined on later by 2_scaling_train_join.
    wt_1_hot_join = JoinTaskDef('1_hot_join')
    wt_1_hot_join.joinOn = [
        'one_hot_attribute_encoding_train', 'raw_attribute_selection'
    ]
    svm_model_workflow.tasks.append(wt_1_hot_join)
    # Task one_hot_attribute_encoding_test
    svm_model_workflow.tasks.append(wt_haete)
    # Join for the feature_scaling_train
    wt_2_scaling_train_join = JoinTaskDef('2_scaling_train_join')
    wt_2_scaling_train_join.joinOn = ['feature_scaling_train']
    svm_model_workflow.tasks.append(wt_2_scaling_train_join)
    # Another fork for the final tasks of svm_model_creation and feature_scaling_test
    wt_3_build_and_test_scaling_fork = ForkTaskDef(
        '3_build_and_test_scaling_fork')
    wt_3_build_and_test_scaling_fork.forkTasks = [[wt_smc], [wt_fste]]
    # NOTE(review): from here on append_task() is used where tasks.append()
    # was used above — presumably equivalent; confirm against WorkflowDef.
    svm_model_workflow.append_task(wt_3_build_and_test_scaling_fork)
    # Last join for these two tasks
    wt_4_build_and_test_scaling_join = JoinTaskDef(
        '4_build_and_test_scaling_join')
    wt_4_build_and_test_scaling_join.joinOn = [
        'svm_model_creation', 'feature_scaling_test'
    ]
    svm_model_workflow.append_task(wt_4_build_and_test_scaling_join)
    # Last task is svm_model_test
    svm_model_workflow.append_task(wt_smt)
    return svm_model_workflow
def cross_workflow():
    """Build the cross-validation SVM workflow definition.

    Structure: fold definition -> dynamic fork (one task per fold, names and
    inputs produced at run time) -> join -> aggregation of the per-fold model
    performances.

    :return: the assembled WorkflowDef (not registered with any server here).
    """
    # Computes the CV folds and emits the dynamic-fork task names/inputs.
    wt_cv_folds_def = WorkflowTaskDef('cv_folds_definition')
    wt_cv_folds_def.inputParameters['dataset_uri'] = '${workflow.input.dataset_uri}'
    wt_cv_folds_def.inputParameters['num_folds'] = '${workflow.input.num_folds}'
    wt_cv_folds_def.inputParameters['stratified'] = '${workflow.input.stratified}'
    wt_cv_folds_def.inputParameters['randomize'] = '${workflow.input.randomize}'
    wt_cv_folds_def.inputParameters['seed'] = '${workflow.input.seed}'
    wt_cv_folds_def.inputParameters['class_label'] = '${workflow.input.class_label}'

    # Dynamic fork: fans out over the fold tasks defined by the previous step.
    wt_dynamic = DynamicForkTaskDef('dynamic_cross')
    wt_dynamic.inputParameters['dynamicTaskNames'] = '${cv_folds_definition.output.dynamicTaskNames}'
    wt_dynamic.inputParameters['dynamicTaskInputs'] = '${cv_folds_definition.output.dynamicTaskInputs}'
    wt_dynamic.dynamicForkTasksParam = 'dynamicTaskNames'
    wt_dynamic.dynamicForkTasksInputParamName = 'dynamicTaskInputs'

    # Join that collects the output of every dynamically forked fold task.
    wt_join = JoinTaskDef('1_join', '1_join', '1_join', {})

    # Aggregates the per-fold performances gathered by the join.
    wt_multiple_models = WorkflowTaskDef('multiple_model_evals')
    wt_multiple_models.inputParameters['model_performances'] = '${1_join.output}'

    cross_svm_model_workflow = WorkflowDef('cross_validation_svm_workflow')
    cross_svm_model_workflow.description = 'cross_validation_svm_workflow'
    # BUGFIX: 'seed' was listed twice in the original input-parameter list;
    # the duplicate is removed here.
    cross_svm_model_workflow.inputParameters = [
        'dataset_uri', 'class_label', 'attributes_to_scale',
        'attributes_to_encode', 'attributes_to_ignore', 'split_condition',
        'num_instances_to_sample', 'seed', 'num_neighbours',
        'weight_by_distance', 'sigma', 'threshold', 'num_attr_to_select',
        'mini', 'maxi', 'C', 'kernel', 'num_folds', 'stratified',
        'randomize', 'performance_metrics'
    ]
    cross_svm_model_workflow.version = 1
    cross_svm_model_workflow.tasks.append(wt_cv_folds_def)
    cross_svm_model_workflow.tasks.append(wt_dynamic)
    cross_svm_model_workflow.tasks.append(wt_join)
    cross_svm_model_workflow.tasks.append(wt_multiple_models)
    return cross_svm_model_workflow