def pipeline(project_id='loan-predict'):
    preprocessor = dsl.ContainerOp(
        name='preprocessor',
        image='praveen049/loan-predict-logreg-preproc',
        command=['python', 'preprocessor.py'],
        arguments=[
            '--output-x', '/x.pkl',
            '--output-y', '/y.pkl',
        ],
        file_outputs={
            'x-output': '/x.pkl',
            'y-output': '/y.pkl',
        })

    trainer = dsl.ContainerOp(
        name='trainer',
        image='praveen049/loan-predict-logreg-train',
        command=['python', 'train.py'],
        arguments=[
            '--input_x_path_file',
            dsl.InputArgumentPath(preprocessor.outputs['x-output']),
            '--input_y_path_file',
            dsl.InputArgumentPath(preprocessor.outputs['y-output']),
            '--output_model', '/model.pkl',
            '--output_model_path_file', '/model.txt',
        ],
        file_outputs={
            'model': '/model.pkl',
        })
    trainer.after(preprocessor)
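# A minimal sketch of how the pipeline function above can be compiled with the
# kfp v1 SDK; the output filename is an assumption. Decorating the function
# with @dsl.pipeline(name=..., description=...) would attach metadata, but the
# compiler also accepts a plain function.
import kfp.compiler

kfp.compiler.Compiler().compile(pipeline, 'loan_predict_pipeline.yaml')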
def component_with_inline_input_artifact(text: str):
    return dsl.ContainerOp(
        name='component_with_inline_input_artifact',
        image='alpine',
        command=[
            'cat',
            dsl.InputArgumentPath(
                text,
                path='/tmp/inputs/text/data',
                input='text'),  # path and input are optional
        ],
    )
def component_with_input_artifact(text):
    '''A component that passes text as input artifact'''
    return dsl.ContainerOp(
        name='component_with_input_artifact',
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=text,
                path='/tmp/inputs/text/data',
                input='text'),  # path and input are optional
        ],
        image='alpine',
        command=['cat', '/tmp/inputs/text/data'],
    )
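# A minimal usage sketch for the two components above; the pipeline name and
# the constant text argument are assumptions for illustration.
from kfp import dsl

@dsl.pipeline(name='input-artifact-demo')
def input_artifact_pipeline():
    # A plain Python string passed here is materialized as an input artifact
    # file that the container reads from the declared path.
    component_with_inline_input_artifact('Hello world')
    component_with_input_artifact('Hello world')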
def prepare_data_op(input_path, output_path, pvc_path, vol, TAG):
    return dsl.ContainerOp(
        name='prepare_data',
        image=f'rsthesis/prepare_data_image:{TAG}',
        arguments=[
            '--input_path', dsl.InputArgumentPath(input_path),
            '--output_path', output_path,
        ],
        command=["python", "prepare_data.py"],
        file_outputs={'data_output': output_path},
        pvolumes={pvc_path: vol},
        container_kwargs={"image_pull_policy": "Always"})
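# Hypothetical wiring for prepare_data_op: the VolumeOp name, PVC size, mount
# path, input URI, and tag below are assumptions, not taken from the source.
from kfp import dsl

@dsl.pipeline(name='prepare-data-demo')
def prepare_data_pipeline(input_path: str = 'gs://my-bucket/raw.csv'):
    vop = dsl.VolumeOp(
        name='create-pvc',
        resource_name='data-pvc',
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWO)
    # Mount the fresh PVC at /mnt and write the prepared data there.
    prepare_data_op(
        input_path=input_path,
        output_path='/mnt/prepared.csv',
        pvc_path='/mnt',
        vol=vop.volume,
        TAG='latest')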
def pipeline_demo():
    # Each component is defined as a function that returns a dsl.ContainerOp,
    # which comes from the KFP SDK.
    preprocess_op = dsl.ContainerOp(
        name='Preprocess Data',
        image='ghcr.io/jaredallencarterjac/preprocess:latest',
        arguments=[],
        # /app comes from where the .npy files are placed in the WORKDIR of
        # the Dockerfile.
        file_outputs={
            'x_train': '/app/x_train.npy',
            'x_test': '/app/x_test.npy',
            'y_train': '/app/y_train.npy',
            'y_test': '/app/y_test.npy',
        })
    preprocess_op.set_image_pull_policy("Always")

    train_op = dsl.ContainerOp(
        name='Train Model',
        image='ghcr.io/jaredallencarterjac/train:latest',
        arguments=[
            '--x_train', dsl.InputArgumentPath(preprocess_op.outputs['x_train']),
            '--y_train', dsl.InputArgumentPath(preprocess_op.outputs['y_train']),
        ],
        # The trained model is packaged and handed to the test step.
        file_outputs={'model': '/app/model.pkl'},
    )
    train_op.set_image_pull_policy("Always")

    test_op = dsl.ContainerOp(
        name='Test Model',
        image='ghcr.io/jaredallencarterjac/test:latest',
        arguments=[
            '--x_test', dsl.InputArgumentPath(preprocess_op.outputs['x_test']),
            '--y_test', dsl.InputArgumentPath(preprocess_op.outputs['y_test']),
            '--model', dsl.InputArgumentPath(train_op.outputs['model']),
        ],
        file_outputs={'mean_squared_error': '/app/output.txt'},
    )
    test_op.set_image_pull_policy("Always")

    deploy_op = dsl.ContainerOp(
        name='Deploy Model',
        image='ghcr.io/jaredallencarterjac/deploy:latest',
        arguments=[
            '--model', dsl.InputArgumentPath(train_op.outputs['model']),
        ]).after(test_op)
    deploy_op.set_image_pull_policy("Always")
def presidential_elections_pipeline():
    _preprocess_op = preprocess_op()
    _train_op = train_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['x_train']),
        dsl.InputArgumentPath(
            _preprocess_op.outputs['y_train'])).after(_preprocess_op)
    _test_op = test_op(
        dsl.InputArgumentPath(_preprocess_op.outputs['x_test']),
        dsl.InputArgumentPath(_preprocess_op.outputs['y_test']),
        dsl.InputArgumentPath(
            _train_op.outputs['model'])).after(_train_op)
    deploy_model_op(
        dsl.InputArgumentPath(
            _train_op.outputs['model'])).after(_test_op)
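# The pipeline above assumes op factories such as train_op. A hypothetical
# sketch of one such factory; the image name, script, and output path are
# invented for illustration.
from kfp import dsl

def train_op(x_train, y_train):
    return dsl.ContainerOp(
        name='train',
        image='example/elections-train:latest',  # assumed image
        command=['python', 'train.py'],
        arguments=['--x_train', x_train, '--y_train', y_train],
        file_outputs={'model': '/app/model.pkl'})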
def _get_custom_job_op(
    task_name: str,
    job_spec: Dict[str, Any],
    input_artifacts: Optional[Dict[str, dsl.PipelineParam]] = None,
    input_parameters: Optional[Dict[str, _ValueOrPipelineParam]] = None,
    output_artifacts: Optional[Dict[str, Type[artifact.Artifact]]] = None,
    output_parameters: Optional[Dict[str, Any]] = None,
) -> AiPlatformCustomJobOp:
  """Gets an AiPlatformCustomJobOp from job spec and I/O definition."""
  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
  pipeline_component_spec = pipeline_spec_pb2.ComponentSpec()

  pipeline_task_spec.task_info.CopyFrom(
      pipeline_spec_pb2.PipelineTaskInfo(name=task_name))

  # Iterate through the inputs/outputs declaration to get pipeline component
  # spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam):
      pipeline_component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(param.param_type)
    else:
      pipeline_component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(type(param))

  for input_name, art in input_artifacts.items():
    if not isinstance(art, dsl.PipelineParam):
      raise RuntimeError(
          'Got unresolved input artifact for input %s. Input '
          'artifacts must be connected to a producer task.' % input_name)
    pipeline_component_spec.input_definitions.artifacts[
        input_name].artifact_type.CopyFrom(
            type_utils.get_artifact_type_schema_message(art.param_type))

  for output_name, param_type in output_parameters.items():
    pipeline_component_spec.output_definitions.parameters[
        output_name].type = type_utils.get_parameter_type(param_type)

  for output_name, artifact_type in output_artifacts.items():
    pipeline_component_spec.output_definitions.artifacts[
        output_name].artifact_type.CopyFrom(artifact_type.get_ir_type())

  pipeline_component_spec.executor_label = dsl_utils.sanitize_executor_label(
      task_name)

  # Iterate through the inputs/outputs specs to get pipeline task spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam) and param.op_name:
      # If the param has a valid op_name, this should be a pipeline parameter
      # produced by an upstream task.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              task_output_parameter=pipeline_spec_pb2.TaskInputsSpec.
              InputParameterSpec.TaskOutputParameterSpec(
                  producer_task='task-{}'.format(param.op_name),
                  output_parameter_key=param.name)))
    elif isinstance(param, dsl.PipelineParam) and not param.op_name:
      # If a valid op_name is missing, this should be a pipeline parameter.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              component_input_parameter=param.name))
    else:
      # If this is not a pipeline param, then it should be a value.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
                  constant_value=dsl_utils.get_value(param))))

  for input_name, art in input_artifacts.items():
    if art.op_name:
      # If the param has a valid op_name, this should be an artifact produced
      # by an upstream task.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              task_output_artifact=pipeline_spec_pb2.TaskInputsSpec.
              InputArtifactSpec.TaskOutputArtifactSpec(
                  producer_task='task-{}'.format(art.op_name),
                  output_artifact_key=art.name)))
    else:
      # Otherwise, this should be from the input of the subdag.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              component_input_artifact=art.name))

  # TODO: Add task dependencies/trigger policies/caching/iterator
  pipeline_task_spec.component_ref.name = dsl_utils.sanitize_component_name(
      task_name)

  # Construct dummy I/O declaration for the op.
  # TODO: resolve name conflict instead of raising errors.
  dummy_outputs = collections.OrderedDict()
  for output_name, _ in output_artifacts.items():
    dummy_outputs[output_name] = _DUMMY_PATH

  for output_name, _ in output_parameters.items():
    if output_name in dummy_outputs:
      raise KeyError('Got name collision for output key %s. Consider renaming '
                     'either output parameters or output '
                     'artifacts.' % output_name)
    dummy_outputs[output_name] = _DUMMY_PATH

  dummy_inputs = collections.OrderedDict()
  for input_name, art in input_artifacts.items():
    dummy_inputs[input_name] = _DUMMY_PATH
  for input_name, param in input_parameters.items():
    if input_name in dummy_inputs:
      raise KeyError('Got name collision for input key %s. Consider renaming '
                     'either input parameters or input '
                     'artifacts.' % input_name)
    dummy_inputs[input_name] = _DUMMY_PATH

  # Construct the AIP (Unified) custom job op.
  return AiPlatformCustomJobOp(
      name=task_name,
      custom_job_spec=job_spec,
      component_spec=pipeline_component_spec,
      task_spec=pipeline_task_spec,
      task_inputs=[
          dsl.InputArgumentPath(
              argument=dummy_inputs[input_name],
              input=input_name,
              path=path,
          ) for input_name, path in dummy_inputs.items()
      ],
      task_outputs=dummy_outputs)
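# The dummy-path construction above exists because ContainerOp-style ops must
# map every logical input and output to a file path. A standalone sketch of
# the same collision-checked merge; _DUMMY_PATH is assumed to be any fixed
# placeholder string.
import collections

_DUMMY_PATH = '/tmp/dummy'  # assumed placeholder value

def merge_io_names(artifact_names, parameter_names):
    merged = collections.OrderedDict(
        (name, _DUMMY_PATH) for name in artifact_names)
    for name in parameter_names:
        if name in merged:
            raise KeyError('Got name collision for key %s.' % name)
        merged[name] = _DUMMY_PATH
    return merged

print(merge_io_names(['model'], ['accuracy']))  # both keys map to _DUMMY_PATH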
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (only for v1) The component references.

  Returns:
    A ContainerOp instance.
  """
  # Add component inputs with default values to the arguments dict if they are
  # not in the arguments dict already.
  arguments = arguments.copy()
  for input_spec in component_spec.inputs or []:
    if input_spec.name not in arguments and input_spec.default is not None:
      default_value = input_spec.default
      if input_spec.type == 'Integer':
        default_value = int(default_value)
      elif input_spec.type == 'Float':
        default_value = float(default_value)
      arguments[input_spec.name] = default_value

  # Check types of the reference arguments and serialize PipelineParams
  original_arguments = arguments
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      argument_type = argument_value.param_type
      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))
      arguments[input_name] = str(argument_value)
    if isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object was passed to component as an input argument. '
          'Pass a single output instead.')

  placeholder_resolver = ExtraPlaceholderResolver()
  resolved_cmd = _components._resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      placeholder_resolver=placeholder_resolver.resolve_placeholder,
  )

  container_spec = component_spec.implementation.container

  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

  output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
  output_paths.update(placeholder_resolver.output_paths)

  input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
  input_paths.update(placeholder_resolver.input_paths)

  artifact_argument_paths = [
      dsl.InputArgumentPath(
          argument=arguments[input_name],
          input=input_name,
          path=path,
      ) for input_name, path in input_paths.items()
  ]

  task = _container_op.ContainerOp(
      name=component_spec.name or _components._default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_paths,
      artifact_argument_paths=artifact_argument_paths,
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  if component_ref:
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

  task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor. Outputs with non-pythonic names could not be accessed using
  # their original names. Now ContainerOp supports any output names, so we're
  # now using the original output names. However, to support legacy pipelines,
  # we're also adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = _naming.generate_unique_name_conversion_table(
      output_names, _naming._sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  if container_spec.env:
    from kubernetes import client as k8s_client
    for name, value in container_spec.env.items():
      task.container.add_env_variable(
          k8s_client.V1EnvVar(name=name, value=value))

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  _attach_v2_specs(task, component_spec, original_arguments)

  return task
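# In the kfp v1 SDK, a bridge like the function above is normally reached
# indirectly: loading a component definition and invoking the resulting
# factory inside a pipeline constructs the ContainerOp for you. A minimal
# sketch; the component YAML below is invented for illustration.
from kfp import components, dsl

echo_op = components.load_component_from_text('''
name: Echo
inputs:
- {name: msg, type: String}
implementation:
  container:
    image: alpine
    command: [echo, {inputValue: msg}]
''')

@dsl.pipeline(name='echo-demo')
def echo_pipeline(msg: str = 'hello'):
    echo_op(msg=msg)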
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: structures.ComponentReference = None,
) -> container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: The component reference. Optional.

  Returns:
    A ContainerOp instance.
  """
  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
  pipeline_task_spec.task_info.name = component_spec.name
  # Might need to append a suffix to executor_label to ensure its uniqueness?
  pipeline_task_spec.executor_label = component_spec.name

  # Keep track of auto-injected importer spec.
  importer_spec = {}

  # Check types of the reference arguments and serialize PipelineParams
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, dsl.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      reference_type = argument_value.param_type
      types.verify_type_compatibility(
          reference_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      arguments[input_name] = str(argument_value)

      if type_utils.is_parameter_type(input_type):
        if argument_value.op_name:
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.producer_task = (
                  argument_value.op_name)
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.output_parameter_key = (
                  argument_value.name)
        else:
          pipeline_task_spec.inputs.parameters[
              input_name].runtime_value.runtime_parameter = argument_value.name
      else:
        if argument_value.op_name:
          pipeline_task_spec.inputs.artifacts[
              input_name].producer_task = (argument_value.op_name)
          pipeline_task_spec.inputs.artifacts[
              input_name].output_artifact_key = (argument_value.name)
        else:
          # argument_value.op_name could be none, in which case an importer
          # node will be inserted later.
          pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, component_spec.inputs)
          importer_spec[input_name] = importer_node.build_importer_spec(
              input_type_schema=type_schema,
              pipeline_param_name=argument_value.name)
    elif isinstance(argument_value, str):
      input_type = component_spec._inputs_dict[input_name].type
      if type_utils.is_parameter_type(input_type):
        pipeline_task_spec.inputs.parameters[
            input_name].runtime_value.constant_value.string_value = (
                argument_value)
      else:
        # An importer node with constant value artifact_uri will be inserted.
        pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
        type_schema = type_utils.get_input_artifact_type_schema(
            input_name, component_spec.inputs)
        importer_spec[input_name] = importer_node.build_importer_spec(
            input_type_schema=type_schema, constant_value=argument_value)
    elif isinstance(argument_value, int):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.int_value = argument_value
    elif isinstance(argument_value, float):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.double_value = argument_value
    elif isinstance(argument_value, dsl.ContainerOp):
      raise TypeError(
          'ContainerOp object {} was passed to component as an input argument. '
          'Pass a single output instead.'.format(input_name))
    else:
      raise NotImplementedError(
          'Input argument supports only the following types: PipelineParam'
          ', str, int, float. Got: "{}".'.format(argument_value))

  for output in component_spec.outputs or []:
    if type_utils.is_parameter_type(output.type):
      pipeline_task_spec.outputs.parameters[
          output.name].type = type_utils.get_parameter_type(output.type)
    else:
      pipeline_task_spec.outputs.artifacts[
          output.name].artifact_type.instance_schema = (
              type_utils.get_artifact_type_schema(output.type))

  inputs_dict = {
      input_spec.name: input_spec for input_spec in component_spec.inputs or []
  }
  outputs_dict = {
      output_spec.name: output_spec
      for output_spec in component_spec.outputs or []
  }

  def _input_artifact_uri_placeholder(input_key: str) -> str:
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
          .format(input_key, inputs_dict[input_key].type))
    else:
      return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

  def _input_artifact_path_placeholder(input_key: str) -> str:
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
          .format(input_key, inputs_dict[input_key].type))
    else:
      return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

  def _input_parameter_placeholder(input_key: str) -> str:
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
    else:
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
          .format(input_key, inputs_dict[input_key].type))

  def _output_artifact_uri_placeholder(output_key: str) -> str:
    if type_utils.is_parameter_type(outputs_dict[output_key].type):
      raise TypeError(
          'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
          .format(output_key, outputs_dict[output_key].type))
    else:
      return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

  def _output_artifact_path_placeholder(output_key: str) -> str:
    return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

  def _output_parameter_path_placeholder(output_key: str) -> str:
    return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

  def _resolve_output_path_placeholder(output_key: str) -> str:
    if type_utils.is_parameter_type(outputs_dict[output_key].type):
      return _output_parameter_path_placeholder(output_key)
    else:
      return _output_artifact_path_placeholder(output_key)

  resolved_cmd = _resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      input_value_generator=_input_parameter_placeholder,
      input_uri_generator=_input_artifact_uri_placeholder,
      output_uri_generator=_output_artifact_uri_placeholder,
      input_path_generator=_input_artifact_path_placeholder,
      output_path_generator=_resolve_output_path_placeholder,
  )

  container_spec = component_spec.implementation.container

  pipeline_container_spec = (
      pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
  pipeline_container_spec.image = container_spec.image
  pipeline_container_spec.command.extend(resolved_cmd.command)
  pipeline_container_spec.args.extend(resolved_cmd.args)

  output_uris_and_paths = resolved_cmd.output_uris.copy()
  output_uris_and_paths.update(resolved_cmd.output_paths)
  input_uris_and_paths = resolved_cmd.input_uris.copy()
  input_uris_and_paths.update(resolved_cmd.input_paths)

  old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
  task = container_op.ContainerOp(
      name=component_spec.name or _default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_uris_and_paths,
      artifact_argument_paths=[
          dsl.InputArgumentPath(
              argument=arguments[input_name],
              input=input_name,
              path=path,
          ) for input_name, path in input_uris_and_paths.items()
      ],
  )

  task.task_spec = pipeline_task_spec
  task.importer_spec = importer_spec
  task.container_spec = pipeline_container_spec
  dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  component_ref_without_spec = copy.copy(component_ref)
  component_ref_without_spec.spec = None
  task._component_ref = component_ref_without_spec

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor. Outputs with non-pythonic names could not be accessed using
  # their original names. Now ContainerOp supports any output names, so we're
  # now using the original output names. However, to support legacy pipelines,
  # we're also adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = generate_unique_name_conversion_table(
      output_names, _sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  return task
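# The pythonic-name aliasing block above relies on kfp v1's internal naming
# helpers; a small standalone sketch of the conversion table it builds. The
# sample output names are invented for illustration, and the private import
# path is an assumption based on the references in the functions above.
from kfp.components._naming import (
    _sanitize_python_function_name, generate_unique_name_conversion_table)

table = generate_unique_name_conversion_table(
    ['Trained Model', 'MLPipeline UI Metadata'], _sanitize_python_function_name)
# Expected mapping, e.g. {'Trained Model': 'trained_model',
#                         'MLPipeline UI Metadata': 'mlpipeline_ui_metadata'}
print(table)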
def iris_train_pipeline(
    kernel: dsl.PipelineParam = dsl.PipelineParam(
        name='kernel', value='linear, poly, rbf, sigmoid or precomputed'),
    C: dsl.PipelineParam = dsl.PipelineParam(
        name='C', value='Float value, default value is 1'),
    n_neighbors: dsl.PipelineParam = dsl.PipelineParam(
        name='n_neighbors', value='int value'),
    n_splits: dsl.PipelineParam = dsl.PipelineParam(
        name='n_splits', value='Number of splits for fold'),
    location: dsl.PipelineParam = dsl.PipelineParam(
        name='location', value='FOLDER_NAME_TO_MODELS'),
    svm_filename: dsl.PipelineParam = dsl.PipelineParam(
        name='svm-filename', value='SVM_NAME'),
    lr_filename: dsl.PipelineParam = dsl.PipelineParam(
        name='logistic-regression-filename', value='LOGISTIC_REGRESSION_NAME'),
    dt_filename: dsl.PipelineParam = dsl.PipelineParam(
        name='decision-tree-filename', value='DECISION_TREE_NAME'),
    knn_filename: dsl.PipelineParam = dsl.PipelineParam(
        name='knn-filename', value='KNN_NAME'),
    # Each label parameter needs a unique name; reusing name='labels' for all
    # three would collide.
    label1: dsl.PipelineParam = dsl.PipelineParam(name='label1', value='Label 1'),
    label2: dsl.PipelineParam = dsl.PipelineParam(name='label2', value='Label 2'),
    label3: dsl.PipelineParam = dsl.PipelineParam(name='label3', value='Label 3')):
    _load_data = load_op()
    _transform = transform_op(
        dsl.InputArgumentPath(_load_data.outputs['iris'])).after(_load_data)
    _svm = svm_op(
        str(svm_filename) + '.pkl',
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        kernel, C, n_splits).after(_transform)
    _lr = lr_op(
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        str(lr_filename) + '.pkl', n_splits).after(_transform)
    _dt = dt_op(
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        str(dt_filename) + '.pkl', n_splits).after(_transform)
    _knn = knn_op(
        dsl.InputArgumentPath(_transform.outputs['X_train']),
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_transform.outputs['X_test']),
        n_neighbors, n_splits,
        str(knn_filename) + '.pkl',
    ).after(_transform)

    models = [
        dsl.InputArgumentPath(_svm.outputs['svm_model']),
        dsl.InputArgumentPath(_lr.outputs['lr_model']),
        dsl.InputArgumentPath(_dt.outputs['dt_model']),
        dsl.InputArgumentPath(_knn.outputs['knn_model']),
    ]
    _save_s3 = save_s3_op(
        models, location,
        [svm_filename, lr_filename, dt_filename, knn_filename]).after(
            _svm, _lr, _dt,
            _knn).apply(aws.use_aws_secret(secret_name='s3-secrets'))

    _evaluation_knn = evaluation_op(
        dsl.InputArgumentPath(_knn.outputs['knn_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_knn.outputs['knn_y_scores'])).after(_knn)
    _evaluation_dt = evaluation_op(
        dsl.InputArgumentPath(_dt.outputs['dt_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        dsl.InputArgumentPath(_dt.outputs['dt_y_scores'])).after(_dt)
    _evaluation_svm = evaluation_op(
        dsl.InputArgumentPath(_svm.outputs['svm_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        # The SVM scores come from the SVM op, not the decision-tree op.
        dsl.InputArgumentPath(_svm.outputs['svm_y_scores'])).after(_svm)
    _evaluation_lr = evaluation_op(
        dsl.InputArgumentPath(_lr.outputs['lr_predict']),
        dsl.InputArgumentPath(_transform.outputs['y_test']),
        [label1, label2, label3],
        dsl.InputArgumentPath(_transform.outputs['y_train']),
        # Likewise, the LR scores come from the LR op.
        dsl.InputArgumentPath(_lr.outputs['lr_y_scores'])).after(_lr)
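# A hypothetical submission sketch for the iris pipeline above; the host URL
# and the argument values are assumptions for illustration.
import kfp

client = kfp.Client(host='http://localhost:8080')  # assumed port-forwarded host
client.create_run_from_pipeline_func(
    iris_train_pipeline,
    arguments={
        'kernel': 'rbf',
        'C': '1.0',
        'n_neighbors': '5',
        'n_splits': '5',
        'location': 'models',
    })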