def importer(
    artifact_uri: Union[pipeline_channel.PipelineParameterChannel, str],
    artifact_class: Type[artifact_types.Artifact],
    reimport: bool = False,
    metadata: Optional[Mapping[str, Any]] = None,
) -> pipeline_task.PipelineTask:
    """Creates a task that imports an existing artifact. Only for v2 pipeline.

    The function assembles a component spec named 'importer' whose single
    input (keyed by INPUT_KEY) is a String and whose single output (keyed by
    OUTPUT_KEY) is typed after the artifact class, wraps it in an
    ImporterComponent, and calls that component with the given uri.

    Args:
        artifact_uri: The artifact uri to import from. It is passed through
            unchanged as the `uri` argument of the importer component; this
            function performs no validation of it itself.
        artifact_class: The class of the artifact to be imported. Its
            `TYPE_NAME` becomes the importer's type schema and its
            `__name__` becomes the output type.
        reimport: Whether to reimport the artifact. Defaults to False.
        metadata: Properties of the artifact.

    Returns:
        The PipelineTask produced by calling the importer component.
    """
    # The importer reads its uri from the component input at runtime, so the
    # spec holds a placeholder rather than the concrete value.
    uri_placeholder = placeholders.input_parameter_placeholder(INPUT_KEY)
    importer_spec = structures.ImporterSpec(
        artifact_uri=uri_placeholder,
        type_schema=artifact_class.TYPE_NAME,
        reimport=reimport,
        metadata=metadata,
    )
    implementation = structures.Implementation(importer=importer_spec)

    input_specs = {INPUT_KEY: structures.InputSpec(type='String')}
    output_specs = {
        OUTPUT_KEY: structures.OutputSpec(type=artifact_class.__name__),
    }

    importer_component_spec = structures.ComponentSpec(
        name='importer',
        implementation=implementation,
        inputs=input_specs,
        outputs=output_specs,
    )
    importer_op = importer_component.ImporterComponent(
        component_spec=importer_component_spec)
    return importer_op(uri=artifact_uri)
def build_task_spec_for_task(
    task: pipeline_task.PipelineTask,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    tasks_in_current_dag: List[str],
    input_parameters_in_current_dag: List[str],
    input_artifacts_in_current_dag: List[str],
) -> pipeline_spec_pb2.PipelineTaskSpec:
    """Builds PipelineTaskSpec for a pipeline task.

    A task input may reference an output outside its immediate DAG.
    For instance::

        random_num = random_num_op(...)
        with dsl.Condition(random_num.output > 5):
            print_op('%s > 5' % random_num.output)

    In this example, `dsl.Condition` forms a subDAG with one task from
    `print_op` inside the subDAG. The task of `print_op` references output
    from `random_num` task, which is outside the sub-DAG. When compiling to
    IR, such cross DAG reference is disallowed. So we need to "punch a hole"
    in the sub-DAG to make the input available in the subDAG component inputs
    if it's not already there. Next, we can call this method to fix the tasks
    inside the subDAG to make them reference the component inputs instead of
    directly referencing the original producer task.

    Args:
        task: The task to build a PipelineTaskSpec for.
        parent_component_inputs: The task's parent component's input specs.
        tasks_in_current_dag: The list of tasks names for tasks in the same
            dag.
        input_parameters_in_current_dag: The list of input parameters in the
            DAG component. Not read by this function's body.
        input_artifacts_in_current_dag: The list of input artifacts in the
            DAG component. Not read by this function's body.

    Returns:
        A PipelineTaskSpec object representing the task.

    Raises:
        RuntimeError: If an artifact input is not produced by a task, or if a
            compiler injected input name collides with an existing input
            name of the task.
        ValueError: If an input value has an unsupported type.
        AssertionError: If an input coming from outside the current DAG is
            not present in `parent_component_inputs`.
    """
    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = (
        task.task_spec.display_name or task.name)
    # Use task.name for component_ref.name because we may customize component
    # spec for individual tasks to work around the lack of optional inputs
    # support in IR.
    pipeline_task_spec.component_ref.name = (
        component_utils.sanitize_component_name(task.name))
    pipeline_task_spec.caching_options.enable_cache = (
        task.task_spec.enable_caching)

    for input_name, input_value in task.inputs.items():
        # NOTE(review): `input_type` is not used below. The lookup is kept
        # because it fails early for an input name that the component spec
        # does not declare (assuming `inputs` is a dict-like mapping).
        input_type = task.component_spec.inputs[input_name].type

        if isinstance(input_value, pipeline_channel.PipelineArtifactChannel):

            if input_value.task_name:
                # Value is produced by an upstream task.
                if input_value.task_name in tasks_in_current_dag:
                    # Dependent task within the same DAG.
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.producer_task = (
                            component_utils.sanitize_task_name(
                                input_value.task_name))
                    pipeline_task_spec.inputs.artifacts[
                        input_name].task_output_artifact.output_artifact_key = (
                            input_value.name)
                else:
                    # Dependent task not from the same DAG.
                    component_input_artifact = (
                        _additional_input_name_for_pipeline_channel(
                            input_value))
                    assert component_input_artifact in parent_component_inputs.artifacts, \
                        'component_input_artifact: {} not found. All inputs: {}'.format(
                            component_input_artifact, parent_component_inputs)
                    pipeline_task_spec.inputs.artifacts[
                        input_name].component_input_artifact = (
                            component_input_artifact)
            else:
                raise RuntimeError(
                    f'Artifacts must be produced by a task. Got {input_value}.')

        elif isinstance(input_value,
                        pipeline_channel.PipelineParameterChannel):

            if input_value.task_name:
                # Value is produced by an upstream task.
                if input_value.task_name in tasks_in_current_dag:
                    # Dependent task within the same DAG.
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            component_utils.sanitize_task_name(
                                input_value.task_name))
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            input_value.name)
                else:
                    # Dependent task not from the same DAG.
                    component_input_parameter = (
                        _additional_input_name_for_pipeline_channel(
                            input_value))
                    assert component_input_parameter in parent_component_inputs.parameters, \
                        'component_input_parameter: {} not found. All inputs: {}'.format(
                            component_input_parameter, parent_component_inputs)
                    pipeline_task_spec.inputs.parameters[
                        input_name].component_input_parameter = (
                            component_input_parameter)
            else:
                # Value is from pipeline input.
                component_input_parameter = input_value.full_name
                if component_input_parameter not in parent_component_inputs.parameters:
                    component_input_parameter = (
                        _additional_input_name_for_pipeline_channel(
                            input_value))
                pipeline_task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        component_input_parameter)

        elif isinstance(input_value, for_loop.LoopArgument):

            component_input_parameter = (
                _additional_input_name_for_pipeline_channel(input_value))
            assert component_input_parameter in parent_component_inputs.parameters, \
                'component_input_parameter: {} not found. All inputs: {}'.format(
                    component_input_parameter, parent_component_inputs)
            pipeline_task_spec.inputs.parameters[
                input_name].component_input_parameter = (
                    component_input_parameter)

        elif isinstance(input_value, for_loop.LoopArgumentVariable):

            component_input_parameter = (
                _additional_input_name_for_pipeline_channel(
                    input_value.loop_argument))
            assert component_input_parameter in parent_component_inputs.parameters, \
                'component_input_parameter: {} not found. All inputs: {}'.format(
                    component_input_parameter, parent_component_inputs)
            pipeline_task_spec.inputs.parameters[
                input_name].component_input_parameter = (
                    component_input_parameter)
            # Select the sub-variable out of the loop item at runtime.
            pipeline_task_spec.inputs.parameters[
                input_name].parameter_expression_selector = (
                    'parseJson(string_value)["{}"]'.format(
                        input_value.subvar_name))

        elif isinstance(input_value, str):

            # Handle extra input due to string concat
            pipeline_channels = (
                pipeline_channel.extract_pipeline_channels_from_any(
                    input_value))
            for channel in pipeline_channels:
                # value contains PipelineChannel placeholders which needs to
                # be replaced. And the input needs to be added to the task
                # spec.

                # Form the name for the compiler injected input, and make
                # sure it doesn't collide with any existing input names.
                additional_input_name = (
                    _additional_input_name_for_pipeline_channel(channel))

                # We don't expect collision to happen because we prefix the
                # name of additional input with 'pipelinechannel--'. But just
                # in case collision did happen, throw a RuntimeError so that
                # we don't get surprise at runtime.
                if additional_input_name in task.inputs:
                    raise RuntimeError(
                        'Name collision between existing input name '
                        '{} and compiler injected input name {}'.format(
                            additional_input_name, additional_input_name))

                additional_input_placeholder = (
                    placeholders.input_parameter_placeholder(
                        additional_input_name))
                input_value = input_value.replace(
                    channel.pattern, additional_input_placeholder)

                if channel.task_name:
                    # Value is produced by an upstream task.
                    if channel.task_name in tasks_in_current_dag:
                        # Dependent task within the same DAG.
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.producer_task = (
                                component_utils.sanitize_task_name(
                                    channel.task_name))
                        # The output key belongs on the same injected input
                        # as the producer task. `input_name` itself receives
                        # the constant string after this loop, so writing the
                        # key there would leave the injected input without
                        # an output key.
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].task_output_parameter.output_parameter_key = (
                                channel.name)
                    else:
                        # Dependent task not from the same DAG.
                        component_input_parameter = (
                            _additional_input_name_for_pipeline_channel(
                                channel))
                        assert component_input_parameter in parent_component_inputs.parameters, \
                            'component_input_parameter: {} not found. All inputs: {}'.format(
                                component_input_parameter,
                                parent_component_inputs)
                        pipeline_task_spec.inputs.parameters[
                            additional_input_name].component_input_parameter = (
                                component_input_parameter)
                else:
                    # Value is from pipeline input. (or loop?)
                    component_input_parameter = channel.full_name
                    if component_input_parameter not in parent_component_inputs.parameters:
                        component_input_parameter = (
                            _additional_input_name_for_pipeline_channel(
                                channel))
                    pipeline_task_spec.inputs.parameters[
                        additional_input_name].component_input_parameter = (
                            component_input_parameter)

            # The (possibly rewritten) string is passed as a constant; any
            # channel references in it are now input parameter placeholders.
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.string_value = input_value

        elif isinstance(input_value, (str, int, float, bool, dict, list)):

            # `str` never reaches this branch; it is handled above.
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant.CopyFrom(
                    _to_protobuf_value(input_value))

        else:
            raise ValueError(
                'Input argument supports only the following types: '
                'str, int, float, bool, dict, and list.'
                f'Got {input_value} of type {type(input_value)}.')

    return pipeline_task_spec
def expand_command_part(arg) -> Union[str, List[str], None]:
    """Expands one command-line element into its resolved string form.

    The names `inputs_dict`, `outputs_dict`, `arguments`, `argument_values`
    and `expand_argument_list` are not defined in this function; they are
    resolved from the surrounding scope.

    Args:
        arg: The command-line element. Either None, a primitive value
            (str, int, float, bool), a dict or list, or one of the
            placeholder structures handled below.

    Returns:
        None if `arg` is None, or if `arg` is an input placeholder whose
        input has no argument but may be omitted. A string for primitives
        (via `str`), dicts and lists (via `json.dumps`), and for resolved
        placeholders. For an IfPresentPlaceholder, whatever the selected
        branch expands to: an empty list when the branch is None, the result
        of `expand_argument_list` when it is a list, otherwise the result of
        expanding the single node.

    Raises:
        TypeError: If a placeholder is paired with an input/output of an
            incompatible type, or if `arg` has an unrecognized type.
        ValueError: If a required input has no argument.
    """
    if arg is None:
        return None

    if isinstance(arg, (str, int, float, bool)):
        return str(arg)

    elif isinstance(arg, (dict, list)):
        return json.dumps(arg)

    elif isinstance(arg, structures.InputValuePlaceholder):
        input_name = arg.input_name
        # Value placeholders are only valid for parameter-typed inputs.
        if not type_utils.is_parameter_type(inputs_dict[input_name].type):
            raise TypeError(
                f'Input "{input_name}" with type '
                f'"{inputs_dict[input_name].type}" cannot be paired with '
                'InputValuePlaceholder.')

        if input_name in arguments:
            return placeholders.input_parameter_placeholder(input_name)

        # No argument given: the part is dropped when a default exists.
        input_spec = inputs_dict[input_name]
        if input_spec.default is not None:
            return None
        raise ValueError(f'No value provided for input: {input_name}.')

    elif isinstance(arg, structures.InputUriPlaceholder):
        input_name = arg.input_name
        # Uri placeholders are only valid for non-parameter inputs.
        if type_utils.is_parameter_type(inputs_dict[input_name].type):
            raise TypeError(
                f'Input "{input_name}" with type '
                f'"{inputs_dict[input_name].type}" cannot be paired with '
                'InputUriPlaceholder.')

        if input_name in arguments:
            return placeholders.input_artifact_uri_placeholder(input_name)

        input_spec = inputs_dict[input_name]
        if input_spec.default is not None:
            return None
        raise ValueError(f'No value provided for input: {input_name}.')

    elif isinstance(arg, structures.InputPathPlaceholder):
        input_name = arg.input_name
        # Path placeholders are only valid for non-parameter inputs.
        if type_utils.is_parameter_type(inputs_dict[input_name].type):
            raise TypeError(
                f'Input "{input_name}" with type '
                f'"{inputs_dict[input_name].type}" cannot be paired with '
                'InputPathPlaceholder.')

        if input_name in arguments:
            return placeholders.input_artifact_path_placeholder(input_name)

        # NOTE(review): this branch tests `optional` while the value and uri
        # branches test `default is not None` -- confirm the difference is
        # intended.
        input_spec = inputs_dict[input_name]
        if input_spec.optional:
            return None
        raise ValueError(f'No value provided for input: {input_name}.')

    elif isinstance(arg, structures.OutputUriPlaceholder):
        output_name = arg.output_name
        if type_utils.is_parameter_type(outputs_dict[output_name].type):
            raise TypeError(
                f'Output "{output_name}" with type '
                f'"{outputs_dict[output_name].type}" cannot be paired with '
                'OutputUriPlaceholder.')
        return placeholders.output_artifact_uri_placeholder(output_name)

    elif isinstance(arg, structures.OutputPathPlaceholder):
        output_name = arg.output_name
        # Output paths exist for both kinds of output; only the placeholder
        # flavor differs.
        if type_utils.is_parameter_type(outputs_dict[output_name].type):
            return placeholders.output_parameter_path_placeholder(output_name)
        return placeholders.output_artifact_path_placeholder(output_name)

    elif isinstance(arg, structures.ConcatPlaceholder):
        expanded_argument_strings = expand_argument_list(arg.items)
        return ''.join(expanded_argument_strings)

    elif isinstance(arg, structures.IfPresentPlaceholder):
        # NOTE(review): membership is tested against `argument_values` here
        # but against `arguments` in the input branches -- presumably the
        # same mapping; verify in the enclosing scope.
        if arg.if_structure.input_name in argument_values:
            result_node = arg.if_structure.then
        else:
            result_node = arg.if_structure.otherwise

        if result_node is None:
            return []

        if isinstance(result_node, list):
            return expand_argument_list(result_node)
        return expand_command_part(result_node)

    else:
        raise TypeError('Unrecognized argument type: {}'.format(arg))
def test_input_parameter_placeholder(self):
    """Checks the placeholder string generated for input parameter 'input1'."""
    expected_placeholder = "{{$.inputs.parameters['input1']}}"
    actual_placeholder = placeholders.input_parameter_placeholder('input1')
    self.assertEqual(expected_placeholder, actual_placeholder)