def _add_step(self, step_kind, step_label, transform_node, side_tags=()):
    """Build a new Step, attach it to the job and register it in the cache.

    Args:
      step_kind: Kind passed as the first argument to ``apiclient.Step``.
      step_label: Value stored under the step's USER_NAME property.
      transform_node: Node the step is cached against; its ``transform``
        attribute is also the source of the display data attached below.
      side_tags: Extra output tags under which the same step is cached.
        Defaults to an empty tuple, in which case only the ``None`` (main
        output) entry is cached.

    Returns:
      The newly created step.
    """
    # Deferred import: keeps local-only runs free of this dependency.
    # pylint: disable=wrong-import-order, wrong-import-position
    from apache_beam.runners.dataflow.internal import apiclient

    new_step = apiclient.Step(step_kind, self._get_unique_step_name())
    self.job.proto.steps.append(new_step.proto)
    new_step.add_property(PropertyNames.USER_NAME, step_label)

    # The main output is registered under the None tag.
    self._cache.cache_output(transform_node, None, new_step)

    # For a multi-output node every tag gets its own (node, tag, step) entry,
    # since cache lookups are always keyed with the tag included.
    for output_tag in side_tags:
        self._cache.cache_output(transform_node, output_tag, new_step)

    # Attach display data last; a transform without any display data simply
    # contributes an empty list.
    display_data = DisplayData.create_from(transform_node.transform)
    display_items = [entry.get_dict() for entry in display_data.items]
    new_step.add_property(PropertyNames.DISPLAY_DATA, display_items)

    return new_step
def _add_singleton_step(self, label, full_label, tag, input_step):
    """Build the CollectionToSingleton step that serves a ParDo side input.

    Args:
      label: Name passed to ``apiclient.Step`` for the new step.
      full_label: Value stored under USER_NAME; also the prefix of the
        user-visible name of the step's output.
      tag: Tag passed to ``input_step.get_output`` to select which output
        of the input step is consumed.
      input_step: Step whose output is wired in as this step's parallel
        input and whose encoding is used to derive the side input encoding.

    Returns:
      The newly created step.
    """
    # Deferred import: keeps local-only runs free of this dependency.
    from apache_beam.runners.dataflow.internal import apiclient

    singleton_step = apiclient.Step(
        TransformNames.COLLECTION_TO_SINGLETON, label)
    self.job.proto.steps.append(singleton_step.proto)
    singleton_step.add_property(PropertyNames.USER_NAME, full_label)

    # Reference to the upstream output this step reads from.
    input_reference = {
        '@type': 'OutputReference',
        PropertyNames.STEP_NAME: input_step.proto.name,
        PropertyNames.OUTPUT_NAME: input_step.get_output(tag),
    }
    singleton_step.add_property(PropertyNames.PARALLEL_INPUT, input_reference)

    # The encoding must be set before it is reported in the output info.
    singleton_step.encoding = self._get_side_input_encoding(
        input_step.encoding)

    output_user_name = '%s.%s' % (full_label, PropertyNames.OUTPUT)
    output_descriptor = {
        PropertyNames.USER_NAME: output_user_name,
        PropertyNames.ENCODING: singleton_step.encoding,
        PropertyNames.OUTPUT_NAME: PropertyNames.OUT,
    }
    singleton_step.add_property(
        PropertyNames.OUTPUT_INFO, [output_descriptor])

    return singleton_step