def start_bundle(self):
    """Create the output bundles and start a ``DoFnRunner`` for this bundle.

    For every tag in the applied transform's outputs a new ``PCollection``
    is created, tied to the applied transform as its producer, and given an
    output bundle that is stored in ``self._tagged_receivers``. A new
    counter factory is stored in ``self._counter_factory``. The transform's
    DoFn is copied via a pickle round trip, optionally wrapped for runtime
    type checking, always wrapped for output checking, and handed to a
    ``DoFnRunner`` that is stored in ``self.runner`` and started.
    """
    applied = self._applied_ptransform
    context = self._evaluation_context
    transform = applied.transform

    # One receiving bundle per output tag of the applied transform.
    receivers = _TaggedReceivers(context)
    self._tagged_receivers = receivers
    for tag in applied.outputs:
        pcoll = pvalue.PCollection(None, tag=tag)
        pcoll.producer = applied
        receivers[tag] = context.create_bundle(pcoll)
        receivers[tag].tag = tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    serialized_dofn = pickler.dumps(transform.dofn)
    dofn = pickler.loads(serialized_dofn)

    # Runtime type checking is only applied when pipeline options exist
    # and request it.
    options = context.pipeline_options
    if options is not None:
        if options.view_as(TypeOptions).runtime_type_check:
            dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints())

    step_label = applied.full_label
    dofn = OutputCheckWrapperDoFn(dofn, step_label)

    input_windowing = applied.inputs[0].windowing
    self.runner = DoFnRunner(
        dofn,
        transform.args,
        transform.kwargs,
        self._side_inputs,
        input_windowing,
        tagged_receivers=receivers,
        step_name=step_label,
        state=DoFnState(self._counter_factory),
        scoped_metrics_container=self.scoped_metrics_container)
    self.runner.start()
def start_bundle(self):
    """Create the output bundles and start a ``DoFnRunner`` for this bundle.

    When the parent of the applied transform is a ``core._MultiParDo``, the
    main and side output PCollections are looked up in the parent's
    ``DoOutputsTuple`` and a bundle is created for each side output tag.
    Otherwise the evaluator is expected to have exactly one output, which
    is used as the main output. The main output bundle is always stored
    under the ``None`` tag.

    The transform's DoFn is copied via a pickle round trip, optionally
    wrapped for runtime type checking, always wrapped for output checking
    (choosing the ``NewDoFn`` flavour of each wrapper when applicable), and
    handed to a ``DoFnRunner`` that is stored in ``self.runner`` and
    started.
    """
    applied = self._applied_ptransform
    context = self._evaluation_context
    transform = applied.transform

    receivers = _TaggedReceivers(context)
    self._tagged_receivers = receivers

    parent_transform = applied.parent.transform
    # pylint: disable=protected-access
    if not isinstance(parent_transform, core._MultiParDo):
        # Single-output case: the only output is the main output.
        assert len(self._outputs) == 1
        main_pcoll = list(self._outputs)[0]
    else:
        outputs_tuple = applied.parent.outputs[0]
        assert isinstance(outputs_tuple, pvalue.DoOutputsTuple)
        main_pcoll = outputs_tuple[outputs_tuple._main_tag]

        for side_tag in transform.side_output_tags:
            side_pcoll = outputs_tuple[side_tag]
            receivers[side_tag] = context.create_bundle(side_pcoll)
            receivers[side_tag].tag = side_tag
    # pylint: enable=protected-access

    receivers[None] = context.create_bundle(main_pcoll)
    receivers[None].tag = None  # main_tag is None.

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    serialized_dofn = pickler.dumps(transform.dofn)
    dofn = pickler.loads(serialized_dofn)

    options = context.pipeline_options
    if options is not None:
        if options.view_as(TypeOptions).runtime_type_check:
            # TODO(sourabhbajaj): Remove this if-else
            type_check_wrapper = (
                TypeCheckWrapperNewDoFn
                if isinstance(dofn, core.NewDoFn)
                else TypeCheckWrapperDoFn)
            dofn = type_check_wrapper(dofn, transform.get_type_hints())

    # TODO(sourabhbajaj): Remove this if-else
    # The check is made on the current (possibly already wrapped) dofn.
    output_check_wrapper = (
        OutputCheckWrapperNewDoFn
        if isinstance(dofn, core.NewDoFn)
        else OutputCheckWrapperDoFn)
    dofn = output_check_wrapper(dofn, applied.full_label)

    input_windowing = applied.inputs[0].windowing
    self.runner = DoFnRunner(
        dofn,
        transform.args,
        transform.kwargs,
        self._side_inputs,
        input_windowing,
        tagged_receivers=receivers,
        step_name=applied.full_label,
        state=DoFnState(self._counter_factory),
        scoped_metrics_container=self.scoped_metrics_container)
    self.runner.start()