def start_bundle(self): transform = self._applied_ptransform.transform self._tagged_receivers = _TaggedReceivers(self._evaluation_context) for output_tag in self._applied_ptransform.outputs: output_pcollection = pvalue.PCollection(None, tag=output_tag) output_pcollection.producer = self._applied_ptransform self._tagged_receivers[output_tag] = ( self._evaluation_context.create_bundle(output_pcollection)) self._tagged_receivers[output_tag].tag = output_tag self._counter_factory = counters.CounterFactory() # TODO(aaltay): Consider storing the serialized form as an optimization. dofn = pickler.loads(pickler.dumps(transform.dofn)) pipeline_options = self._evaluation_context.pipeline_options if (pipeline_options is not None and pipeline_options.view_as(TypeOptions).runtime_type_check): dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints()) dofn = OutputCheckWrapperDoFn(dofn, self._applied_ptransform.full_label) self.runner = DoFnRunner( dofn, transform.args, transform.kwargs, self._side_inputs, self._applied_ptransform.inputs[0].windowing, tagged_receivers=self._tagged_receivers, step_name=self._applied_ptransform.full_label, state=DoFnState(self._counter_factory), scoped_metrics_container=self.scoped_metrics_container) self.runner.start()
def start_bundle(self): transform = self._applied_ptransform.transform self._tagged_receivers = _TaggedReceivers(self._evaluation_context) for output_tag in self._applied_ptransform.outputs: output_pcollection = pvalue.PCollection(None, tag=output_tag) output_pcollection.producer = self._applied_ptransform self._tagged_receivers[output_tag] = ( self._evaluation_context.create_bundle(output_pcollection)) self._tagged_receivers[output_tag].tag = output_tag self._counter_factory = counters.CounterFactory() # TODO(aaltay): Consider storing the serialized form as an optimization. dofn = (pickler.loads(pickler.dumps(transform.dofn)) if self._perform_dofn_pickle_test else transform.dofn) args = transform.args if hasattr(transform, 'args') else [] kwargs = transform.kwargs if hasattr(transform, 'kwargs') else {} self.runner = DoFnRunner(dofn, args, kwargs, self._side_inputs, self._applied_ptransform.inputs[0].windowing, tagged_receivers=self._tagged_receivers, step_name=self._applied_ptransform.full_label, state=DoFnState(self._counter_factory)) self.runner.start()
def start_bundle(self): transform = self._applied_ptransform.transform self._tagged_receivers = _TaggedReceivers(self._evaluation_context) if isinstance(self._applied_ptransform.parent.transform, core._MultiParDo): # pylint: disable=protected-access do_outputs_tuple = self._applied_ptransform.parent.outputs[0] assert isinstance(do_outputs_tuple, pvalue.DoOutputsTuple) main_output_pcollection = do_outputs_tuple[ do_outputs_tuple._main_tag] # pylint: disable=protected-access for side_output_tag in transform.side_output_tags: output_pcollection = do_outputs_tuple[side_output_tag] self._tagged_receivers[side_output_tag] = ( self._evaluation_context.create_bundle(output_pcollection)) self._tagged_receivers[side_output_tag].tag = side_output_tag else: assert len(self._outputs) == 1 main_output_pcollection = list(self._outputs)[0] self._tagged_receivers[None] = self._evaluation_context.create_bundle( main_output_pcollection) self._tagged_receivers[None].tag = None # main_tag is None. self._counter_factory = counters.CounterFactory() # TODO(aaltay): Consider storing the serialized form as an optimization. dofn = pickler.loads(pickler.dumps(transform.dofn)) pipeline_options = self._evaluation_context.pipeline_options if (pipeline_options is not None and pipeline_options.view_as(TypeOptions).runtime_type_check): # TODO(sourabhbajaj): Remove this if-else if isinstance(dofn, core.NewDoFn): dofn = TypeCheckWrapperNewDoFn(dofn, transform.get_type_hints()) else: dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints()) # TODO(sourabhbajaj): Remove this if-else if isinstance(dofn, core.NewDoFn): dofn = OutputCheckWrapperNewDoFn( dofn, self._applied_ptransform.full_label) else: dofn = OutputCheckWrapperDoFn(dofn, self._applied_ptransform.full_label) self.runner = DoFnRunner( dofn, transform.args, transform.kwargs, self._side_inputs, self._applied_ptransform.inputs[0].windowing, tagged_receivers=self._tagged_receivers, step_name=self._applied_ptransform.full_label, state=DoFnState(self._counter_factory), scoped_metrics_container=self.scoped_metrics_container) self.runner.start()
def start_bundle(self): transform = self._applied_ptransform.transform self._tagged_receivers = _TaggedReceivers(self._evaluation_context) for output_tag in self._applied_ptransform.outputs: output_pcollection = pvalue.PCollection(None, tag=output_tag) output_pcollection.producer = self._applied_ptransform self._tagged_receivers[output_tag] = ( self._evaluation_context.create_bundle(output_pcollection)) self._tagged_receivers[output_tag].tag = output_tag self._counter_factory = counters.CounterFactory() # TODO(aaltay): Consider storing the serialized form as an optimization. dofn = ( pickler.loads(pickler.dumps(transform.dofn)) if self._perform_dofn_pickle_test else transform.dofn) args = transform.args if hasattr(transform, 'args') else [] kwargs = transform.kwargs if hasattr(transform, 'kwargs') else {} self.user_state_context = None self.user_timer_map = {} if is_stateful_dofn(dofn): kv_type_hint = self._applied_ptransform.inputs[0].element_type if kv_type_hint and kv_type_hint != Any: coder = coders.registry.get_coder(kv_type_hint) self.key_coder = coder.key_coder() else: self.key_coder = coders.registry.get_coder(Any) self.user_state_context = DirectUserStateContext( self._step_context, dofn, self.key_coder) _, all_timer_specs = get_dofn_specs(dofn) for timer_spec in all_timer_specs: self.user_timer_map['user/%s' % timer_spec.name] = timer_spec self.runner = DoFnRunner( dofn, args, kwargs, self._side_inputs, self._applied_ptransform.inputs[0].windowing, tagged_receivers=self._tagged_receivers, step_name=self._applied_ptransform.full_label, state=DoFnState(self._counter_factory), user_state_context=self.user_state_context) self.runner.setup() self.runner.start()