示例#1
0
  def start_bundle(self):
    transform = self._applied_ptransform.transform

    self._tagged_receivers = _TaggedReceivers(self._evaluation_context)
    for output_tag in self._applied_ptransform.outputs:
      output_pcollection = pvalue.PCollection(None, tag=output_tag)
      output_pcollection.producer = self._applied_ptransform
      self._tagged_receivers[output_tag] = (
          self._evaluation_context.create_bundle(output_pcollection))
      self._tagged_receivers[output_tag].tag = output_tag

    self._counter_factory = counters.CounterFactory()

    # TODO(aaltay): Consider storing the serialized form as an optimization.
    dofn = pickler.loads(pickler.dumps(transform.dofn))

    pipeline_options = self._evaluation_context.pipeline_options
    if (pipeline_options is not None
        and pipeline_options.view_as(TypeOptions).runtime_type_check):
      dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints())

    dofn = OutputCheckWrapperDoFn(dofn, self._applied_ptransform.full_label)
    self.runner = DoFnRunner(
        dofn, transform.args, transform.kwargs,
        self._side_inputs,
        self._applied_ptransform.inputs[0].windowing,
        tagged_receivers=self._tagged_receivers,
        step_name=self._applied_ptransform.full_label,
        state=DoFnState(self._counter_factory),
        scoped_metrics_container=self.scoped_metrics_container)
    self.runner.start()
示例#2
0
    def start_bundle(self):
        transform = self._applied_ptransform.transform

        self._tagged_receivers = _TaggedReceivers(self._evaluation_context)
        if isinstance(self._applied_ptransform.parent.transform,
                      core._MultiParDo):  # pylint: disable=protected-access
            do_outputs_tuple = self._applied_ptransform.parent.outputs[0]
            assert isinstance(do_outputs_tuple, pvalue.DoOutputsTuple)
            main_output_pcollection = do_outputs_tuple[
                do_outputs_tuple._main_tag]  # pylint: disable=protected-access

            for side_output_tag in transform.side_output_tags:
                output_pcollection = do_outputs_tuple[side_output_tag]
                self._tagged_receivers[side_output_tag] = (
                    self._evaluation_context.create_bundle(output_pcollection))
                self._tagged_receivers[side_output_tag].tag = side_output_tag
        else:
            assert len(self._outputs) == 1
            main_output_pcollection = list(self._outputs)[0]

        self._tagged_receivers[None] = self._evaluation_context.create_bundle(
            main_output_pcollection)
        self._tagged_receivers[None].tag = None  # main_tag is None.

        self._counter_factory = counters.CounterFactory()

        # TODO(aaltay): Consider storing the serialized form as an optimization.
        dofn = pickler.loads(pickler.dumps(transform.dofn))

        pipeline_options = self._evaluation_context.pipeline_options
        if (pipeline_options is not None
                and pipeline_options.view_as(TypeOptions).runtime_type_check):
            # TODO(sourabhbajaj): Remove this if-else
            if isinstance(dofn, core.NewDoFn):
                dofn = TypeCheckWrapperNewDoFn(dofn,
                                               transform.get_type_hints())
            else:
                dofn = TypeCheckWrapperDoFn(dofn, transform.get_type_hints())

        # TODO(sourabhbajaj): Remove this if-else
        if isinstance(dofn, core.NewDoFn):
            dofn = OutputCheckWrapperNewDoFn(
                dofn, self._applied_ptransform.full_label)
        else:
            dofn = OutputCheckWrapperDoFn(dofn,
                                          self._applied_ptransform.full_label)
        self.runner = DoFnRunner(
            dofn,
            transform.args,
            transform.kwargs,
            self._side_inputs,
            self._applied_ptransform.inputs[0].windowing,
            tagged_receivers=self._tagged_receivers,
            step_name=self._applied_ptransform.full_label,
            state=DoFnState(self._counter_factory),
            scoped_metrics_container=self.scoped_metrics_container)
        self.runner.start()