Пример #1
0
  def expand(self, pvalueish):
    # type: (pvalue.PCollection) -> pvalue.PCollection
    """Expands this transform by calling the configured expansion service.

    The input is normalized into a tag -> value dict (kept on
    ``self._inputs``), a PTransform proto carrying this transform's URN and
    payload is built, and an ExpansionRequest is sent to
    ``self._expansion_service``. The components and transform found in the
    response are stored on ``self``; each output PCollection is re-attached
    to the calling pipeline and tagged.

    Args:
      pvalueish: A PBegin (no inputs), a list/tuple (tags are the stringified
        positions), a dict (used as-is), or any other value (stored under
        the 'input' tag).

    Returns:
      Whatever ``self._output_to_pvalueish`` produces for the tag -> output
      dict.

    Raises:
      RuntimeError: If the expansion response has a non-empty ``error``.
    """
    # Normalize the input into a tag -> value mapping.
    if isinstance(pvalueish, pvalue.PBegin):
      inputs = {}
    elif isinstance(pvalueish, (list, tuple)):
      inputs = {str(index): item for index, item in enumerate(pvalueish)}
    elif isinstance(pvalueish, dict):
      inputs = pvalueish
    else:
      inputs = {'input': pvalueish}
    self._inputs = inputs

    # With no inputs, the pipeline is taken from pvalueish itself.
    if inputs:
      pipeline = next(iter(inputs.values())).pipeline
    else:
      pipeline = pvalueish.pipeline

    context = pipeline_context.PipelineContext()
    full_label = pipeline._current_transform().full_label
    spec = beam_runner_api_pb2.FunctionSpec(
        urn=self._urn, payload=self._payload)
    transform_proto = beam_runner_api_pb2.PTransform(
        unique_name=full_label, spec=spec)

    for tag, pcoll in inputs.items():
      pcoll_id = context.pcollections.get_id(pcoll)
      transform_proto.inputs[tag] = pcoll_id
      # Proto conversion assumes every PCollection has a producer, so an
      # Impulse transform is registered as the producer of each input.
      # TODO: Possibly loosen this.
      impulse_name = '%s_%s' % (self._IMPULSE_PREFIX, tag)
      impulse_proto = beam_runner_api_pb2.PTransform(
          unique_name=impulse_name,
          spec=beam_runner_api_pb2.FunctionSpec(
              urn=common_urns.primitives.IMPULSE.urn),
          outputs={'out': pcoll_id})
      context.transforms.put_proto(impulse_name, impulse_proto)

    request = beam_expansion_api_pb2.ExpansionRequest(
        components=context.to_runner_api(),
        namespace=self._namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
        transform=transform_proto)

    # A string service is treated as a gRPC address; anything else is called
    # directly as an object exposing Expand(request, context).
    service = self._expansion_service
    if isinstance(service, str):
      with grpc.insecure_channel(service) as channel:
        stub = beam_expansion_api_pb2_grpc.ExpansionServiceStub(channel)
        response = stub.Expand(request)
    else:
      response = service.Expand(request, None)

    if response.error:
      raise RuntimeError(response.error)

    self._expanded_components = response.components
    self._expanded_transform = response.transform
    result_context = pipeline_context.PipelineContext(response.components)

    # Re-attach every returned PCollection to the calling pipeline and
    # record the tag it was produced under.
    outputs = {}
    for tag, pcoll_id in self._expanded_transform.outputs.items():
      output = result_context.pcollections.get_by_id(pcoll_id)
      output.pipeline = pipeline
      output.tag = tag
      outputs[tag] = output
    self._outputs = outputs

    return self._output_to_pvalueish(outputs)
Пример #2
0
    def expand(self, pvalueish):
        # type: (pvalue.PCollection) -> pvalue.PCollection
        """Expands this transform by calling the configured expansion service.

        The input is normalized into a tag -> value dict (kept on
        ``self._inputs``), a PTransform proto carrying this transform's URN
        and payload is built, and an ExpansionRequest (including the
        pipeline's options) is sent to ``self._expansion_service``. The
        components, transform and requirements found in the response are
        stored on ``self``; each output PCollection is re-attached to the
        calling pipeline and tagged.

        Args:
          pvalueish: A PBegin (no inputs), a list/tuple (tags are the
            stringified positions), a dict (used as-is), or any other value
            (stored under the 'input' tag).

        Returns:
          Whatever ``self._output_to_pvalueish`` produces for the
          tag -> output dict.

        Raises:
          RuntimeError: If the expansion response has a non-empty ``error``.
        """
        # Normalize the input into a tag -> value mapping.
        if isinstance(pvalueish, pvalue.PBegin):
            inputs = {}
        elif isinstance(pvalueish, (list, tuple)):
            inputs = {
                str(index): item
                for index, item in enumerate(pvalueish)
            }
        elif isinstance(pvalueish, dict):
            inputs = pvalueish
        else:
            inputs = {'input': pvalueish}
        self._inputs = inputs

        # With no inputs, the pipeline is taken from pvalueish itself.
        if inputs:
            pipeline = next(iter(inputs.values())).pipeline
        else:
            pipeline = pvalueish.pipeline

        context = pipeline_context.PipelineContext()
        full_label = pipeline._current_transform().full_label
        spec = beam_runner_api_pb2.FunctionSpec(urn=self._urn,
                                                payload=self._payload)
        transform_proto = beam_runner_api_pb2.PTransform(
            unique_name=full_label, spec=spec)

        for tag, pcoll in inputs.items():
            pcoll_id = context.pcollections.get_id(pcoll)
            transform_proto.inputs[tag] = pcoll_id
            # Proto conversion assumes every PCollection has a producer, so
            # an Impulse transform is registered as the producer of each
            # input.
            # TODO: Possibly loosen this.
            impulse_name = '%s_%s' % (self._IMPULSE_PREFIX, tag)
            impulse_proto = beam_runner_api_pb2.PTransform(
                unique_name=impulse_name,
                spec=beam_runner_api_pb2.FunctionSpec(
                    urn=common_urns.primitives.IMPULSE.urn),
                outputs={'out': pcoll_id})
            context.transforms.put_proto(impulse_name, impulse_proto)
        components = context.to_runner_api()

        # Unknown options are kept because they may be meaningful only to
        # the SDK that performs the expansion.
        options = pipeline._options.get_all_options(
            drop_default=True, retain_unknown_options=True)
        options_struct = job_utils.pipeline_options_dict_to_struct(options)
        request = beam_expansion_api_pb2.ExpansionRequest(
            components=components,
            namespace=self._namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
            transform=transform_proto,
            pipeline_options=options_struct)

        # A string service is treated as a gRPC address; anything else is
        # called directly as an object exposing Expand(request, context).
        service = self._expansion_service
        if isinstance(service, str):
            # Some environments may not support insecure channels, so a
            # secure channel with local credentials is used instead.
            # TODO: update this to support secure non-local channels.
            channel_creds = grpc.local_channel_credentials()
            with grpc.secure_channel(service, channel_creds) as channel:
                stub = beam_expansion_api_pb2_grpc.ExpansionServiceStub(
                    channel)
                response = stub.Expand(request)
        else:
            response = service.Expand(request, None)

        if response.error:
            raise RuntimeError(response.error)

        self._expanded_components = response.components
        self._expanded_transform = response.transform
        self._expanded_requirements = response.requirements
        result_context = pipeline_context.PipelineContext(response.components)

        # Re-attach every returned PCollection to the calling pipeline and
        # record the tag it was produced under.
        outputs = {}
        for tag, pcoll_id in self._expanded_transform.outputs.items():
            output = result_context.pcollections.get_by_id(pcoll_id)
            output.pipeline = pipeline
            output.tag = tag
            outputs[tag] = output
        self._outputs = outputs

        return self._output_to_pvalueish(outputs)
Пример #3
0
    def expand(self, pvalueish):
        # type: (pvalue.PCollection) -> pvalue.PCollection
        """Expands this transform by calling the configured expansion service.

        The input is normalized into a tag -> value dict (kept on
        ``self._inputs``), a PTransform proto carrying this transform's URN
        and payload is built, and an ExpansionRequest (optionally carrying
        output coder requests derived from ``self._type_hints``) is sent to
        the service obtained from ``self._service()``. The components,
        transform and requirements found in the response are stored on
        ``self``; each output PCollection is re-attached to the calling
        pipeline and tagged.

        Args:
          pvalueish: A PBegin (no inputs), a list/tuple (tags are the
            stringified positions), a dict (used as-is), or any other value
            (stored under the 'input' tag).

        Returns:
          Whatever ``self._output_to_pvalueish`` produces for the
          tag -> output dict.

        Raises:
          RuntimeError: If the expansion response has a non-empty ``error``.
        """
        # Normalize the input into a tag -> value mapping.
        if isinstance(pvalueish, pvalue.PBegin):
            inputs = {}
        elif isinstance(pvalueish, (list, tuple)):
            inputs = {
                str(index): item
                for index, item in enumerate(pvalueish)
            }
        elif isinstance(pvalueish, dict):
            inputs = pvalueish
        else:
            inputs = {'input': pvalueish}
        self._inputs = inputs

        # With no inputs, the pipeline is taken from pvalueish itself.
        if inputs:
            pipeline = next(iter(inputs.values())).pipeline
        else:
            pipeline = pvalueish.pipeline

        context = pipeline_context.PipelineContext(
            component_id_map=pipeline.component_id_map)
        full_label = pipeline._current_transform().full_label
        spec = beam_runner_api_pb2.FunctionSpec(urn=self._urn,
                                                payload=self._payload)
        transform_proto = beam_runner_api_pb2.PTransform(
            unique_name=full_label, spec=spec)

        for tag, pcoll in inputs.items():
            pcoll_id = context.pcollections.get_id(pcoll)
            transform_proto.inputs[tag] = pcoll_id
            # Proto conversion assumes every PCollection has a producer, so
            # an Impulse transform is registered as the producer of each
            # input.
            # TODO: Possibly loosen this.
            impulse_name = '%s_%s' % (self._IMPULSE_PREFIX, tag)
            impulse_proto = beam_runner_api_pb2.PTransform(
                unique_name=impulse_name,
                spec=beam_runner_api_pb2.FunctionSpec(
                    urn=common_urns.primitives.IMPULSE.urn),
                outputs={'out': pcoll_id})
            context.transforms.put_proto(impulse_name, impulse_proto)

        # Build coder requests from the declared output type hints: entry 0
        # is enumerated (keys are stringified positions); entry 1 is only
        # consulted when entry 0 is empty and is iterated as a mapping.
        output_coders = None
        output_types = self._type_hints.output_types
        if output_types:
            if output_types[0]:
                output_coders = {
                    str(position): context.coder_id_from_element_type(hint)
                    for position, hint in enumerate(output_types[0])
                }
            elif output_types[1]:
                output_coders = {
                    key: context.coder_id_from_element_type(hint)
                    for key, hint in output_types[1].items()
                }

        request = beam_expansion_api_pb2.ExpansionRequest(
            components=context.to_runner_api(),
            namespace=self._external_namespace,  # type: ignore  # mypy thinks self._namespace is threading.local
            transform=transform_proto,
            output_coder_requests=output_coders)

        with self._service() as service:
            response = service.Expand(request)
            if response.error:
                raise RuntimeError(response.error)
            expanded_components = response.components
            self._expanded_components = expanded_components
            # Artifacts are resolved while the service is still open, and
            # only when at least one environment declares dependencies.
            has_dependencies = any(
                env.dependencies
                for env in expanded_components.environments.values())
            if has_dependencies:
                self._expanded_components = self._resolve_artifacts(
                    expanded_components, service.artifact_service(),
                    pipeline.local_tempdir)

        self._expanded_transform = response.transform
        self._expanded_requirements = response.requirements
        result_context = pipeline_context.PipelineContext(response.components)

        # Re-attach every returned PCollection to the calling pipeline and
        # record the tag it was produced under.
        outputs = {}
        for tag, pcoll_id in self._expanded_transform.outputs.items():
            output = result_context.pcollections.get_by_id(pcoll_id)
            output.pipeline = pipeline
            output.tag = tag
            outputs[tag] = output
        self._outputs = outputs

        return self._output_to_pvalueish(outputs)