Пример #1
0
 def from_runner_api(proto, context):
     """Rebuild an AppliedPTransform from its runner API proto.

     The transform itself is unpickled from the ``BytesValue`` packed in
     ``proto.spec.parameter``; inputs, sub-transforms and outputs are looked
     up by id through ``context``.  An output tag equal to the string
     ``'None'`` is stored under the key ``None``.

     When the result has no parts, every output that is not also one of its
     inputs gets its ``producer`` and ``tag`` set to point back at the result.
     """
     # NOTE(review): this unpickles bytes carried by the proto; presumably
     # the proto comes from a trusted source -- confirm with callers.
     transform = pickler.loads(
         proto_utils.unpack_Any(proto.spec.parameter,
                                wrappers_pb2.BytesValue).value)
     input_pcolls = [
         context.pcollections.get_by_id(pcoll_id)
         for pcoll_id in proto.inputs.values()
     ]
     applied = AppliedPTransform(parent=None,
                                 transform=transform,
                                 full_label=proto.unique_name,
                                 inputs=input_pcolls)

     applied.parts = [
         context.transforms.get_by_id(part_id)
         for part_id in proto.subtransforms
     ]

     outputs_by_tag = {}
     for tag, pcoll_id in proto.outputs.items():
         key = None if tag == 'None' else tag
         outputs_by_tag[key] = context.pcollections.get_by_id(pcoll_id)
     applied.outputs = outputs_by_tag

     if not applied.parts:
         for tag, pcoll in applied.outputs.items():
             if pcoll in applied.inputs:
                 # Pass-through collection: leave its producer untouched.
                 continue
             pcoll.producer = applied
             pcoll.tag = tag

     applied.update_input_refcounts()
     return applied
Пример #2
0
 def from_runner_api(cls, proto, context):
   """Construct an object from ``proto`` via the constructor registered
   for its urn.

   Returns None when ``proto`` is None or its ``urn`` is empty.  Otherwise
   the urn is looked up in ``cls._known_urns`` (a KeyError propagates for an
   unregistered urn), ``proto.parameter`` is unpacked as the registered
   parameter type, and the registered constructor is called with the
   unpacked parameter and ``context``.
   """
   if proto is not None and proto.urn:
     payload_type, factory = cls._known_urns[proto.urn]
     payload = proto_utils.unpack_Any(proto.parameter, payload_type)
     return factory(payload, context)
   return None
Пример #3
0
 def create_operation(self, transform_id, consumers):
     """Create the operation for the transform named ``transform_id``.

     The transform proto is read from ``self.descriptor.transforms``; its
     spec urn selects a ``(creator, parameter_type)`` pair from
     ``self._known_urns``.  The spec parameter is unpacked as that type and
     everything is forwarded to the creator, whose result is returned.
     """
     proto = self.descriptor.transforms[transform_id]
     spec = proto.spec
     build_operation, payload_type = self._known_urns[spec.urn]
     payload = proto_utils.unpack_Any(spec.parameter, payload_type)
     return build_operation(
         self, transform_id, proto, payload, consumers)
Пример #4
0
 def extract_endpoints(stage):
     """Collect the data endpoints of ``stage`` and rewrite its IO transforms.

     Returns a ``(data_input, data_side_input, data_output)`` tuple of dicts.
     ``data_input`` and ``data_output`` map a target -- the pair
     ``(transform.unique_name, only_element(...))`` taken over the
     transform's outputs (for data inputs) or inputs (for data outputs) --
     to the PCollection id unpacked from the transform's spec parameter.
     ``data_side_input`` is never populated here and is returned empty.

     Side effect: each data input/output transform has its spec parameter
     replaced by ``data_operation_spec`` (a name resolved from the enclosing
     scope, not defined in this function), or cleared when that value is
     falsy.
     """
     data_input = {}
     data_side_input = {}
     data_output = {}
     for transform in stage.transforms:
         # NOTE(review): the parameter is unpacked for every transform, even
         # ones that are not data IO; kept ahead of the URN check so the
         # call sequence matches the previous behaviour.
         pcoll_id = proto_utils.unpack_Any(
             transform.spec.parameter, wrappers_pb2.BytesValue).value
         urn = transform.spec.urn
         if urn == bundle_processor.DATA_INPUT_URN:
             target = transform.unique_name, only_element(
                 transform.outputs)
             data_input[target] = pcoll_id
         elif urn == bundle_processor.DATA_OUTPUT_URN:
             target = transform.unique_name, only_element(
                 transform.inputs)
             data_output[target] = pcoll_id
         else:
             # Not a data IO transform: nothing to record or rewrite.
             continue
         # Point the IO transform at the data operation spec.
         if data_operation_spec:
             transform.spec.parameter.CopyFrom(data_operation_spec)
         else:
             transform.spec.parameter.Clear()
     return data_input, data_side_input, data_output
Пример #5
0
 def from_runner_api(cls, proto, context):
   """Turn a urn-tagged proto into the object its registered constructor
   builds.

   A missing proto (None) or a proto with an empty ``urn`` yields None.
   Otherwise ``cls._known_urns[proto.urn]`` supplies the parameter type
   used to unpack ``proto.parameter`` and the constructor that receives
   the unpacked parameter together with ``context``.
   """
   if proto is None:
     return None
   if not proto.urn:
     return None
   param_cls, make = cls._known_urns[proto.urn]
   unpacked = proto_utils.unpack_Any(proto.parameter, param_cls)
   return make(unpacked, context)
Пример #6
0
 def side_inputs(self):
     """Yield the input entry of every ParDo side input in this object.

     For each transform in ``self.transforms`` whose spec urn equals
     ``urns.PARDO_TRANSFORM``, the spec parameter is unpacked as a
     ``ParDoPayload`` and ``transform.inputs[name]`` is yielded for each
     name in the payload's ``side_inputs``.  Other transforms are skipped.
     """
     for transform in self.transforms:
         if transform.spec.urn != urns.PARDO_TRANSFORM:
             continue
         pardo_payload = proto_utils.unpack_Any(
             transform.spec.parameter,
             beam_runner_api_pb2.ParDoPayload)
         for side_input_name in pardo_payload.side_inputs:
             yield transform.inputs[side_input_name]
Пример #7
0
  def from_runner_api(cls, fn_proto, context):
    """Build a Fn object from an SdkFunctionSpec proto.

    The spec's urn is looked up in ``cls._known_urns`` to obtain a
    ``(parameter_type, constructor)`` pair; the spec parameter is unpacked
    as that type and handed to the constructor along with ``context``.

    Prefer registering a urn with its parameter type and constructor.
    """
    spec = fn_proto.spec
    payload_type, factory = cls._known_urns[spec.urn]
    payload = proto_utils.unpack_Any(spec.parameter, payload_type)
    return factory(payload, context)
Пример #8
0
    def from_runner_api(cls, fn_proto, context):
        """Convert an SdkFunctionSpec proto into a Fn object.

        ``cls._known_urns`` maps the spec's urn to the parameter type used
        to unpack ``fn_proto.spec.parameter`` and to the constructor that is
        called with the unpacked parameter and ``context``.

        Prefer registering a urn with its parameter type and constructor.
        """
        registered = cls._known_urns[fn_proto.spec.urn]
        param_cls, make_fn = registered
        unpacked = proto_utils.unpack_Any(fn_proto.spec.parameter, param_cls)
        return make_fn(unpacked, context)
Пример #9
0
 def get_coder(self, coder_id):
   """Return the coder registered under ``coder_id``.

   If the coder proto carries a urn, the coder comes from
   ``self.context.coders``.  Otherwise its parameter is treated as JSON
   bytes and decoded through ``operation_specs.get_coder_from_spec``.
   """
   inner_spec = self.descriptor.codersyyy[coder_id].spec.spec
   if not inner_spec.urn:
     # No URN, assume cloud object encoding json bytes.
     json_bytes = proto_utils.unpack_Any(
         inner_spec.parameter, wrappers_pb2.BytesValue).value
     return operation_specs.get_coder_from_spec(json.loads(json_bytes))
   return self.context.coders.get_by_id(coder_id)
Пример #10
0
 def get_coder(self, coder_id):
     """Look up the coder for ``coder_id``.

     A coder proto with a urn is resolved through ``self.context.coders``.
     One without a urn has its parameter unpacked as a ``BytesValue``,
     parsed as JSON, and passed to ``operation_specs.get_coder_from_spec``.
     """
     coder_proto = self.descriptor.codersyyy[coder_id]
     if coder_proto.spec.spec.urn:
         return self.context.coders.get_by_id(coder_id)

     # No URN, assume cloud object encoding json bytes.
     wrapped = proto_utils.unpack_Any(coder_proto.spec.spec.parameter,
                                      wrappers_pb2.BytesValue)
     cloud_spec = json.loads(wrapped.value)
     return operation_specs.get_coder_from_spec(cloud_spec)
Пример #11
0
 def has_as_main_input(self, pcoll):
     """Return whether any transform consumes ``pcoll`` as a non-side input.

     For each transform in ``self.transforms`` the set of side-input names
     is taken from its ``ParDoPayload`` when the spec urn equals
     ``urns.PARDO_TRANSFORM``, and is empty otherwise.  The result is True
     as soon as some entry of ``transform.inputs`` has a value equal to
     ``pcoll`` and a key that is not one of those side-input names;
     otherwise it is False.
     """
     for transform in self.transforms:
         if transform.spec.urn == urns.PARDO_TRANSFORM:
             payload = proto_utils.unpack_Any(
                 transform.spec.parameter,
                 beam_runner_api_pb2.ParDoPayload)
             local_side_inputs = payload.side_inputs
         else:
             local_side_inputs = {}
         for local_id, pipeline_id in transform.inputs.items():
             if pcoll == pipeline_id and local_id not in local_side_inputs:
                 return True
     # Explicit negative answer instead of falling off the end (None), so
     # the function consistently returns a bool.
     return False
Пример #12
0
def create(factory, transform_id, transform_proto, parameter, consumers):
  """Create a ParDo operation from a pickled DoFn payload.

  ``parameter.do_fn.spec`` must carry the ``urns.PICKLED_DO_FN_INFO`` urn
  (checked with an assert).  Its parameter is unpacked as a ``BytesValue``
  and unpickled.  If the unpickled object has length 2 it is taken to be a
  ``(serialized_fn, side_input_data)`` pair; otherwise the original bytes
  are used as the serialized fn with no side input data.  The result of
  ``_create_pardo_operation`` is returned.
  """
  do_fn_spec = parameter.do_fn.spec
  assert do_fn_spec.urn == urns.PICKLED_DO_FN_INFO
  pickled_bytes = proto_utils.unpack_Any(
      do_fn_spec.parameter, wrappers_pb2.BytesValue).value
  unpickled = pickler.loads(pickled_bytes)
  if len(unpickled) != 2:
    # No side input data.
    fn_bytes, side_input_data = pickled_bytes, []
  else:
    # Has side input data.
    fn_bytes, side_input_data = unpickled
  return _create_pardo_operation(
      factory, transform_id, transform_proto, consumers,
      fn_bytes, side_input_data)
Пример #13
0
 def from_runner_api(proto, context):
   """Reconstruct an AppliedPTransform from a runner API transform proto.

   The pickled transform is read from the ``BytesValue`` in
   ``proto.spec.parameter``.  Inputs, parts and outputs are resolved by id
   via ``context``; the output tag ``'None'`` is mapped to the key ``None``.
   If the result has no parts, each output that is not among its inputs is
   marked as produced by the result under its tag.
   """
   pickled_transform = proto_utils.unpack_Any(
       proto.spec.parameter, wrappers_pb2.BytesValue).value
   # NOTE(review): unpickling proto-supplied bytes; presumably trusted
   # input -- verify against callers.
   transform = pickler.loads(pickled_transform)

   main_inputs = []
   for pcoll_id in proto.inputs.values():
     main_inputs.append(context.pcollections.get_by_id(pcoll_id))

   applied = AppliedPTransform(
       parent=None,
       transform=transform,
       full_label=proto.unique_name,
       inputs=main_inputs)
   applied.parts = [
       context.transforms.get_by_id(part_id)
       for part_id in proto.subtransforms]
   applied.outputs = dict(
       (None if tag == 'None' else tag,
        context.pcollections.get_by_id(pcoll_id))
       for tag, pcoll_id in proto.outputs.items())

   has_no_parts = not applied.parts
   if has_no_parts:
     for tag, pcoll in applied.outputs.items():
       if pcoll not in applied.inputs:
         pcoll.producer = applied
         pcoll.tag = tag

   applied.update_input_refcounts()
   return applied
Пример #14
0
 def from_runner_api(cls, fn_proto, context):
   """Build an object from ``fn_proto`` using its registered constructor.

   ``cls._known_urns[fn_proto.spec.urn]`` gives the parameter type used to
   unpack ``fn_proto.spec.parameter`` and the constructor that is called
   with the unpacked parameter and ``context``.
   """
   fn_spec = fn_proto.spec
   payload_type, build = cls._known_urns[fn_spec.urn]
   payload = proto_utils.unpack_Any(fn_spec.parameter, payload_type)
   return build(payload, context)
Пример #15
0
 def create_operation(self, transform_id, consumers):
   """Instantiate the operation registered for a transform's urn.

   Looks up the transform proto in ``self.descriptor.transforms``, finds
   the ``(creator, parameter_type)`` pair for its spec urn in
   ``self._known_urns``, unpacks the spec parameter as that type, and
   returns whatever the creator returns.
   """
   proto = self.descriptor.transforms[transform_id]
   make_operation, payload_cls = self._known_urns[proto.spec.urn]
   return make_operation(
       self,
       transform_id,
       proto,
       proto_utils.unpack_Any(proto.spec.parameter, payload_cls),
       consumers)
Пример #16
0
 def from_runner_api(cls, fn_proto, context):
     """Convert ``fn_proto`` into the object built by its urn's constructor.

     The urn in ``fn_proto.spec`` selects a ``(parameter_type, constructor)``
     entry from ``cls._known_urns``.  The spec parameter is unpacked as
     ``parameter_type`` and passed, with ``context``, to the constructor.
     """
     entry = cls._known_urns[fn_proto.spec.urn]
     param_cls, ctor = entry
     unpacked_param = proto_utils.unpack_Any(
         fn_proto.spec.parameter, param_cls)
     return ctor(unpacked_param, context)