示例#1
0
class PersonCoderImpl(coder_impl.StreamCoderImpl):
  """Stream coder implementation that (de)serializes ``Person`` records.

  Fields are written, and read back, in this fixed order: id (VarInt), name,
  email_address, credit_card, city, state (UTF-8 strings), date_time
  (TimestampCoderImpl), extra (UTF-8 string).
  """
  # Component coders are class attributes, so they are built once and shared
  # by every instance.
  _int_coder_impl = coder_impl.VarIntCoderImpl()
  _str_coder_impl = StrUtf8Coder().get_impl()
  _time_coder_impl = coder_impl.TimestampCoderImpl()

  def encode_to_stream(self, value, stream, nested):
    """Append every field of ``value`` to ``stream``.

    ``nested`` is not consulted: each component coder is always invoked with
    nested=True (presumably so that every field is self-delimiting -- confirm
    against the component coder implementations).
    """
    put_int = self._int_coder_impl.encode_to_stream
    put_str = self._str_coder_impl.encode_to_stream
    put_time = self._time_coder_impl.encode_to_stream

    put_int(value.id, stream, True)
    put_str(value.name, stream, True)
    put_str(value.email_address, stream, True)
    put_str(value.credit_card, stream, True)
    put_str(value.city, stream, True)
    put_str(value.state, stream, True)
    put_time(value.date_time, stream, True)
    put_str(value.extra, stream, True)

  def decode_from_stream(self, stream, nested):
    """Read one ``Person`` from ``stream``; mirror of ``encode_to_stream``.

    ``nested`` is not consulted; every field is read with nested=True.
    """
    get_int = self._int_coder_impl.decode_from_stream
    get_str = self._str_coder_impl.decode_from_stream
    get_time = self._time_coder_impl.decode_from_stream

    # The list literal is evaluated left to right, so the reads happen in
    # exactly the order the fields were written.
    fields = [
        get_int(stream, True),  # id
        get_str(stream, True),  # name
        get_str(stream, True),  # email_address
        get_str(stream, True),  # credit_card
        get_str(stream, True),  # city
        get_str(stream, True),  # state
        get_time(stream, True),  # date_time
        get_str(stream, True),  # extra
    ]
    return Person(*fields)
示例#2
0
def _nonnull_coder_from_type(field_type):
  """Pick a coder for ``field_type`` based on its ``type_info`` oneof.

  Handles atomic types (INT32/INT64, DOUBLE, STRING, BOOLEAN, BYTES), arrays,
  maps and nested rows. Nullability is not inspected here; presumably the
  caller deals with it -- confirm against ``_coder_from_type``.

  Args:
    field_type: object exposing ``WhichOneof("type_info")`` and the matching
      ``atomic_type`` / ``array_type`` / ``map_type`` / ``row_type`` member.

  Returns:
    A coder instance for the given type.

  Raises:
    ValueError: if the type (or atomic type) is not one of the handled cases.
  """
  kind = field_type.WhichOneof("type_info")

  if kind == "atomic_type":
    atomic = field_type.atomic_type
    if atomic in (schema_pb2.INT32, schema_pb2.INT64):
      return VarIntCoder()
    if atomic == schema_pb2.DOUBLE:
      return FloatCoder()
    if atomic == schema_pb2.STRING:
      return StrUtf8Coder()
    if atomic == schema_pb2.BOOLEAN:
      return BooleanCoder()
    if atomic == schema_pb2.BYTES:
      return BytesCoder()
    # Unrecognized atomic types fall through to the error below.

  if kind == "array_type":
    element_coder = _coder_from_type(field_type.array_type.element_type)
    return IterableCoder(element_coder)

  if kind == "map_type":
    key_coder = _coder_from_type(field_type.map_type.key_type)
    value_coder = _coder_from_type(field_type.map_type.value_type)
    return MapCoder(key_coder, value_coder)

  if kind == "row_type":
    return RowCoder(field_type.row_type.schema)

  # The Java SDK supports several more types, but the coders are not yet
  # standard, and are not implemented in Python.
  raise ValueError(
      "Encountered a type that is not currently supported by RowCoder: %s" %
      field_type)
示例#3
0
class AuctionCoderImpl(coder_impl.StreamCoderImpl):
    """Stream coder implementation that (de)serializes ``Auction`` records.

    Fields are written, and read back, in this fixed order: id (VarInt),
    item_name, description (UTF-8 strings), initial_bid, reserve (VarInt),
    date_time, expires (TimestampCoderImpl), seller, category (VarInt),
    extra (UTF-8 string).
    """
    # Component coders are class attributes, so they are built once and
    # shared by every instance.
    _int_coder_impl = coder_impl.VarIntCoderImpl()
    _str_coder_impl = StrUtf8Coder().get_impl()
    _time_coder_impl = coder_impl.TimestampCoderImpl()

    def encode_to_stream(self, value, stream, nested):
        """Append every field of ``value`` to ``stream``.

        ``nested`` is not consulted: each component coder is always invoked
        with nested=True (presumably so that every field is self-delimiting
        -- confirm against the component coder implementations).
        """
        put_int = self._int_coder_impl.encode_to_stream
        put_str = self._str_coder_impl.encode_to_stream
        put_time = self._time_coder_impl.encode_to_stream

        put_int(value.id, stream, True)
        put_str(value.item_name, stream, True)
        put_str(value.description, stream, True)
        put_int(value.initial_bid, stream, True)
        put_int(value.reserve, stream, True)
        put_time(value.date_time, stream, True)
        put_time(value.expires, stream, True)
        put_int(value.seller, stream, True)
        put_int(value.category, stream, True)
        put_str(value.extra, stream, True)

    def decode_from_stream(self, stream, nested):
        """Read one ``Auction`` from ``stream``; mirror of the encoder.

        ``nested`` is not consulted; every field is read with nested=True.
        """
        get_int = self._int_coder_impl.decode_from_stream
        get_str = self._str_coder_impl.decode_from_stream
        get_time = self._time_coder_impl.decode_from_stream

        # The list literal is evaluated left to right, so the reads happen
        # in exactly the order the fields were written.
        fields = [
            get_int(stream, True),  # id
            get_str(stream, True),  # item_name
            get_str(stream, True),  # description
            get_int(stream, True),  # initial_bid
            get_int(stream, True),  # reserve
            get_time(stream, True),  # date_time
            get_time(stream, True),  # expires
            get_int(stream, True),  # seller
            get_int(stream, True),  # category
            get_str(stream, True),  # extra
        ]
        return Auction(*fields)
 def __init__(self, key_coder_impl, window_coder_impl):
     """Set up the component coder impls used by this coder.

     Args:
       key_coder_impl: coder impl stored as-is for keys.
       window_coder_impl: coder impl for a single window; it is wrapped in
         a TupleSequenceCoderImpl and stored as the windows coder.
     """
     # NOTE(review): imported inside the function, presumably to avoid a
     # circular import with apache_beam.coders.coders -- confirm.
     from apache_beam.coders.coders import StrUtf8Coder

     # Coder impls derived from the constructor arguments.
     self._key_coder_impl = key_coder_impl
     self._windows_coder_impl = TupleSequenceCoderImpl(window_coder_impl)

     # Fixed coder impls that do not depend on the arguments.
     self._tag_coder_impl = StrUtf8Coder().get_impl()
     self._timestamp_coder_impl = TimestampCoderImpl()
     self._boolean_coder_impl = BooleanCoderImpl()
     self._pane_info_coder_impl = PaneInfoCoderImpl()
示例#5
0
文件: fileio.py 项目: yifanmai/beam
class _RemoveDuplicates(beam.DoFn):
  """DoFn that emits an element's metadata only while its state bag is empty.

  Elements are indexed as ``element[0]`` (path) and ``element[1]`` (file
  metadata). The check is "is the bag empty", not "is this path in the bag",
  so de-duplication relies on the state being scoped per path -- presumably
  the path is the key of the input; verify against the caller.
  """

  # Bag of already-seen paths, stored as UTF-8 strings.
  FILES_STATE = BagStateSpec('files', StrUtf8Coder())

  def process(self, element, file_state=beam.DoFn.StateParam(FILES_STATE)):
    """Yield the file metadata the first time, otherwise only log."""
    path, file_metadata = element[0], element[1]
    seen_paths = list(file_state.read())

    if seen_paths:
      # Something was recorded earlier for this state cell: emit nothing.
      _LOGGER.debug('File %s was already read', path)
      return

    file_state.add(path)
    _LOGGER.debug('Generated entry for file %s', path)
    yield file_metadata
def MockReadFromText(
        file_pattern=None,
        coder=StrUtf8Coder(),
        skip_header_lines=0):
    """Build a ``beam.Create`` transform from content held by the test context.

    The content registered for ``file_pattern`` is fetched from the current
    test context, Windows line endings are normalized, the text is split on
    newlines and every line is passed through ``coder.decode``.

    Args:
        file_pattern: key used to look up the content in the test context.
        coder: object whose ``decode`` is applied to each line.
        skip_header_lines: number of leading lines to drop; a falsy value
            keeps all lines.

    Returns:
        The labelled transform ``'MockReadFromText' >> beam.Create(...)``.

    Raises:
        RuntimeError: if the test context has no content for ``file_pattern``.
    """
    content = get_current_test_context().get_file_content(file_pattern)
    if content is None:
        raise RuntimeError('no file content set for %s' % file_pattern)

    normalized = content.replace('\r\n', '\n')
    rows = normalized.split('\n')
    if skip_header_lines:
        rows = rows[skip_header_lines:]

    decoded_rows = list(map(coder.decode, rows))
    return 'MockReadFromText' >> beam.Create(decoded_rows)
示例#7
0
    def coder_from_type(field_type):
        """Pick a coder for ``field_type`` based on its ``type_info`` oneof.

        Handles the atomic types INT32/INT64, DOUBLE and STRING, plus arrays
        (whose element coder is resolved recursively through
        ``RowCoder.coder_from_type``).

        Args:
            field_type: object exposing ``WhichOneof("type_info")`` and the
                matching ``atomic_type`` / ``array_type`` member.

        Returns:
            A coder instance for the given type.

        Raises:
            ValueError: if the type is not one of the handled cases.
        """
        kind = field_type.WhichOneof("type_info")

        if kind == "array_type":
            element_type = field_type.array_type.element_type
            return IterableCoder(RowCoder.coder_from_type(element_type))

        if kind == "atomic_type":
            atomic = field_type.atomic_type
            if atomic in (schema_pb2.INT32, schema_pb2.INT64):
                return VarIntCoder()
            if atomic == schema_pb2.DOUBLE:
                return FloatCoder()
            if atomic == schema_pb2.STRING:
                return StrUtf8Coder()
            # Unrecognized atomic types fall through to the error below.

        # The Java SDK supports several more types, but the coders are not yet
        # standard, and are not implemented in Python.
        raise ValueError(
            "Encountered a type that is not currently supported by RowCoder: %s"
            % field_type)
示例#8
0
class BidCoderImpl(coder_impl.StreamCoderImpl):
  """Stream coder implementation that (de)serializes ``Bid`` records.

  Fields are written, and read back, in this fixed order: auction, bidder,
  price (VarInt), date_time (TimestampCoderImpl), extra (UTF-8 string).
  """
  # Component coders are class attributes, so they are built once and shared
  # by every instance.
  _int_coder_impl = coder_impl.VarIntCoderImpl()
  _str_coder_impl = StrUtf8Coder().get_impl()
  _time_coder_impl = coder_impl.TimestampCoderImpl()

  def encode_to_stream(self, value, stream, nested):
    """Append every field of ``value`` to ``stream``.

    ``nested`` is not consulted: each component coder is always invoked with
    nested=True (presumably so that every field is self-delimiting -- confirm
    against the component coder implementations).
    """
    put_int = self._int_coder_impl.encode_to_stream

    put_int(value.auction, stream, True)
    put_int(value.bidder, stream, True)
    put_int(value.price, stream, True)
    self._time_coder_impl.encode_to_stream(value.date_time, stream, True)
    self._str_coder_impl.encode_to_stream(value.extra, stream, True)

  def decode_from_stream(self, stream, nested):
    """Read one ``Bid`` from ``stream``; mirror of ``encode_to_stream``.

    ``nested`` is not consulted; every field is read with nested=True.
    """
    get_int = self._int_coder_impl.decode_from_stream

    # The list literal is evaluated left to right, so the reads happen in
    # exactly the order the fields were written.
    fields = [
        get_int(stream, True),  # auction
        get_int(stream, True),  # bidder
        get_int(stream, True),  # price
        self._time_coder_impl.decode_from_stream(stream, True),  # date_time
        self._str_coder_impl.decode_from_stream(stream, True),  # extra
    ]
    return Bid(*fields)
示例#9
0
def _nonnull_coder_from_type(field_type):
    """Pick a coder for ``field_type`` based on its ``type_info`` oneof.

    Handles atomic types (INT32/INT64, DOUBLE, STRING, BOOLEAN, BYTES),
    arrays, maps, logical types and nested rows. Nullability is not
    inspected here; presumably the caller deals with it -- confirm against
    ``_coder_from_type``.

    Args:
        field_type: object exposing ``WhichOneof("type_info")`` and the
            matching ``atomic_type`` / ``array_type`` / ``map_type`` /
            ``logical_type`` / ``row_type`` member.

    Returns:
        A coder instance for the given type.

    Raises:
        ValueError: if the type (or atomic type) is not one of the handled
            cases.
    """
    kind = field_type.WhichOneof("type_info")

    if kind == "atomic_type":
        atomic = field_type.atomic_type
        if atomic in (schema_pb2.INT32, schema_pb2.INT64):
            return VarIntCoder()
        if atomic == schema_pb2.DOUBLE:
            return FloatCoder()
        if atomic == schema_pb2.STRING:
            return StrUtf8Coder()
        if atomic == schema_pb2.BOOLEAN:
            return BooleanCoder()
        if atomic == schema_pb2.BYTES:
            return BytesCoder()
        # Unrecognized atomic types fall through to the error below.

    if kind == "array_type":
        element_coder = _coder_from_type(field_type.array_type.element_type)
        return IterableCoder(element_coder)

    if kind == "map_type":
        key_coder = _coder_from_type(field_type.map_type.key_type)
        value_coder = _coder_from_type(field_type.map_type.value_type)
        return MapCoder(key_coder, value_coder)

    if kind == "logical_type":
        # Special case for the Any logical type. Just use the default coder
        # for an unknown Python object.
        if field_type.logical_type.urn == PYTHON_ANY_URN:
            return typecoders.registry.get_coder(object)

        # Any other logical type is encoded through the coder of its
        # underlying representation type.
        logical_type = LogicalType.from_runner_api(field_type.logical_type)
        representation_coder = _coder_from_type(
            field_type.logical_type.representation)
        return LogicalTypeCoder(logical_type, representation_coder)

    if kind == "row_type":
        return RowCoder(field_type.row_type.schema)

    # The Java SDK supports several more types, but the coders are not yet
    # standard, and are not implemented in Python.
    raise ValueError(
        "Encountered a type that is not currently supported by RowCoder: %s" %
        field_type)