示例#1
0
    def _get_schema_proto_and_payload(self, *args, **kwargs):
        named_fields = []
        fields_to_values = OrderedDict()
        next_field_id = 0
        for value in args:
            if value is None:
                raise ValueError(
                    'Received value None. None values are currently not supported'
                )
            named_fields.append(
                ((JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                  next_field_id),
                 convert_to_typing_type(instance_to_type(value))))
            fields_to_values[(
                JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                next_field_id)] = value
            next_field_id += 1
        for key, value in kwargs.items():
            if not key:
                raise ValueError('Parameter name cannot be empty')
            if value is None:
                raise ValueError(
                    'Received value None for key %s. None values are currently not '
                    'supported' % key)
            named_fields.append(
                (key, convert_to_typing_type(instance_to_type(value))))
            fields_to_values[key] = value

        schema_proto = named_fields_to_schema(named_fields)
        row = named_tuple_from_schema(schema_proto)(**fields_to_values)
        schema = named_tuple_to_schema(type(row))

        payload = RowCoder(schema).encode(row)
        return (schema_proto, payload)
示例#2
0
    def test_user_type_annotated_with_id_after_conversion(self):
        """A NamedTuple type gains ``_beam_schema_id`` once it is converted.

        Before conversion the attribute must be absent; afterwards it must be
        present and equal to the id of the schema that was returned.
        """
        id_attr = '_beam_schema_id'
        MyCuteClass = NamedTuple('MyCuteClass', [('name', str)])
        self.assertFalse(hasattr(MyCuteClass, id_attr))

        converted = named_tuple_to_schema(MyCuteClass)

        self.assertTrue(hasattr(MyCuteClass, id_attr))
        self.assertEqual(getattr(MyCuteClass, id_attr), converted.id)
示例#3
0
def set_encoding_position(type_, values):
  """Assign an encoding position to every field of ``type_``'s schema.

  The schema is looked up in ``SCHEMA_REGISTRY`` by id when ``type_`` already
  carries a ``_beam_schema_id`` attribute; otherwise it is derived with
  ``named_tuple_to_schema``. The updated schema is then added to the registry
  for ``type_``.

  Args:
    type_: the type whose schema is updated.
    values: anything ``dict()`` accepts, mapping field name to position.

  Raises:
    KeyError: if ``values`` has no entry for one of the schema's fields.
  """
  id_attr = "_beam_schema_id"
  schema = (
      SCHEMA_REGISTRY.get_schema_by_id(getattr(type_, id_attr))
      if hasattr(type_, id_attr) else named_tuple_to_schema(type_))

  positions = dict(values)
  for field in schema.fields:
    field.encoding_position = positions[field.name]

  SCHEMA_REGISTRY.add(type_, schema)
示例#4
0
 def from_type_hint(type_hint, registry):
     """Return a coder for ``type_hint``.

     A ``row_type.RowTypeConstraint`` has its schema built from its
     ``_fields``; if that raises ``ValueError`` the coder registered for
     ``object`` in ``typecoders.registry`` is returned instead. Any other
     hint is converted with ``named_tuple_to_schema``.

     The ``registry`` argument is not used by this implementation.
     """
     if not isinstance(type_hint, row_type.RowTypeConstraint):
         return RowCoder(named_tuple_to_schema(type_hint))

     try:
         row_schema = named_fields_to_schema(type_hint._fields)
     except ValueError:
         # TODO(BEAM-10570): Consider a pythonsdk logical type.
         return typecoders.registry.get_coder(object)
     return RowCoder(row_schema)
示例#5
0
    def __init__(
        self,
        project_id,
        instance_id,
        database_id,
        row_type=None,
        sql=None,
        table=None,
        host=None,
        emulator_host=None,
        batching=None,
        timestamp_bound_mode=None,
        read_timestamp=None,
        staleness=None,
        time_unit=None,
        expansion_service=None,
    ):
        """
    Initializes a read operation from Spanner.

    :param project_id: Specifies the Cloud Spanner project.
    :param instance_id: Specifies the Cloud Spanner instance.
    :param database_id: Specifies the Cloud Spanner database.
    :param row_type: Row type that fits the given query or table. Passed as
        NamedTuple, e.g. NamedTuple('name', [('row_name', unicode)])
    :param sql: An sql query to execute. It's results must fit the
        provided row_type. Don't use when table is set.
    :param table: A spanner table. When provided all columns from row_type
        will be selected to query. Don't use when query is set.
    :param batching: By default Batch API is used to read data from Cloud
        Spanner. It is useful to disable batching when the underlying query
        is not root-partitionable.
    :param host: Specifies the Cloud Spanner host.
    :param emulator_host: Specifies Spanner emulator host.
    :param timestamp_bound_mode: Defines how Cloud Spanner will choose a
        timestamp for a read-only transaction or a single read/query.
        Passed as TimestampBoundMode enum. Possible values:
        STRONG: A timestamp bound that will perform reads and queries at a
        timestamp where all previously committed transactions are visible.
        READ_TIMESTAMP: Returns a timestamp bound that will perform reads
        and queries at the given timestamp.
        MIN_READ_TIMESTAMP: Returns a timestamp bound that will perform reads
        and queries at a timestamp chosen to be at least given timestamp value.
        EXACT_STALENESS: Returns a timestamp bound that will perform reads and
        queries at an exact staleness. The timestamp is chosen soon after the
        read is started.
        MAX_STALENESS: Returns a timestamp bound that will perform reads and
        queries at a timestamp chosen to be at most time_unit stale.
    :param read_timestamp: Timestamp in string. Use only when
        timestamp_bound_mode is set to READ_TIMESTAMP or MIN_READ_TIMESTAMP.
    :param staleness: Staleness value as int. Use only when
        timestamp_bound_mode is set to EXACT_STALENESS or MAX_STALENESS.
        time_unit has to be set along with this param.
    :param time_unit: Time unit for staleness_value passed as TimeUnit enum.
        Possible values: NANOSECONDS, MICROSECONDS, MILLISECONDS, SECONDS,
        HOURS, DAYS.
    :param expansion_service: The address (host:port) of the ExpansionService.
    """
        assert row_type
        assert sql or table and not (sql and table)
        staleness_value = int(staleness) if staleness else None

        if staleness_value or time_unit:
            assert staleness_value and time_unit and \
                   timestamp_bound_mode is TimestampBoundMode.MAX_STALENESS or \
                   timestamp_bound_mode is TimestampBoundMode.EXACT_STALENESS

        if read_timestamp:
            assert timestamp_bound_mode is TimestampBoundMode.MIN_READ_TIMESTAMP\
                   or timestamp_bound_mode is TimestampBoundMode.READ_TIMESTAMP

        super(ReadFromSpanner, self).__init__(
            self.URN,
            NamedTupleBasedPayloadBuilder(
                ReadFromSpannerSchema(
                    instance_id=instance_id,
                    database_id=database_id,
                    sql=sql,
                    table=table,
                    schema=named_tuple_to_schema(row_type).SerializeToString(),
                    project_id=project_id,
                    host=host,
                    emulator_host=emulator_host,
                    batching=batching,
                    timestamp_bound_mode=_get_enum_name(timestamp_bound_mode),
                    read_timestamp=read_timestamp,
                    staleness=staleness,
                    time_unit=_get_enum_name(time_unit),
                ), ),
            expansion_service or default_io_expansion_service(),
        )
 def __reduce__(self):
     """Describe how to pickle this object.

     Returns a ``(callable, args)`` pair: ``self._from_serialized_schema``
     and a one-element tuple holding the serialized schema of
     ``self._namedtuple_ctor``.
     """
     rebuild = self._from_serialized_schema
     # when pickling, use bytes representation of the schema.
     schema_bytes = named_tuple_to_schema(
         self._namedtuple_ctor).SerializeToString()
     return (rebuild, (schema_bytes, ))
示例#7
0
 def from_type_hint(type_hint, registry):
   """Return a ``RowCoder`` for ``type_hint``.

   The schema comes from ``named_fields_to_schema`` (using the hint's
   ``_fields``) when the hint is a ``row_type.RowTypeConstraint``, and from
   ``named_tuple_to_schema`` otherwise. ``registry`` is not used here.
   """
   is_row_constraint = isinstance(type_hint, row_type.RowTypeConstraint)
   row_schema = (
       named_fields_to_schema(type_hint._fields)
       if is_row_constraint else named_tuple_to_schema(type_hint))
   return RowCoder(row_schema)
示例#8
0
 def build(self):
     """Build an ``ExternalConfigurationPayload`` for this builder.

     The named-tuple instance from ``_get_named_tuple_instance`` supplies
     both the schema (derived from its type) and the payload (the instance
     encoded with a ``RowCoder`` for that schema).
     """
     instance = self._get_named_tuple_instance()
     row_schema = named_tuple_to_schema(type(instance))
     encoded = RowCoder(row_schema).encode(instance)
     return ExternalConfigurationPayload(schema=row_schema, payload=encoded)
示例#9
0
 def from_type_hint(named_tuple_type, registry):
   """Return a ``RowCoder`` built from the schema of ``named_tuple_type``.

   ``registry`` is accepted but not used here.
   """
   row_schema = named_tuple_to_schema(named_tuple_type)
   return RowCoder(row_schema)