Пример #1
0
  def test_encoding_position_reorder_fields(self):
    """Encode with one schema and decode with a reordered one.

    The first schema declares ``f_int32`` then ``f_str`` without explicit
    encoding positions. The second declares the same two fields in the
    opposite order, sets ``encoding_positions_set`` and assigns encoding
    position 0 to ``f_int32`` and 1 to ``f_str``. The row decoded with the
    second schema must carry the values that were encoded with the first.
    """
    int32_type = schema_pb2.FieldType(atomic_type=schema_pb2.INT32)
    string_type = schema_pb2.FieldType(atomic_type=schema_pb2.STRING)

    # Writer side: declaration order only.
    schema1 = schema_pb2.Schema(
        id="reorder_test_schema1",
        fields=[
            schema_pb2.Field(name="f_int32", type=int32_type),
            schema_pb2.Field(name="f_str", type=string_type),
        ])
    # Reader side: fields declared in reverse, explicit encoding positions.
    schema2 = schema_pb2.Schema(
        id="reorder_test_schema2",
        encoding_positions_set=True,
        fields=[
            schema_pb2.Field(
                name="f_str", type=string_type, encoding_position=1),
            schema_pb2.Field(
                name="f_int32", type=int32_type, encoding_position=0),
        ])

    writer_row_type = named_tuple_from_schema(schema1)
    reader_row_type = named_tuple_from_schema(schema2)

    reader_coder = RowCoder(schema2)
    writer_coder = RowCoder(schema1)
    encoded = writer_coder.encode(writer_row_type(42, "Hello World!"))
    roundtripped = reader_coder.decode(encoded)

    expected = reader_row_type(f_int32=42, f_str="Hello World!")
    self.assertEqual(expected, roundtripped)
Пример #2
0
  def test_encoding_position_reorder_fields(self):
    """Compare bytes from a type-hint coder and a reordered-schema coder.

    One coder is derived from the ``expected`` NamedTuple type hint; the
    other is built from a schema that lists the fields in reverse order with
    explicit encoding positions. The test asserts that both produce the same
    encoded bytes for the rows given below.
    """
    expected = typing.NamedTuple(
        'expected', [("field1", str), ("field2", int), ("field3", int)])

    # (field name, atomic type, encoding position), in declaration order.
    reordered_specs = (
        ("field3", schema_pb2.STRING, 2),
        ("field2", schema_pb2.INT32, 1),
        ("field1", schema_pb2.INT32, 0),
    )
    reorder = schema_pb2.Schema(
        id="new_order",
        fields=[
            schema_pb2.Field(
                name=field_name,
                type=schema_pb2.FieldType(atomic_type=atomic_type),
                encoding_position=position)
            for field_name, atomic_type, position in reordered_specs
        ])

    old_coder = RowCoder.from_type_hint(expected, None)
    new_coder = RowCoder(reorder)

    self.assertEqual(
        old_coder.encode(expected("foo", 7, 12)),
        new_coder.encode(expected(12, 7, "foo")))
Пример #3
0
    def test_row_coder_nested_struct(self):
        """Round-trip a ``Pair`` whose two fields are both typed ``Person``."""
        Pair = typing.NamedTuple('Pair', [('left', Person), ('right', Person)])

        first, second = self.PEOPLE[0], self.PEOPLE[1]
        original = Pair(first, second)

        pair_schema = typing_to_runner_api(Pair).row_type.schema
        coder = RowCoder(pair_schema)

        decoded = coder.decode(coder.encode(original))
        self.assertEqual(original, decoded)
Пример #4
0
  def test_create_row_coder_from_schema(self):
    """Round-trip every ``self.PEOPLE`` row through a hand-built schema.

    The "person" schema is assembled directly from ``schema_pb2`` messages
    and covers atomic, nullable, array, map and logical (micros_instant)
    field types.
    """
    def atomic(kind, **options):
      # FieldType wrapping a single atomic type; options such as
      # nullable=True are forwarded unchanged.
      return schema_pb2.FieldType(atomic_type=kind, **options)

    def field(name, field_type):
      return schema_pb2.Field(name=name, type=field_type)

    # Row representation backing the micros_instant logical type.
    micros_instant_schema = schema_pb2.Schema(
        id="micros_instant",
        fields=[
            field("seconds", atomic(schema_pb2.INT64)),
            field("micros", atomic(schema_pb2.INT64)),
        ])
    favorite_time_type = schema_pb2.FieldType(
        logical_type=schema_pb2.LogicalType(
            urn="beam:logical_type:micros_instant:v1",
            representation=schema_pb2.FieldType(
                row_type=schema_pb2.RowType(schema=micros_instant_schema))))

    aliases_type = schema_pb2.FieldType(
        array_type=schema_pb2.ArrayType(
            element_type=atomic(schema_pb2.STRING)))
    custom_metadata_type = schema_pb2.FieldType(
        map_type=schema_pb2.MapType(
            key_type=atomic(schema_pb2.STRING),
            value_type=atomic(schema_pb2.INT64),
        ))

    schema = schema_pb2.Schema(
        id="person",
        fields=[
            field("name", atomic(schema_pb2.STRING)),
            field("age", atomic(schema_pb2.INT32)),
            field("address", atomic(schema_pb2.STRING, nullable=True)),
            field("aliases", aliases_type),
            field("knows_javascript", atomic(schema_pb2.BOOLEAN)),
            field("payload", atomic(schema_pb2.BYTES, nullable=True)),
            field("custom_metadata", custom_metadata_type),
            field("favorite_time", favorite_time_type),
        ])
    coder = RowCoder(schema)

    for person in self.PEOPLE:
      decoded = coder.decode(coder.encode(person))
      self.assertEqual(person, decoded)
Пример #5
0
    def _verify_row(self, schema, row_payload, expected_values):
        """Decode ``row_payload`` with ``schema`` and check selected attributes.

        :param schema: schema handed to ``RowCoder`` for decoding.
        :param row_payload: encoded row passed to ``RowCoder.decode``.
        :param expected_values: mapping of attribute name to the value the
            decoded row is expected to hold for that attribute.
        """
        decoded_row = RowCoder(schema).decode(row_payload)

        for name, wanted in expected_values.items():
            # Check presence first so that a missing attribute is reported as
            # an assertion failure rather than an AttributeError.
            self.assertTrue(hasattr(decoded_row, name))
            actual = getattr(decoded_row, name)
            self.assertEqual(wanted, actual)
Пример #6
0
    def _get_schema_proto_and_payload(self, *args, **kwargs):
        named_fields = []
        fields_to_values = OrderedDict()
        next_field_id = 0
        for value in args:
            if value is None:
                raise ValueError(
                    'Received value None. None values are currently not supported'
                )
            named_fields.append(
                ((JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                  next_field_id),
                 convert_to_typing_type(instance_to_type(value))))
            fields_to_values[(
                JavaClassLookupPayloadBuilder.IGNORED_ARG_FORMAT %
                next_field_id)] = value
            next_field_id += 1
        for key, value in kwargs.items():
            if not key:
                raise ValueError('Parameter name cannot be empty')
            if value is None:
                raise ValueError(
                    'Received value None for key %s. None values are currently not '
                    'supported' % key)
            named_fields.append(
                (key, convert_to_typing_type(instance_to_type(value))))
            fields_to_values[key] = value

        schema_proto = named_fields_to_schema(named_fields)
        row = named_tuple_from_schema(schema_proto)(**fields_to_values)
        schema = named_tuple_to_schema(type(row))

        payload = RowCoder(schema).encode(row)
        return (schema_proto, payload)
Пример #7
0
    def test_create_row_coder_from_schema(self):
        """Round-trip every ``self.PEOPLE`` row through a hand-built schema.

        The "person" schema is assembled directly from ``schema_pb2``
        messages and covers string, int32, boolean, nullable string, nullable
        bytes and array-of-string fields.
        """
        string_type = schema_pb2.FieldType(atomic_type=schema_pb2.STRING)
        int32_type = schema_pb2.FieldType(atomic_type=schema_pb2.INT32)
        boolean_type = schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN)
        nullable_string_type = schema_pb2.FieldType(
            atomic_type=schema_pb2.STRING, nullable=True)
        nullable_bytes_type = schema_pb2.FieldType(
            atomic_type=schema_pb2.BYTES, nullable=True)
        string_array_type = schema_pb2.FieldType(
            array_type=schema_pb2.ArrayType(element_type=string_type))

        person_fields = [
            schema_pb2.Field(name="name", type=string_type),
            schema_pb2.Field(name="age", type=int32_type),
            schema_pb2.Field(name="address", type=nullable_string_type),
            schema_pb2.Field(name="aliases", type=string_array_type),
            schema_pb2.Field(name="knows_javascript", type=boolean_type),
            schema_pb2.Field(name="payload", type=nullable_bytes_type),
        ]
        schema = schema_pb2.Schema(id="person", fields=person_fields)
        coder = RowCoder(schema)

        for person in self.PEOPLE:
            roundtripped = coder.decode(coder.encode(person))
            self.assertEqual(person, roundtripped)
Пример #8
0
    def __init__(
        self,
        table_name,
        driver_class_name,
        jdbc_url,
        username,
        password,
        statement=None,
        connection_properties=None,
        connection_init_sqls=None,
        expansion_service=None,
        classpath=None,
    ):
        """
        Initializes a write operation to Jdbc.

        :param table_name: forwarded as the ``location`` of the Jdbc
                           configuration schema
        :param driver_class_name: name of the jdbc driver class
        :param jdbc_url: full jdbc url to the database.
        :param username: database username
        :param password: database password
        :param statement: sql statement to be executed
        :param connection_properties: properties of the jdbc connection
                                      passed as string with format
                                      [propertyName=property;]*
        :param connection_init_sqls: required only for MySql and MariaDB.
                                     passed as list of strings
        :param expansion_service: The address (host:port) of the
                                  ExpansionService.
        :param classpath: A list of JARs or Java packages to include in the
                          classpath for the expansion service. This option is
                          usually needed for `jdbc` to include extra JDBC
                          driver packages.
                          The packages can be in these three formats: (1) A
                          local file, (2) A URL, (3) A gradle-style identifier
                          of a Maven package (e.g.
                          "org.postgresql:postgresql:42.3.1").
                          When omitted or falsy, ``DEFAULT_JDBC_CLASSPATH`` is
                          used (documented as including a Postgres SQL JDBC
                          driver).
        """
        classpath = classpath or DEFAULT_JDBC_CLASSPATH

        # The connection settings travel as a Config row encoded to bytes.
        config_coder = RowCoder(typing_to_runner_api(Config).row_type.schema)
        # Read-only settings are explicitly left unset for a write.
        write_config = Config(
            driver_class_name=driver_class_name,
            jdbc_url=jdbc_url,
            username=username,
            password=password,
            connection_properties=connection_properties,
            connection_init_sqls=connection_init_sqls,
            write_statement=statement,
            read_query=None,
            fetch_size=None,
            output_parallelization=None,
        )
        payload_builder = NamedTupleBasedPayloadBuilder(
            JdbcConfigSchema(
                location=table_name,
                config=config_coder.encode(write_config),
            ))

        # Only build the default expansion service when none was supplied.
        service = expansion_service or default_io_expansion_service(classpath)
        super().__init__(self.URN, payload_builder, service)
Пример #9
0
 def test_typing_payload_builder_with_bytes(self):
     """
     Build a payload from ``self.bytes_values`` and expect ``self.values``.

     string_utf8 coder will be used even if values are not unicode in
     python 2.x
     """
     payload = self.get_payload_from_typing_hints(self.bytes_values)
     row = RowCoder(payload.schema).decode(payload.payload)

     for field_name, expected in self.values.items():
         actual = getattr(row, field_name)
         self.assertEqual(actual, expected)
Пример #10
0
  def test_row_coder_cloud_object_schema(self):
    """The coder's cloud object holds its schema as UTF-8 encoded JSON."""
    empty_schema = schema_pb2.Schema()
    # Serialise the expected JSON before the coder is built from the proto.
    expected_json = json_format.MessageToJson(empty_schema).encode('utf-8')

    row_coder = RowCoder(empty_schema)
    as_cloud = row_coder.as_cloud_object()

    self.assertEqual(expected_json, as_cloud['schema'])
Пример #11
0
    def test_row_coder_fail_early_bad_schema(self):
        """Constructing a RowCoder from a field with an empty type fails."""
        untyped_field = schema_pb2.Field(
            name="type_with_no_typeinfo", type=schema_pb2.FieldType())
        schema_proto = schema_pb2.Schema(fields=[untyped_field])

        # Should raise an exception referencing the problem field
        with self.assertRaisesRegex(ValueError, "type_with_no_typeinfo"):
            RowCoder(schema_proto)
Пример #12
0
    def test_implicit_payload_builder(self):
        """Payload built from ``PayloadBase.values`` decodes to those values."""
        payload = ImplicitSchemaPayloadBuilder(PayloadBase.values).build()
        row = RowCoder(payload.schema).decode(payload.payload)

        for name, expected in PayloadBase.values.items():
            # Note the default value in the getattr call.
            # ImplicitSchemaPayloadBuilder omits fields with value=None since
            # their type cannot be inferred.
            actual = getattr(row, name, None)
            self.assertEqual(actual, expected)
Пример #13
0
    def test_create_row_coder_from_named_tuple(self):
        """The registry's coder for ``Person`` matches a schema-built RowCoder.

        For each row in ``self.PEOPLE`` both coders must produce the same
        bytes, and the registry's coder must round-trip the row.
        """
        person_schema = typing_to_runner_api(Person).row_type.schema
        expected_coder = RowCoder(person_schema)
        real_coder = coders_registry.get_coder(Person)

        for person in self.PEOPLE:
            expected_bytes = expected_coder.encode(person)
            actual_bytes = real_coder.encode(person)
            self.assertEqual(expected_bytes, actual_bytes)

            roundtripped = real_coder.decode(real_coder.encode(person))
            self.assertEqual(person, roundtripped)
Пример #14
0
  def test_encoding_position_add_fields_and_reorder(self):
    """Decode old-schema bytes with a schema that adds and reorders fields.

    The new schema sets ``encoding_positions_set``, keeps ``f_int32`` and
    ``f_str`` at encoding positions 0 and 1, and adds a nullable
    ``f_new_str`` at position 2 that is declared first. A row encoded with
    the old schema must decode with the new field set to ``None``.
    """
    int32_type = schema_pb2.FieldType(atomic_type=schema_pb2.INT32)
    string_type = schema_pb2.FieldType(atomic_type=schema_pb2.STRING)
    nullable_string_type = schema_pb2.FieldType(
        atomic_type=schema_pb2.STRING, nullable=True)

    old_schema = schema_pb2.Schema(
        id="add_test_old",
        fields=[
            schema_pb2.Field(name="f_int32", type=int32_type),
            schema_pb2.Field(name="f_str", type=string_type),
        ])
    new_schema = schema_pb2.Schema(
        encoding_positions_set=True,
        id="add_test_new",
        fields=[
            schema_pb2.Field(
                name="f_new_str",
                type=nullable_string_type,
                encoding_position=2),
            schema_pb2.Field(
                name="f_int32", type=int32_type, encoding_position=0),
            schema_pb2.Field(
                name="f_str", type=string_type, encoding_position=1),
        ])

    old_row_type = named_tuple_from_schema(old_schema)
    new_row_type = named_tuple_from_schema(new_schema)

    new_coder = RowCoder(new_schema)
    old_coder = RowCoder(old_schema)
    encoded = old_coder.encode(old_row_type(42, "Hello World!"))
    roundtripped = new_coder.decode(encoded)

    expected = new_row_type(f_new_str=None, f_int32=42, f_str="Hello World!")
    self.assertEqual(expected, roundtripped)
Пример #15
0
 def test_row_accepts_trailing_zeros_truncated(self):
     """Decode a row after swapping its header for a shorter one.

     The first four bytes of the encoding are replaced by a three-byte
     header (field count 9, one null-bitmap byte), and decoding must still
     yield the original row. Presumably the original header carries a second,
     all-zero bitmap byte that the shorter header omits -- confirm against the
     row encoding specification.
     """
     coder = RowCoder(typing_to_runner_api(NullablePerson).row_type.schema)
     person = NullablePerson(
         None,
         np.int32(25),
         "Westeros",
         ["Mother of Dragons"],
         False,
         None,
         {"dragons": 3},
         None,
         "NotNull",
     )
     encoded = coder.encode(person)

     # 9 fields, 1 null byte, field 0, 5, 7 are null
     null_bits = (1 << 0) | (1 << 5) | (1 << 7)
     short_header = bytes([9, 1, null_bits])
     truncated_payload = short_header + encoded[4:]

     decoded = coder.decode(truncated_payload)
     self.assertEqual(person, decoded)
Пример #16
0
    def __init__(
        self,
        table_name,
        driver_class_name,
        jdbc_url,
        username,
        password,
        query=None,
        output_parallelization=None,
        fetch_size=None,
        connection_properties=None,
        connection_init_sqls=None,
        expansion_service=None,
    ):
        """
        Initializes a read operation from Jdbc.

        :param table_name: forwarded as the ``location`` of the Jdbc
                           configuration schema
        :param driver_class_name: name of the jdbc driver class
        :param jdbc_url: full jdbc url to the database.
        :param username: database username
        :param password: database password
        :param query: sql query to be executed
        :param output_parallelization: is output parallelization on
        :param fetch_size: how many rows to fetch
        :param connection_properties: properties of the jdbc connection
                                      passed as string with format
                                      [propertyName=property;]*
        :param connection_init_sqls: required only for MySql and MariaDB.
                                     passed as list of strings
        :param expansion_service: The address (host:port) of the
                                  ExpansionService.
        """
        # The connection settings travel as a Config row encoded to bytes.
        config_coder = RowCoder(typing_to_runner_api(Config).row_type.schema)
        # The write statement is explicitly left unset for a read.
        read_config = Config(
            driver_class_name=driver_class_name,
            jdbc_url=jdbc_url,
            username=username,
            password=password,
            connection_properties=connection_properties,
            connection_init_sqls=connection_init_sqls,
            write_statement=None,
            read_query=query,
            fetch_size=fetch_size,
            output_parallelization=output_parallelization,
        )
        payload_builder = NamedTupleBasedPayloadBuilder(
            JdbcConfigSchema(
                location=table_name,
                config=config_coder.encode(read_config),
            ))

        # Only build the default expansion service when none was supplied.
        service = expansion_service or default_io_expansion_service()
        super().__init__(self.URN, payload_builder, service)
Пример #17
0
    def test_implicit_payload_builder_with_bytes(self):
        """Payload built from ``bytes_values`` decodes to ``PayloadBase.values``.

        Also checks that converting ``typing.List[bytes]`` to a Beam type
        still yields ``typehints.List[bytes]`` afterwards.
        """
        payload = ImplicitSchemaPayloadBuilder(PayloadBase.bytes_values).build()
        row = RowCoder(payload.schema).decode(payload.payload)

        for name, expected in PayloadBase.values.items():
            # Note the default value in the getattr call.
            # ImplicitSchemaPayloadBuilder omits fields with value=None since
            # their type cannot be inferred.
            actual = getattr(row, name, None)
            self.assertEqual(actual, expected)

        # Verify we have not modified a cached type (BEAM-10766)
        # TODO(BEAM-7372): Remove when bytes coercion code is removed.
        converted = convert_to_beam_type(typing.List[bytes])
        self.assertEqual(typehints.List[bytes], converted)
Пример #18
0
    def test_implicit_payload_builder_with_bytes(self):
        """Payload built from ``bytes_values`` decodes to the expected values.

        On Python 2 the decoded row is compared against
        ``PayloadBase.bytes_values``; on later versions against
        ``PayloadBase.values``.
        """
        payload = ImplicitSchemaPayloadBuilder(PayloadBase.bytes_values).build()
        row = RowCoder(payload.schema).decode(payload.payload)

        # Pick the reference mapping once; the comparison loop is otherwise
        # identical for both interpreter generations.
        running_python2 = sys.version_info[0] < 3
        reference = (
            PayloadBase.bytes_values if running_python2 else PayloadBase.values)

        for name, expected in reference.items():
            # Note the default value in the getattr call.
            # ImplicitSchemaPayloadBuilder omits fields with value=None since
            # their type cannot be inferred.
            self.assertEqual(getattr(row, name, None), expected)
Пример #19
0
def parse_string_payload(input_byte):
    """Decode a serialized ExternalConfigurationPayload into a dict.

    :param input_byte: serialized ``ExternalConfigurationPayload`` message.
    :return: the result of ``_asdict()`` on the row decoded from the
        message's payload using the message's schema.
    """
    config = external_transforms_pb2.ExternalConfigurationPayload()
    config.ParseFromString(input_byte)

    row = RowCoder(config.schema).decode(config.payload)
    return row._asdict()
Пример #20
0
 def build(self):
     """Return an ExternalConfigurationPayload for this builder's row.

     The row comes from ``self._get_named_tuple_instance()``; its type
     supplies the schema, and the row encoded with that schema supplies the
     payload.
     """
     instance = self._get_named_tuple_instance()
     row_schema = named_tuple_to_schema(type(instance))
     encoded_row = RowCoder(row_schema).encode(instance)
     return ExternalConfigurationPayload(
         schema=row_schema, payload=encoded_row)
Пример #21
0
 def test_typehints_payload_builder(self):
     """Payload built from ``self.values`` decodes back to those values."""
     payload = self.get_payload_from_typing_hints(self.values)
     row = RowCoder(payload.schema).decode(payload.payload)

     for field_name, expected in self.values.items():
         actual = getattr(row, field_name)
         self.assertEqual(actual, expected)