def __init__(self, config: KafkaEmitterConfig):
    self.config = config
    schema_registry_conf = {
        "url": self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(
        mce: MetadataChangeEvent, ctx: SerializationContext
    ) -> dict:
        tuple_encoding = mce.to_obj(tuples=True)
        return tuple_encoding

    avro_serializer = AvroSerializer(
        schema_str=getMetadataChangeEventSchema(),
        schema_registry_client=schema_registry_client,
        to_dict=convert_mce_to_dict,
    )

    producer_config = {
        "bootstrap.servers": self.config.connection.bootstrap,
        "key.serializer": StringSerializer("utf_8"),
        "value.serializer": avro_serializer,
        **self.config.connection.producer_config,
    }

    self.producer = SerializingProducer(producer_config)
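A usage sketch for the producer built above, not necessarily the emitter's public API: the helper name, topic name, and urn-based keying are assumptions for illustration; only produce() and flush() are the actual confluent-kafka SerializingProducer calls.

def emit_mce_example(emitter, mce: MetadataChangeEvent) -> None:
    # Hypothetical helper: topic name and key choice are assumed here.
    emitter.producer.produce(
        topic="MetadataChangeEvent_v4",  # assumed topic name
        key=mce.proposedSnapshot.urn,  # assumed: key messages by entity urn
        value=mce,  # the AvroSerializer applies convert_mce_to_dict
    )
    emitter.producer.flush()  # block until outstanding messages are delivered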
def test_serde_to_avro(pytestconfig: PytestConfig, json_filename: str) -> None:
    # In this test, we want to read in from JSON -> MCE object.
    # Next we serialize from MCE to Avro and then deserialize back to MCE.
    # Finally, we want to compare the two MCE objects.
    json_path = pytestconfig.rootpath / json_filename
    mces = list(iterate_mce_file(str(json_path)))

    # Serialize to Avro.
    parsed_schema = fastavro.parse_schema(json.loads(getMetadataChangeEventSchema()))
    fo = io.BytesIO()
    out_records = [mce.to_obj(tuples=True) for mce in mces]
    fastavro.writer(fo, parsed_schema, out_records)

    # Deserialize from Avro.
    fo.seek(0)
    in_records = list(fastavro.reader(fo, return_record_name=True))
    in_mces = [
        MetadataChangeEventClass.from_obj(record, tuples=True) for record in in_records
    ]

    # Compare the original and round-tripped MCEs.
    assert len(mces) == len(in_mces)
    for i in range(len(mces)):
        assert mces[i] == in_mces[i]
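The same round trip can be exercised for a single record outside pytest. A minimal sketch, assuming the same imports and helpers as the test above (iterate_mce_file, getMetadataChangeEventSchema, MetadataChangeEventClass); the function name and path handling are illustrative, while the fastavro calls match those used in the test.

import io
import json

import fastavro

def roundtrip_one_mce(json_path: str) -> None:
    # Hypothetical helper: take the first MCE from a file and round-trip it.
    mce = list(iterate_mce_file(json_path))[0]
    parsed_schema = fastavro.parse_schema(json.loads(getMetadataChangeEventSchema()))
    fo = io.BytesIO()
    # tuples=True encodes Avro union fields as (type_name, value) pairs,
    # which matches what fastavro expects when writing unions.
    fastavro.writer(fo, parsed_schema, [mce.to_obj(tuples=True)])
    fo.seek(0)
    # return_record_name=True yields unions back as (type_name, value) pairs,
    # so from_obj(..., tuples=True) can reconstruct the original object.
    record = next(fastavro.reader(fo, return_record_name=True))
    assert MetadataChangeEventClass.from_obj(record, tuples=True) == mce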
def __init__(self, config: KafkaEmitterConfig):
    self.config = config
    schema_registry_conf = {
        "url": self.config.connection.schema_registry_url,
        **self.config.connection.schema_registry_config,
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    def convert_mce_to_dict(
        mce: MetadataChangeEvent, ctx: SerializationContext
    ) -> dict:
        tuple_encoding = mce.to_obj(tuples=True)
        return tuple_encoding

    mce_avro_serializer = AvroSerializer(
        schema_str=getMetadataChangeEventSchema(),
        schema_registry_client=schema_registry_client,
        to_dict=convert_mce_to_dict,
    )

    def convert_mcp_to_dict(
        mcp: Union[MetadataChangeProposal, MetadataChangeProposalWrapper],
        ctx: SerializationContext,
    ) -> dict:
        tuple_encoding = mcp.to_obj(tuples=True)
        return tuple_encoding

    mcp_avro_serializer = AvroSerializer(
        schema_str=getMetadataChangeProposalSchema(),
        schema_registry_client=schema_registry_client,
        to_dict=convert_mcp_to_dict,
    )

    # We maintain a map of producers, one for each kind of event.
    producers_config = {
        MCE_KEY: {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": mce_avro_serializer,
            **self.config.connection.producer_config,
        },
        MCP_KEY: {
            "bootstrap.servers": self.config.connection.bootstrap,
            "key.serializer": StringSerializer("utf_8"),
            "value.serializer": mcp_avro_serializer,
            **self.config.connection.producer_config,
        },
    }

    self.producers = {
        key: SerializingProducer(value) for (key, value) in producers_config.items()
    }
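With one producer per event kind, emission can route on the record type. A minimal sketch of how the producers map might be consumed: the emit_async method name, topic names, and callback wiring are assumptions for illustration; MCE_KEY and MCP_KEY are the same constants used in __init__ above.

def emit_async(self, item, on_delivery=None) -> None:
    # Hypothetical routing helper: pick the producer matching the event kind.
    if isinstance(item, MetadataChangeEvent):
        producer_key, topic = MCE_KEY, "MetadataChangeEvent_v4"  # assumed topic
    else:
        producer_key, topic = MCP_KEY, "MetadataChangeProposal_v1"  # assumed topic
    self.producers[producer_key].produce(
        topic=topic, value=item, on_delivery=on_delivery
    )
    self.producers[producer_key].poll(0)  # serve delivery callbacks without blocking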