Пример #1
0
class AvroProducer(Producer):
    """
        Kafka Producer client which does avro schema encoding to messages.
        Handles schema registration, Message serialization.

        Constructor takes below parameters

        @:param: config: dict object with config parameters containing url for schema registry (schema.registry.url).
        @:param: default_key_schema: Optional avro schema for key
        @:param: default_value_schema: Optional avro schema for value
    """
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None):
        if 'schema.registry.url' not in config:
            raise ValueError("Missing parameter: schema.registry.url")
        # Remove the registry url before handing the remaining config to the
        # base Producer, which does not understand schema-registry keys.
        schema_registry_url = config.pop("schema.registry.url")

        super(AvroProducer, self).__init__(config)
        self._serializer = MessageSerializer(
            CachedSchemaRegistryClient(url=schema_registry_url))
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def produce(self, **kwargs):
        """
            Sends message to kafka by encoding with specified avro schema
            @:param: topic: topic name
            @:param: value: A dictionary object
            @:param: value_schema : Avro schema for value
            @:param: key: A dictionary object
            @:param: key_schema : Avro schema for key
            @:exception: SerializerError
        """
        # get schemas from kwargs if defined, falling back to the defaults
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        # Compare against None explicitly: falsy payloads such as 0, '' or {}
        # are valid messages and must still be serialized (the previous
        # truthiness check silently skipped them).
        if value is not None:
            if value_schema:
                value = self._serializer.encode_record_with_schema(
                    topic, value_schema, value)
            else:
                raise SerializerError("Avro schema required for value")

        if key is not None:
            if key_schema:
                key = self._serializer.encode_record_with_schema(
                    topic, key_schema, key, True)
            else:
                raise SerializerError("Avro schema required for key")

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
Пример #2
0
class AvroProducer(object):
    """
        Kafka producer wrapper which avro-encodes message keys and values
        via a schema registry before delegating to the inner Producer.

        :param dict config: Producer config. Schema-registry settings
            (``schema.registry.url`` and the ``schema.registry.ssl.*`` keys)
            are removed from it before the inner Producer is built.
        :param default_key_schema: Optional default avro schema for keys.
        :param default_value_schema: Optional default avro schema for values.
        :param schema_registry: Optional pre-built registry client; mutually
            exclusive with ``schema.registry.url`` in config.
    """
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None,
                 schema_registry=None):
        # The inner Producer does not understand schema-registry keys,
        # so pop them out of the shared config dict.
        schema_registry_url = config.pop("schema.registry.url", None)
        schema_registry_ca_location = config.pop(
            "schema.registry.ssl.ca.location", None)
        schema_registry_certificate_location = config.pop(
            "schema.registry.ssl.certificate.location", None)
        schema_registry_key_location = config.pop(
            "schema.registry.ssl.key.location", None)

        if schema_registry is None:
            if schema_registry_url is None:
                raise ValueError("Missing parameter: schema.registry.url")

            schema_registry = CachedSchemaRegistryClient(
                url=schema_registry_url,
                ca_location=schema_registry_ca_location,
                cert_location=schema_registry_certificate_location,
                key_location=schema_registry_key_location)
        elif schema_registry_url is not None:
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )

        self.producer = Producer(config)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def flush(self):
        """Wait for all queued messages to be delivered."""
        self.producer.flush()

    def produce(self, **kwargs):
        """
            Avro-encode ``key``/``value`` and produce them to ``topic``.

            :param str topic: topic name (required)
            :param object value: value payload to serialize
            :param str value_schema: Avro schema for value
            :param object key: key payload to serialize
            :param str key_schema: Avro schema for key

            Remaining kwargs are forwarded to Producer.produce.

            :raises ClientError: if no topic is given
            :raises ValueSerializerError: if a value is given without a schema
            :raises KeySerializerError: if a key is given without a schema
        """
        # per-call schemas override the constructor defaults
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        if value is not None:
            if value_schema:
                value = self._serializer.encode_record_with_schema(
                    topic, value_schema, value)
            else:
                raise ValueSerializerError("Avro schema required for values")

        if key is not None:
            if key_schema:
                key = self._serializer.encode_record_with_schema(
                    topic, key_schema, key, True)
            else:
                # Previously a key without a schema was silently sent raw,
                # producing mixed-format keys on the topic; fail fast instead,
                # mirroring the value path above.
                raise KeySerializerError("Avro schema required for key")

        self.producer.produce(topic, value, key, **kwargs)
def produce(config, topic, input_messages):
    """
        Publish each key/value pair in ``input_messages`` to Kafka:
        values are avro-encoded with the topic's latest registered value
        schema and produced asynchronously with ``acked`` as the callback.

    Parameters
    ----------
        topic: str
            topic to publish the input messages to
        input_messages: dict
            key/value input messages
        config: dict
            the config values needed by the producer

    Raises
    ------
        ValueError
            if the topic is missing or input_messages is empty
        RuntimeError
            if the broker reports no topics
     """
    if topic is None:
        logger.debug('Required topic field must be set')
        raise ValueError()

    # an empty dict means there is nothing to publish
    if not input_messages:
        logger.debug('Required data field must not be empty.')
        raise ValueError()

    bootstrap_servers, schema_registry = producer_config(config)

    producer = Producer(bootstrap_servers)
    admin_client = AdminClient(bootstrap_servers)
    topics = admin_client.list_topics().topics
    # Just to show what's available
    print(topics)

    if not topics:
        print('Not Topics')
        raise RuntimeError()

    sr = CachedSchemaRegistryClient(schema_registry)
    ser = MessageSerializer(sr)
    # look up the latest value schema registered for this topic
    # (renamed from `id`, which shadowed the builtin)
    schema_id, schema, version = sr.get_latest_schema(topic + "-value")
    if schema is None:
        # Without a registered schema nothing can be serialized; only this
        # failure path should terminate the process — the original called
        # sys.exit(1) unconditionally, killing the caller even on success.
        print('Schema not found for topic name: ', topic)
        sys.exit(1)

    for key, value in input_messages.items():
        if validate_uuid4(key):
            serialized_message = ser.encode_record_with_schema(
                topic, schema, value)
            producer.produce(topic=topic,
                             key=key,
                             value=serialized_message,
                             callback=acked)
            # producer.flush() # bad idea, it limits throughput to the broker round trip time
            producer.poll(1)
        else:
            # lazy %-formatting: the original passed key as a positional
            # logger arg with no placeholder, so it was never rendered
            logger.error('Invalid UUID String: %s', key)
Пример #4
0
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for MessageSerializer against a mock registry."""

    def setUp(self):
        # serializer under test, backed by an in-memory registry
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id, schema):
        # Confluent wire format: magic byte 0, then a big-endian 4-byte
        # schema id, then the avro-encoded payload.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded_msg, decoded_schema = self.ms.decode_message(message)
        self.assertTrue(decoded_msg)
        self.assertEqual(expected, decoded_msg)
        self.assertEqual(schema, decoded_schema)

    def test_encode_with_schema_id(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id, basic)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id, adv)

    def test_encode_record_with_schema(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id, basic)

    def test_decode_none(self):
        """null/None messages should decode to None."""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # str-based hash; usable as a __hash__ replacement for schema classes
        return hash(str(self))
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for MessageSerializer against a mock registry."""

    def setUp(self):
        # serializer under test, backed by an in-memory registry
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        # Confluent wire format: magic byte 0 + big-endian 4-byte schema id.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(expected, decoded)

    def test_encode_with_schema_id(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id)

    def test_encode_record_with_schema(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id)

    def test_decode_none(self):
        """null/None messages should decode to None."""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # str-based hash; usable as a __hash__ replacement for schema classes
        return hash(str(self))
Пример #6
0
class TestMessageSerializer(unittest.TestCase):
    """Round-trip tests for MessageSerializer using util-parsed schemas."""

    def setUp(self):
        # Make RecordSchema and PrimitiveSchema hashable before wiring up
        # the serializer and its in-memory registry.
        schema.RecordSchema.__hash__ = self.hash_func
        schema.PrimitiveSchema.__hash__ = self.hash_func
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        # Confluent wire format: magic byte 0 + big-endian 4-byte schema id.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(expected, decoded)

    def test_encode_with_schema_id(self):
        basic = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        adv = util.parse_schema_from_string(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id)

    def test_encode_record_with_schema(self):
        basic = util.parse_schema_from_string(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id)

    def hash_func(self):
        # str-based hash installed as __hash__ on the schema classes in setUp
        return hash(str(self))
Пример #7
0
def test_select(started_cluster):
    # type: (ClickHouseCluster) -> None
    """Insert AvroConfluent-encoded rows into ClickHouse and read them back."""

    # Registry is reached from the test host via the mapped localhost port.
    schema_registry_client = CachedSchemaRegistryClient(
        'http://localhost:{}'.format(started_cluster.schema_registry_port))
    serializer = MessageSerializer(schema_registry_client)

    # Single-field record schema: {"value": long}
    schema = avro.schema.make_avsc_object({
        'name':
        'test_record',
        'type':
        'record',
        'fields': [{
            'name': 'value',
            'type': 'long'
        }]
    })

    # Concatenate three wire-format messages (values 0, 1, 2) into one blob.
    buf = io.BytesIO()
    for x in range(0, 3):
        message = serializer.encode_record_with_schema('test_subject', schema,
                                                       {'value': x})
        buf.write(message)
    data = buf.getvalue()

    instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
    # Registry URL as seen from inside the ClickHouse instance.
    # NOTE(review): port 8081 is hard-coded here while the client above uses
    # schema_registry_port — presumably the in-network port; confirm.
    schema_registry_url = "http://{}:{}".format(
        started_cluster.schema_registry_host, 8081)

    run_query(instance,
              "create table avro_data(value Int64) engine = Memory()")
    settings = {'format_avro_schema_registry_url': schema_registry_url}
    run_query(instance, "insert into avro_data format AvroConfluent", data,
              settings)
    stdout = run_query(instance, "select * from avro_data")
    # Expect exactly the three inserted values, in insertion order.
    assert list(map(str.split, stdout.splitlines())) == [
        ["0"],
        ["1"],
        ["2"],
    ]
Пример #8
0
class AvroProducer(Producer):
    """
        Avro-serializing Kafka producer.

        Registers schemas and avro-encodes message keys/values through a
        schema registry before handing them to the base Producer.

        :param dict config: Config parameters containing url for schema registry
                            (``schema.registry.url``) and the standard Kafka
                            client configuration (``bootstrap.servers`` et.al).
        :param str default_key_schema: Optional default avro schema for key
        :param str default_value_schema: Optional default avro schema for value
        :param schema_registry: Optional pre-built registry client; mutually
                                exclusive with ``schema.registry.url``.
    """

    def __init__(self, config, default_key_schema=None,
                 default_value_schema=None, schema_registry=None):
        # Strip the registry settings out of the config before it is handed
        # to the base Producer, which does not understand them.
        url = config.pop("schema.registry.url", None)
        ca_location = config.pop("schema.registry.ssl.ca.location", None)
        cert_location = config.pop(
            "schema.registry.ssl.certificate.location", None)
        key_location = config.pop("schema.registry.ssl.key.location", None)

        if schema_registry is not None:
            if url is not None:
                raise ValueError(
                    "Cannot pass schema_registry along with schema.registry.url config"
                )
        else:
            if url is None:
                raise ValueError("Missing parameter: schema.registry.url")
            schema_registry = CachedSchemaRegistryClient(
                url=url,
                ca_location=ca_location,
                cert_location=cert_location,
                key_location=key_location)

        super(AvroProducer, self).__init__(config)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def produce(self, **kwargs):
        """
            Asynchronously sends message to Kafka by encoding with specified or default avro schema.

            :param str topic: topic name
            :param object value: An object to serialize
            :param str value_schema: Avro schema for value
            :param object key: An object to serialize
            :param str key_schema: Avro schema for key

            Plus any other parameters accepted by confluent_kafka.Producer.produce

            :raises SerializerError: On serialization failure
            :raises BufferError: If producer queue is full.
            :raises KafkaException: For other produce failures.
        """
        # per-call schemas win over the constructor defaults
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        if value is not None:
            if not value_schema:
                raise ValueSerializerError("Avro schema required for values")
            value = self._serializer.encode_record_with_schema(
                topic, value_schema, value)

        if key is not None:
            if not key_schema:
                raise KeySerializerError("Avro schema required for key")
            key = self._serializer.encode_record_with_schema(
                topic, key_schema, key, True)

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
Пример #9
0
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip and schema-evolution tests."""

    def setUp(self):
        # serializer under test, backed by an in-memory registry
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        # Confluent wire format: magic byte 0 + big-endian 4-byte schema id.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(expected, decoded)

    def test_encode_with_schema_id(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id)

    def test_encode_record_with_schema(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id)

    def test_decode_none(self):
        """null/None messages should decode to None."""
        self.assertIsNone(self.ms.decode_message(None))

    def test_decode_with_schema(self):
        topic = 'test_specific'

        schema_v1 = avro.loads(
            data_gen.load_schema_file('evolution_schema_v1.avsc'))
        schema_v2 = avro.loads(
            data_gen.load_schema_file('evolution_schema_v2.avsc'))

        reader_v1 = SpecificRecordMessageDeserializer(self.client,
                                                      value_schema=schema_v1)
        reader_v2 = SpecificRecordMessageDeserializer(self.client,
                                                      value_schema=schema_v2)

        record_v1 = {"name": "suzyq", "age": 27}
        record_v2 = dict(record_v1, gender='NONE')

        # a v1-encoded payload must decode under both reader schemas
        payload_v1 = self.ms.encode_record_with_schema(topic, schema_v1,
                                                       record_v1)
        self.assertDictEqual(record_v1,
                             reader_v1.decode_message(payload_v1, is_key=False))
        self.assertDictEqual(record_v2,
                             reader_v2.decode_message(payload_v1, is_key=False))

        # and likewise for a v2-encoded payload
        payload_v2 = self.ms.encode_record_with_schema(topic, schema_v2,
                                                       record_v2)
        self.assertDictEqual(record_v2,
                             reader_v2.decode_message(payload_v2, is_key=False))
        self.assertDictEqual(record_v1,
                             reader_v1.decode_message(payload_v2, is_key=False))

    def hash_func(self):
        # str-based hash; usable as a __hash__ replacement for schema classes
        return hash(str(self))
Пример #10
0
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip and encoder-cache tests."""

    def setUp(self):
        # serializer under test, backed by an in-memory registry
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        # Confluent wire format: magic byte 0 + big-endian 4-byte schema id.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(expected, decoded)

    def test_encode_with_schema_id(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id)

    def test_encode_record_with_schema(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id)

    @skipIf(version_info < (3, ),
            'unittest.mock.patch not available in Python 2')
    def test_encode_record_with_schema_sets_writers_cache_once(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        self.client.register('test-value', basic)
        # the encoder function for a given schema should be looked up
        # exactly once across repeated encodes
        with patch.object(self.ms, "_get_encoder_func") as encoder_func_mock:
            for rec in data_gen.BASIC_ITEMS:
                self.ms.encode_record_with_schema('test', basic, rec)
        encoder_func_mock.assert_called_once_with(basic)

    def test_decode_none(self):
        """null/None messages should decode to None."""
        self.assertIsNone(self.ms.decode_message(None))

    def hash_func(self):
        # str-based hash; usable as a __hash__ replacement for schema classes
        return hash(str(self))
class TestMessageSerializer(unittest.TestCase):
    """MessageSerializer round-trip and subject-name-strategy tests."""

    def setUp(self):
        # serializer under test, backed by an in-memory registry
        self.client = MockSchemaRegistryClient()
        self.ms = MessageSerializer(self.client)

    def assertMessageIsSame(self, message, expected, schema_id):
        # Confluent wire format: magic byte 0 + big-endian 4-byte schema id.
        self.assertTrue(message)
        self.assertTrue(len(message) > 5)
        magic, sid = struct.unpack('>bI', message[0:5])
        self.assertEqual(0, magic)
        self.assertEqual(schema_id, sid)
        decoded = self.ms.decode_message(message)
        self.assertTrue(decoded)
        self.assertEqual(expected, decoded)

    def _subject_with_strategy(self, strategy, is_key):
        # Install the given subject-name strategy on the registry client and
        # resolve the subject for topic "topic" with the basic schema.
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        registry = self.ms.registry_client
        if is_key:
            registry.key_subject_name_strategy_func = strategy
        else:
            registry.value_subject_name_strategy_func = strategy
        return self.ms._get_subject(topic="topic", schema=basic,
                                    is_key=is_key)

    def test_encode_with_schema_id(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        adv = avro.loads(data_gen.ADVANCED_SCHEMA)

        basic_id = self.client.register('test', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(basic_id, rec)
            self.assertMessageIsSame(encoded, rec, basic_id)

        adv_id = self.client.register('test_adv', adv)
        self.assertNotEqual(adv_id, basic_id)
        for rec in data_gen.ADVANCED_ITEMS:
            encoded = self.ms.encode_record_with_schema_id(adv_id, rec)
            self.assertMessageIsSame(encoded, rec, adv_id)

    def test_encode_record_with_schema(self):
        basic = avro.loads(data_gen.BASIC_SCHEMA)
        schema_id = self.client.register('test-value', basic)
        for rec in data_gen.BASIC_ITEMS:
            encoded = self.ms.encode_record_with_schema('test', basic, rec)
            self.assertMessageIsSame(encoded, rec, schema_id)

    def test_decode_none(self):
        """null/None messages should decode to None."""
        self.assertIsNone(self.ms.decode_message(None))

    def test__get_subject_for_key_with_topic_name_strategy(self):
        subject = self._subject_with_strategy(topic_name_strategy, True)
        self.assertEqual("topic-key", subject)

    def test__get_subject_for_key_with_record_name_strategy(self):
        subject = self._subject_with_strategy(record_name_strategy, True)
        self.assertEqual("python.test.basic.basic", subject)

    def test__get_subject_for_key_with_topic_record_name_strategy(self):
        subject = self._subject_with_strategy(topic_record_name_strategy, True)
        self.assertEqual("topic-python.test.basic.basic", subject)

    def test__get_subject_for_value_with_topic_name_strategy(self):
        subject = self._subject_with_strategy(topic_name_strategy, False)
        self.assertEqual("topic-value", subject)

    def test__get_subject_for_value_with_record_name_strategy(self):
        subject = self._subject_with_strategy(record_name_strategy, False)
        self.assertEqual("python.test.basic.basic", subject)

    def test__get_subject_for_value_with_topic_record_name_strategy(self):
        subject = self._subject_with_strategy(topic_record_name_strategy, False)
        self.assertEqual("topic-python.test.basic.basic", subject)

    def hash_func(self):
        # str-based hash; usable as a __hash__ replacement for schema classes
        return hash(str(self))
class AvroProducer(Producer):
    """
        Avro-serializing Kafka producer.

        Splits the supplied config into schema-registry settings and plain
        producer settings, then avro-encodes keys/values via the registry
        before delegating to the base Producer.

        :param dict config: Config parameters containing url for schema registry
                            (``schema.registry.url``) and the standard Kafka
                            client configuration (``bootstrap.servers`` et.al).
        :param str default_key_schema: Optional default avro schema for key
        :param str default_value_schema: Optional default avro schema for value
        :param schema_registry: Optional pre-built registry client; mutually
                                exclusive with ``schema.registry.url``.
    """

    def __init__(self, config, default_key_schema=None,
                 default_value_schema=None, schema_registry=None):

        # Partition the config: "schema.registry.*" keys (prefix stripped)
        # go to the registry client, everything else to the base Producer.
        sr_conf = {}
        ap_conf = {}
        for conf_key, conf_value in config.items():
            if conf_key.startswith("schema.registry"):
                sr_conf[conf_key.replace("schema.registry.", "")] = conf_value
            else:
                ap_conf[conf_key] = conf_value

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            # reuse the producer's SASL credentials for the registry
            for sasl_key in ('sasl.mechanisms', 'sasl.username',
                             'sasl.password'):
                sr_conf[sasl_key] = config.get(sasl_key, '')

        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url") is not None:
            raise ValueError("Cannot pass schema_registry along with schema.registry.url config")

        super(AvroProducer, self).__init__(ap_conf)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def produce(self, **kwargs):
        """
            Asynchronously sends message to Kafka by encoding with specified or default avro schema.

            :param str topic: topic name
            :param object value: An object to serialize
            :param str value_schema: Avro schema for value
            :param object key: An object to serialize
            :param str key_schema: Avro schema for key

            Plus any other parameters accepted by confluent_kafka.Producer.produce

            :raises SerializerError: On serialization failure
            :raises BufferError: If producer queue is full.
            :raises KafkaException: For other produce failures.
        """
        # per-call schemas win over the constructor defaults
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        if value is not None:
            if not value_schema:
                raise ValueSerializerError("Avro schema required for values")
            value = self._serializer.encode_record_with_schema(
                topic, value_schema, value)

        if key is not None:
            if not key_schema:
                raise KeySerializerError("Avro schema required for key")
            key = self._serializer.encode_record_with_schema(
                topic, key_schema, key, True)

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
Пример #13
0
class AvroProducer(Producer):
    """
        Kafka Producer client that Avro-encodes message keys and values.

        Handles schema registration and message serialization.

        :param dict config: Config parameters containing url for schema registry (``schema.registry.url``)
                            and the standard Kafka client configuration (``bootstrap.servers`` et.al).
        :param str default_key_schema: Optional default avro schema for key
        :param str default_value_schema: Optional default avro schema for value
    """
    def __init__(self,
                 config,
                 default_key_schema=None,
                 default_value_schema=None,
                 schema_registry=None,
                 **kwargs):

        # Partition the config: ``schema.registry.*`` entries (prefix
        # stripped) go to the registry client, everything else to the
        # underlying Kafka producer.
        sr_conf = {}
        ap_conf = {}
        for conf_key, conf_value in config.items():
            if conf_key.startswith("schema.registry"):
                sr_conf[conf_key.replace("schema.registry.", "")] = conf_value
            else:
                ap_conf[conf_key] = conf_value

        if sr_conf.get("basic.auth.credentials.source") == 'SASL_INHERIT':
            # Fallback to plural 'mechanisms' for backward compatibility
            sr_conf['sasl.mechanism'] = config.get('sasl.mechanism',
                                                   config.get('sasl.mechanisms', ''))
            sr_conf['sasl.username'] = config.get('sasl.username', '')
            sr_conf['sasl.password'] = config.get('sasl.password', '')
            sr_conf['auto.register.schemas'] = config.get('auto.register.schemas', True)

        if schema_registry is None:
            schema_registry = CachedSchemaRegistryClient(sr_conf)
        elif sr_conf.get("url", None) is not None:
            # An explicit client and a registry url are mutually exclusive.
            raise ValueError(
                "Cannot pass schema_registry along with schema.registry.url config"
            )

        super(AvroProducer, self).__init__(ap_conf, **kwargs)
        self._serializer = MessageSerializer(schema_registry)
        self._key_schema = default_key_schema
        self._value_schema = default_value_schema

    def produce(self, **kwargs):
        """
            Asynchronously produce a message to Kafka, Avro-encoding key and value.

            Accepts ``topic``, ``value``, ``value_schema``, ``key`` and
            ``key_schema`` keyword arguments; the schemas default to the ones
            supplied at construction time. All remaining keyword arguments are
            forwarded to confluent_kafka.Producer.produce.

            :raises ClientError: If no topic name was supplied.
            :raises ValueSerializerError: If a value is given without a schema.
            :raises KeySerializerError: If a key is given without a schema.
            :raises SerializerError: On serialization failure
            :raises BufferError: If producer queue is full.
            :raises KafkaException: For other produce failures.
        """
        topic = kwargs.pop('topic', None)
        if not topic:
            raise ClientError("Topic name not specified.")

        # Per-call schemas win; otherwise fall back to the constructor defaults.
        key_schema = kwargs.pop('key_schema', self._key_schema)
        value_schema = kwargs.pop('value_schema', self._value_schema)
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        if value is not None:
            if not value_schema:
                raise ValueSerializerError("Avro schema required for values")
            value = self._serializer.encode_record_with_schema(
                topic, value_schema, value)

        if key is not None:
            if not key_schema:
                raise KeySerializerError("Avro schema required for key")
            key = self._serializer.encode_record_with_schema(
                topic, key_schema, key, True)

        super(AvroProducer, self).produce(topic, value, key, **kwargs)
class AvroProducer(object):
    '''
        Kafka Producer client which does avro schema encoding to messages.
        Handles schema registration, Message serialization.

        Constructor takes below parameters

        @:param: producer: confluent_kafka.Producer object
        @:param: schema_registry_url: URL of the Confluent schema registry
        @:param: default_key_schema: Optional default avro schema for keys
        @:param: default_value_schema: Optional default avro schema for values
    '''
    def __init__(self,
                 producer,
                 schema_registry_url,
                 default_key_schema=None,
                 default_value_schema=None
                 ):  # real signature unknown; restored from __doc__
        self._producer = producer
        self._serializer = MessageSerializer(
            CachedSchemaRegistryClient(url=schema_registry_url))
        self.key_schema = default_key_schema
        self.value_schema = default_value_schema

    def produce(self, **kwargs):
        '''
            Sends message to kafka by encoding with specified avro schema
            @:param: topic: topic name (required)
            @:param: value: A dictionary object
            @:param: value_schema : Avro schema for value
            @:param: key: A dictionary object
            @:param: key_schema : Avro schema for key
            @:exception: ClientError when no topic is given
            @:exception: SerializerError when a key/value lacks a schema
        '''
        # get schemas from  kwargs if defined
        key_schema = kwargs.pop('key_schema', None)
        value_schema = kwargs.pop('value_schema', None)
        topic = kwargs.pop('topic', None)
        if not topic:
            log.error("Topic name not specified.")
            raise ClientError("Topic name not specified.")
        value = kwargs.pop('value', None)
        key = kwargs.pop('key', None)

        # if key_schema is not initialized, fall back on default key_schema passed as construction param.
        if not key_schema:
            key_schema = self.key_schema

        # if value_schema is not initialized, fall back on default value_schema passed as construction param.
        if not value_schema:
            value_schema = self.value_schema

        # Fix: test against None instead of truthiness so falsy-but-valid
        # payloads (0, False, '', empty dict) are still serialized rather
        # than silently passed through unencoded. This matches the
        # `is not None` checks used by the other AvroProducer variants.
        if value is not None:
            if value_schema:
                value = self._serializer.encode_record_with_schema(
                    topic, value_schema, value)
            else:
                log.error("Schema required for value serialization")
                raise SerializerError("Avro schema required for value")

        if key is not None:
            if key_schema:
                key = self._serializer.encode_record_with_schema(
                    topic, key_schema, key, True)
            else:
                log.error("Schema required for key serialization")
                raise SerializerError("Avro schema required for key")

        self._producer.produce(topic, value, key, **kwargs)

    def poll(self, timeout):
        '''Serve delivery callbacks on the wrapped producer.'''
        self._producer.poll(timeout)

    def flush(self, *args, **kwargs):
        '''Wait for all messages in the producer queue to be delivered.'''
        self._producer.flush(*args, **kwargs)
def delivery_report(err, msg):
    """Delivery callback: log the outcome of a produce attempt.

    :param err: KafkaError describing the failure, or None on success.
    :param msg: the Message that was (or failed to be) delivered.
    """
    if err is None:
        logging.info(
            "Record successfully delivered -> TOPIC: {} - PARTITION: [{}] - OFFSET {}"
            .format(msg.topic(), msg.partition(), msg.offset()))
    else:
        logging.error("Delivery failed for User record {}: {}".format(
            msg.key(), err))


# Plain (non-Avro) producer; payloads are pre-encoded below.
producer = Producer(config)

# Avro data needs to be loaded as an encoded dict with schema and the Producer takes care of inserting it properly
# Just load 5 records into the source topic
for i in range(5):
    data.id = i
    data.name = ("Value of the string that was inserted is "
                 "~~~~~~ ========= -----> " + str(i))
    avro_record = serializer.encode_record_with_schema(
        topic="avro-topic-1",
        schema=RECORD_SCHEMA,
        record=data.__dict__,
        is_key=False)
    producer.produce("avro-topic",
                     key=str(1),
                     value=avro_record,
                     on_delivery=delivery_report)
    time.sleep(3)

# EOS - Synchronous writes
producer.flush()