def _make_offset_consumer(self) -> DeserializingConsumer:
    """
    Creates the underlying :class:`confluent_kafka.DeserializingConsumer` which is
    used to fetch the last committed producer offsets.
    """
    key_deserializer = AvroDeserializer(self.schema_registry_client)
    value_deserializer = AvroDeserializer(self.schema_registry_client)

    config = {
        "bootstrap.servers": self.config["bootstrap.servers"],
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.partition.eof": True,
        "group.id": f'{self.config["offset_topic"]}_fetcher',
        "default.topic.config": {"auto.offset.reset": "latest"},
        **self.config["kafka_opts"],
        **self.config["kafka_consumer_opts"],
    }

    offset_consumer = DeserializingConsumer(config)
    logger.info(f"Offset Consumer created with config: {pformat(config, indent=2)}")
    return offset_consumer
def _make_consumer(self) -> DeserializingConsumer:
    schema_registry_client = SchemaRegistryClient({"url": self.config["schema_registry"]})
    key_deserializer = AvroDeserializer(schema_registry_client)
    value_deserializer = AvroDeserializer(schema_registry_client)

    config = {
        "bootstrap.servers": ",".join(self.config["bootstrap_servers"]),
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.auto.commit": False,
        "enable.partition.eof": True,
        "group.id": self.config["group_id"],
        "default.topic.config": {"auto.offset.reset": "earliest"},
        **self.config["kafka_opts"],
    }

    hash_sensitive_values = self.config["hash_sensitive_values"]
    consumer = DeserializingConsumer(config)
    hidden_config = hide_sensitive_values(config, hash_sensitive_values=hash_sensitive_values)
    logger.info(f"AvroConsumer created with config: {pformat(hidden_config, indent=2)}")
    # noinspection PyArgumentList
    consumer.subscribe(self.config["topics"],
                       on_assign=self._on_assign,
                       on_revoke=self._on_revoke)
    return consumer
def main(args):
    topic = args.topic

    with open('schema/KeySchema.avsc', 'r') as f:
        key_schema_str = f.read()
    with open('schema/ValueSchema.avsc', 'r') as f:
        value_schema_str = f.read()

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_key_deserializer = AvroDeserializer(key_schema_str,
                                             schema_registry_client,
                                             dict_to_user_quote_key)
    avro_value_deserializer = AvroDeserializer(value_schema_str,
                                               schema_registry_client,
                                               dict_to_user_quote_value)

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': avro_key_deserializer,
                     'value.deserializer': avro_value_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user_quote = msg.value()
            if user_quote is not None:
                print("User {} Quote record: product_id: {}\n"
                      "\tquoted_price: {}\n"
                      "\tquoted_quantity: {}\n"
                      "\tuser_note: {}\n"
                      .format(msg.key().user_id,
                              user_quote.product_id,
                              user_quote.quoted_price,
                              user_quote.quoted_quantity,
                              user_quote.user_note))
        except KeyboardInterrupt:
            break

    consumer.close()
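A note on the constructor signature seen across these snippets: they target different releases of confluent-kafka-python. Older releases built the deserializer as AvroDeserializer(schema_str, schema_registry_client, from_dict=...), while newer releases take the registry client first and make the schema string optional, since the writer schema can be looked up from the registry per message. A minimal, hedged sketch of the newer-style call, reusing value_schema_str and dict_to_user_quote_value from the example above:

# Sketch only: newer client-first constructor (assumes a recent confluent-kafka-python
# release; value_schema_str and dict_to_user_quote_value come from the example above).
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer

schema_registry_client = SchemaRegistryClient({'url': 'http://localhost:8081'})
avro_value_deserializer = AvroDeserializer(schema_registry_client,
                                           schema_str=value_schema_str,      # optional here
                                           from_dict=dict_to_user_quote_value)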
def run_consumer(shutdown_flag, clients, lock):
    print("Starting Kafka Consumer.")
    schema_registry_client = SchemaRegistryClient({"url": "http://localhost:8081"})
    deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": "localhost:9092",
        "group.id": "dashboard-demo",
        "value.deserializer": deserializer,
    }

    consumer = DeserializingConsumer(config)
    consumer.subscribe(["DASHBOARD"])

    while not shutdown_flag.done():
        msg = consumer.poll(0.2)

        if msg is None:
            print("Waiting...")
        elif msg.error():
            print(f"ERROR: {msg.error()}")
        else:
            value = msg.value()
            formatted = simplejson.dumps(value)
            print(f"Sending {formatted} to {clients}")

            with lock:
                websockets.broadcast(clients, formatted)

    print("Closing Kafka Consumer")
    consumer.close()
def test_avro_record_serialization_custom(kafka_cluster):
    """
    Tests basic Avro serializer to_dict and from_dict object hook functionality.

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    user = User('Bowie', 47, 'purple')
    value_serializer = AvroSerializer(sr, User.schema_str,
                                      lambda user, ctx:
                                      dict(name=user.name,
                                           favorite_number=user.favorite_number,
                                           favorite_color=user.favorite_color))

    value_deserializer = AvroDeserializer(sr, User.schema_str,
                                          lambda user_dict, ctx: User(**user_dict))

    producer = kafka_cluster.producer(value_serializer=value_serializer)
    producer.produce(topic, value=user, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    user2 = msg.value()

    assert user2 == user
def _make_deserializer(self):
    return {
        SchemaType.AVRO: AvroDeserializer(self.sr_client, AVRO_SCHEMA,
                                          from_dict=lambda d, _: AvroPayload(d['val'])),
        SchemaType.PROTOBUF: ProtobufDeserializer(ProtobufPayloadClass)
    }[self.schema_type]
def create_deserializer(self):
    self.deserializer = {}
    if self.in_topic is not None:
        for topic in self.in_topic:
            if self.in_schema[topic] is None:
                self.deserializer[topic] = StringDeserializer("utf_8")
            else:
                schema_str = self.in_schema[topic].schema_str
                self.deserializer[topic] = AvroDeserializer(schema_str, self.schema_registry)
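Because DeserializingConsumer accepts only a single value.deserializer, a per-topic mapping like the one built above is typically applied by hand after polling a plain Consumer. A minimal sketch of that dispatch, under the assumption that `consumer` is a plain confluent_kafka.Consumer and `deserializers` is the per-topic dict built by the method above:

# Sketch only: manual per-topic deserialization (names `consumer` and `deserializers`
# are stand-ins, not part of the original class).
from confluent_kafka.serialization import SerializationContext, MessageField

msg = consumer.poll(1.0)                      # plain Consumer returns raw bytes
if msg is not None and msg.error() is None:
    ctx = SerializationContext(msg.topic(), MessageField.VALUE)
    value = deserializers[msg.topic()](msg.value(), ctx)   # AvroDeserializer or StringDeserializer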
def main():
    schema_registry_client = SchemaRegistryClient({'url': SCHEMA_REGISTRY_URL})
    avro_deserializer = AvroDeserializer(schema_registry_client=schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'key.deserializer': string_deserializer,
        'max.poll.interval.ms': MAX_POLL_INTERVAL_MS,
        'value.deserializer': avro_deserializer,
        'group.id': CONSUMER_GROUP
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([TOPIC])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            msg_value = msg.value()
            if msg_value is not None:
                try:
                    measurements = list(dict(msg_value).get("measurements"))
                    measurements_df = pd.DataFrame(measurements)

                    groups = measurements_df.groupby("tenant")
                    for _, group in groups:
                        tenant = group.iloc[0]['tenant']
                        device_registry = DeviceRegistry(tenant, AIRQO_BASE_URL)

                        group_measurements = list(group.to_dict(orient="records"))
                        for i in range(0, len(group_measurements), int(REQUEST_BODY_SIZE)):
                            measurements_list = group_measurements[i:i + int(REQUEST_BODY_SIZE)]
                            device_registry.insert_events(measurements_list)
                except Exception as ex:
                    print(ex)
        except KeyboardInterrupt:
            break

    consumer.close()
def test_delivery_report_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    Raises:
        AssertionError on test failure
    """
    topic = kafka_cluster.create_topic("serialization-avro-dr")
    sr = kafka_cluster.schema_registry()
    schema_str = load_avsc(avsc)

    value_serializer = AvroSerializer(sr, schema_str)
    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)

    def assert_cb(err, msg):
        # The deserializer takes the message payload first, then the context.
        actual = value_deserializer(msg.value(),
                                    SerializationContext(topic, MessageField.VALUE))

        if record_type == "record":
            assert [v == actual[k] for k, v in data.items()]
        elif record_type == 'float':
            assert data == pytest.approx(actual)
        else:
            assert actual == data

    producer.produce(topic, value=data, partition=0, on_delivery=assert_cb)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    # schema may include default which need not exist in the original
    if record_type == 'record':
        assert [v == actual[k] for k, v in data.items()]
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
def main(args):
    topic = args.topic

    schema_str = """
    {
        "namespace": "confluent.io.examples.serialization.avro",
        "name": "User",
        "type": "record",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": "int"},
            {"name": "favorite_color", "type": "string"}
        ]
    }
    """

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    avro_deserializer = AvroDeserializer(schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print("User record {}: name: {}\n"
                      "\tfavorite_number: {}\n"
                      "\tfavorite_color: {}\n"
                      .format(msg.key(), user.name,
                              user.favorite_number,
                              user.favorite_color))
        except KeyboardInterrupt:
            break

    consumer.close()
def plain_avro_consumer(running_cluster_config: Dict[str, str],
                        topic_and_partitions: Tuple[str, int]):
    topic_id, _ = topic_and_partitions
    schema_registry_client = SchemaRegistryClient({"url": running_cluster_config["schema-registry"]})
    key_deserializer = AvroDeserializer(schema_registry_client)
    value_deserializer = AvroDeserializer(schema_registry_client)
    config = {
        "bootstrap.servers": running_cluster_config["broker"],
        "group.id": f"{topic_id}_consumer",
        "key.deserializer": key_deserializer,
        "value.deserializer": value_deserializer,
        "enable.partition.eof": False,
        "default.topic.config": {"auto.offset.reset": "earliest"},
        "allow.auto.create.topics": True,
    }
    consumer = DeserializingConsumer(config)
    consumer.subscribe([topic_id])
    consumer.list_topics()
    return consumer
def __init__(self,
             bootstrap_servers: str,
             topic: str,
             group: str,
             callback: Callable[[Message], None],
             schema_registry_url,
             schema,
             poll_timeout: float = 1.0,
             config=None):
    super().__init__(
        bootstrap_servers, topic, group, callback,
        AvroDeserializer(schema, SchemaRegistryClient({"url": schema_registry_url})),
        poll_timeout, config)
def run_consumer(container_manager):
    schema_registry_conf = {'url': config['kafka']['schema_registry']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)
    avro_deserializer = AvroDeserializer(schemas.run_record_schema,
                                         schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    conf = {
        'bootstrap.servers': config['kafka']['servers'],
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': "runs-consumers",
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': 'false'
    }

    consumer = DeserializingConsumer(conf)
    print('[+] Listening for incoming runs')

    try:
        consumer_topics = [config['kafka']['runs-topic']]
        consumer.subscribe(consumer_topics)

        while True:
            try:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    continue

                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('[-] Run initialization')
                    print(msg.value())
                    consumer.commit(asynchronous=False)
                    # handlers.handle_run_execution(container_manager, msg.value())
                    threading.Thread(target=handlers.handle_run_execution,
                                     args=(container_manager, msg.value())).start()
            except ConsumeError as e:
                print(f'[Exception] error_code: {e.code()} message: {e.message()} exception: {e}')
    finally:
        consumer.close()
from confluent_kafka.serialization import MessageField


def process_row(serialized_data):
    schema = '''
    {
        "namespace": "org.mddarr.rides.event.dto",
        "type": "record",
        "name": "AvroRideCoordinate",
        "fields": [
            {"name": "dataID", "type": "string"},
            {"name": "value", "type": "double"}
        ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    # The second SerializationContext argument is the message field being
    # deserialized (key or value), not the schema.
    serializationContext = SerializationContext("time-series", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    return str(deserialized_row['value'])
def main(args):
    topic = args.topic
    schema_str = MetricSchema
    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    client = InfluxDBClient(host=args.host_influx, port=8086,
                            username='******', password='******')

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            timespent = msg.value()
            if timespent is not None:
                print("time ==>", timespent)
                print(timespent["metricName"])
                print(timespent["time"])
                client.switch_database('datascience')
                json_body = [{
                    "measurement": "metric",
                    "fields": {
                        "name": timespent["metricName"],
                        "value": timespent["time"]
                    }
                }]
                client.write_points(json_body)
        except KeyboardInterrupt:
            break

    consumer.close()
def create_consumer(self, registry_client):
    """
    Subscribes to the topic defined in the configs and creates a consumer to
    deserialize messages from that topic.

    :param registry_client: SchemaRegistryClient object, get this from register_client()

    :return: DeserializingConsumer object
    """
    metadata_schema = None
    topic = None

    if self.metadata_type == "COLLECTION":
        metadata_schema = registry_client.get_latest_version(self.collection_topic + '-value').schema.schema_str
        topic = self.collection_topic

    if self.metadata_type == "GRANULE":
        metadata_schema = registry_client.get_latest_version(self.granule_topic + '-value').schema.schema_str
        topic = self.granule_topic

    metadata_deserializer = AvroDeserializer(metadata_schema, registry_client)

    consumer_conf = {'bootstrap.servers': self.brokers}

    if self.security:
        consumer_conf['security.protocol'] = 'SSL'
        consumer_conf['ssl.ca.location'] = self.conf['security']['caLoc']
        consumer_conf['ssl.key.location'] = self.conf['security']['keyLoc']
        consumer_conf['ssl.certificate.location'] = self.conf['security']['certLoc']

    meta_consumer_conf = consumer_conf
    meta_consumer_conf['key.deserializer'] = StringDeserializer('utf-8')
    meta_consumer_conf['value.deserializer'] = metadata_deserializer
    meta_consumer_conf['group.id'] = self.group_id
    meta_consumer_conf['auto.offset.reset'] = self.auto_offset_reset

    metadata_consumer = DeserializingConsumer(meta_consumer_conf)
    metadata_consumer.subscribe([topic])
    return metadata_consumer
def process_row(serialized_data):
    schema = '''
    {
        "namespace": "org.mddarr.rides.event.dto",
        "type": "record",
        "name": "AvroRideCoordinate",
        "fields": [
            {"name": "eventime", "type": "long"},
            {"name": "latitude", "type": "double"},
            {"name": "longitude", "type": "double"}
        ]
    }
    '''
    schemaRegistryClient = SchemaRegistryClient({"url": "http://localhost:8081"})
    avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)
    # As above, the SerializationContext field should be MessageField.VALUE, not the schema.
    serializationContext = SerializationContext("coordinates", MessageField.VALUE)
    deserialized_row = avroDeserializer(serialized_data, serializationContext)
    print("THE DESERIALIZED ROW LOOKS LIKE " + str(deserialized_row))
    return [deserialized_row['latitude'], deserialized_row['longitude'],
            float(deserialized_row['eventime'])]
def __init__(self, value_schema, topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Schema Registry configuration
    self.schema_registry_conf = self.getSchemaRegistryConf()
    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')
    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(value_schema, self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = self.getConsumerConfiguration(groupID, autocommit)
    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)
    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
def __init__(self, consumer_name, value_schema, topic_name="kafka-avro-producer",
             groupID='KafkaAvroConsumer', autocommit=True):

    # Consumer name for logging purposes
    self.logging_prefix = '[' + consumer_name + '][KafkaAvroConsumer]'

    # Schema Registry configuration
    self.schema_registry_conf = EventBackboneConfig.getSchemaRegistryConf()
    # Schema Registry Client
    self.schema_registry_client = SchemaRegistryClient(self.schema_registry_conf)

    # Key Deserializer
    self.key_deserializer = StringDeserializer('utf_8')

    # Get Schema for the value
    self.schema_id_value = self.schema_registry_client.get_latest_version(topic_name + "-value").schema_id
    # print('The Schema ID for the value is: {}'.format(self.schema_id_value))
    self.value_schema = self.schema_registry_client.get_schema(self.schema_id_value).schema_str
    print(self.logging_prefix + ' - Value Subject: {}'.format(topic_name))
    print(self.logging_prefix + ' - Value Schema:')
    print(self.logging_prefix + ' - -------------\n')
    print(self.logging_prefix + ' - ' + self.value_schema + '\n')

    # Value Deserializer
    # Presenting the schema to the Avro Deserializer is needed at the moment. In the future it might change
    # https://github.com/confluentinc/confluent-kafka-python/issues/834
    self.value_deserializer = AvroDeserializer(self.value_schema, self.schema_registry_client)

    # Get the consumer configuration
    self.consumer_conf = EventBackboneConfig.getConsumerConfiguration(groupID, autocommit,
                                                                      self.key_deserializer,
                                                                      self.value_deserializer)
    # Create the consumer
    self.consumer = DeserializingConsumer(self.consumer_conf)

    # Print consumer configuration
    EventBackboneConfig.printConsumerConfiguration(self.logging_prefix, self.consumer_conf,
                                                   self.schema_registry_conf['url'])

    # Subscribe to the topic
    self.consumer.subscribe([topic_name])
def test_avro_record_serialization(kafka_cluster, load_avsc, avsc, data, record_type):
    """
    Tests basic Avro serializer functionality

    Args:
        kafka_cluster (KafkaClusterFixture): cluster fixture
        load_avsc (callable(str)): Avro file reader
        avsc (str): Avro schema file
        data (object): data to be serialized

    Raises:
        AssertionError on test failure
    """
    topic = kafka_cluster.create_topic("serialization-avro")
    sr = kafka_cluster.schema_registry()

    schema_str = load_avsc(avsc)
    value_serializer = AvroSerializer(sr, schema_str)
    value_deserializer = AvroDeserializer(sr, schema_str)

    producer = kafka_cluster.producer(value_serializer=value_serializer)
    producer.produce(topic, value=data, partition=0)
    producer.flush()

    consumer = kafka_cluster.consumer(value_deserializer=value_deserializer)
    consumer.assign([TopicPartition(topic, 0)])

    msg = consumer.poll()
    actual = msg.value()

    if record_type == 'record':
        assert [v == actual[k] for k, v in data.items()]
    elif record_type == 'float':
        assert data == pytest.approx(actual)
    else:
        assert actual == data
def receive_record(args):
    """ Receives Record using a DeserializingConsumer & AvroDeserializer """
    topics = [args.topic.rstrip()]

    schema_registry_config = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(schema_registry_config)

    avro_deserializer = AvroDeserializer(schema_registry_client, DATA_SCHEMA, dict_to_data)
    string_deserializer = StringDeserializer('utf_8')

    consumer_config = {
        'bootstrap.servers': args.bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': args.group,
        'auto.offset.reset': 'earliest'
    }

    consumer = DeserializingConsumer(consumer_config)
    consumer.subscribe(topics)

    print(f'Consuming data records from topic(s) {topics}. ^C to exit.')
    while True:
        try:
            # SIGINT can't be handled while polling, so keep the poll timeout bounded.
            msg = consumer.poll(10.0)
            if msg is None:
                print('\t---Waiting. . .')
                continue

            data = msg.value()
            if data is not None:
                print(f'Data record {msg.key()}:\n'
                      f'\tValues: {data}')
        except KeyboardInterrupt:
            break

    print('\nClosing consumer.')
    consumer.close()
def main(args):
    topic = args.topic

    sr_conf = {'url': args.schema_registry}
    schema_registry_client = SchemaRegistryClient(sr_conf)
    schema_obj = schema_registry_client.get_latest_version(subject_name='example_serde_avro-value')

    avro_deserializer = AvroDeserializer(schema_obj.schema.schema_str,
                                         schema_registry_client,
                                         dict_to_user)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {'bootstrap.servers': args.bootstrap_servers,
                     'key.deserializer': string_deserializer,
                     'value.deserializer': avro_deserializer,
                     'group.id': args.group,
                     'auto.offset.reset': "earliest"}

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            user = msg.value()
            if user is not None:
                print(f"User record {msg.key()}:\n\tname: {user.name}\n"
                      f"\tfavorite_number: {user.favorite_number}\n"
                      f"\tfavorite_color: {user.favorite_color}\n")
        except KeyboardInterrupt:
            break

    consumer.close()
if __name__ == '__main__':
    # Read arguments and configurations and initialize
    args = ccloud_lib.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    name_avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.name_schema,
        from_dict=ccloud_lib.Name.dict_to_name)
    count_avro_deserializer = AvroDeserializer(
        schema_registry_client=schema_registry_client,
        schema_str=ccloud_lib.count_schema,
        from_dict=ccloud_lib.Count.dict_to_count)

    # for full list of configurations, see:
    # https://docs.confluent.io/platform/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = ccloud_lib.pop_schema_registry_params_from_config(conf)
    consumer_conf['key.deserializer'] = name_avro_deserializer
    consumer_conf['value.deserializer'] = count_avro_deserializer
    consumer_conf['group.id'] = 'python_example_group_2'
    consumer_conf['auto.offset.reset'] = 'earliest'
    consumer = DeserializingConsumer(consumer_conf)
import logging

from confluent_kafka import DeserializingConsumer
from confluent_kafka.avro import SerializerError
from confluent_kafka.schema_registry.avro import AvroDeserializer

from avro_schemas.key_schema import key_schema_str
from avro_schemas.value_schema import value_schema_str
from constants import SCHEMA_REGISTRY_CLIENT, Topics, BOOTSTRAP_SERVERS
from utils import reset_to_beginning_on_assign, convert_epoch_to_datetime, reset_to_end_on_assign

consumer = DeserializingConsumer({
    "bootstrap.servers": BOOTSTRAP_SERVERS,
    "key.deserializer": AvroDeserializer(schema_str=key_schema_str,
                                         schema_registry_client=SCHEMA_REGISTRY_CLIENT),
    "value.deserializer": AvroDeserializer(schema_str=value_schema_str,
                                           schema_registry_client=SCHEMA_REGISTRY_CLIENT),
    "group.id": "consumer",
    "auto.offset.reset": "earliest"
})

consumer.subscribe(
    topics=[Topics.TOPIC_1.value],
    on_assign=reset_to_beginning_on_assign,
    # on_assign=reset_to_end_on_assign,
)
class TestMessages:
    test_messages = [
        'test message 1', 'test message 2', 'test message 3', 'test message 4'
    ]
    topic = 'christian_test'
    conf = kafka_utils.read_config('producer_google_chicago_1.config',
                                   'producer_google_chicago_1')

    schema_registry_conf = {'url': conf['schema.registry.url']}
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    key_schema_file = portfolio_path + "/kafka" + conf['google.key.schema.file']
    value_schema_file = portfolio_path + "/kafka" + conf['google.value.schema.file']
    key_schema, value_schema = kafka_utils.load_avro_schema_from_file(key_schema_file,
                                                                      value_schema_file)

    key_avro_serializer = AvroSerializer(key_schema, schema_registry_client,
                                         google.Key.key_to_dict)
    value_avro_serializer = AvroSerializer(value_schema, schema_registry_client,
                                           google.Value.value_to_dict)
    key_avro_deserializer = AvroDeserializer(key_schema, schema_registry_client,
                                             google.Key.dict_to_key)
    value_avro_deserializer = AvroDeserializer(value_schema, schema_registry_client,
                                               google.Value.dict_to_value)

    def test_producer(self):
        # Read arguments and configurations and initialize
        producer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.serializer': self.key_avro_serializer,
            'value.serializer': self.value_avro_serializer
        }
        producer = SerializingProducer(producer_config)

        delivered_records = 0
        for text in self.test_messages:
            url = 'www.test.com'
            scraper_dt = datetime.now(pytz.timezone('America/Denver'))
            scraper_dt = scraper_dt.strftime("%Y/%m/%d %H:%M:%S %z")
            value_obj = google.Value(text=text, scraper_dt=scraper_dt)
            key_obj = google.Key(url=url)
            producer.produce(topic=self.topic, key=key_obj, value=value_obj,
                             on_delivery=kafka_utils.acked)
            delivered_records += producer.poll()
        producer.flush()

        assert delivered_records == len(self.test_messages)

    def test_consumer(self):
        consumer_config = {
            'bootstrap.servers': self.conf['bootstrap.servers'],
            'key.deserializer': self.key_avro_deserializer,
            'value.deserializer': self.value_avro_deserializer,
            'group.id': '1',
            'auto.offset.reset': 'earliest'
        }
        offset = kafka_utils.offset - len(self.test_messages) + 1

        consumer = DeserializingConsumer(consumer_config)
        partitions = []
        partition = TopicPartition(topic=self.topic, partition=0, offset=offset)
        partitions.append(partition)
        consumer.assign(partitions)

        # Process messages
        result = []
        attempt = 0
        while len(result) < len(self.test_messages):
            try:
                msg = consumer.poll(1.0)
                attempt += 1
                if msg is None:
                    print("no message received")
                    if attempt < 10:
                        pass
                    else:
                        break
                elif msg.error():
                    break
                else:
                    value_object = msg.value()
                    text = value_object.text
                    print("adding {} to result".format(text))
                    result.append(text)
            except KeyboardInterrupt:
                break
            except SerializerError:
                break

        # Leave group and commit final offsets
        consumer.close()

        assert result == self.test_messages
def main():
    sr_conf = {'url': SCHEMA_REGISTRY_URL}
    schema_registry_client = SchemaRegistryClient(sr_conf)

    schema_str = """
    {
      "namespace": "io.confluent.ksql.avro_schemas",
      "name": "User",
      "type": "record",
      "fields": [
        {"name": "DATESTAMP", "type": "string"},
        {"name": "TIMESTAMP", "type": "string"},
        {"name": "MILLISEC", "type": "string"},
        {"name": "LOGLEVEL", "type": "string"},
        {"name": "REQUESTID", "type": "string"},
        {"name": "RECORDFORMATVERSION", "type": "string"},
        {"name": "SOURCEIP", "type": "string"},
        {"name": "DNSDOMAIN", "type": "string"},
        {"name": "MESSAGETYPE", "type": "string"},
        {"name": "OPERATION", "type": "string"},
        {"name": "AUTHUSER", "type": "string"},
        {"name": "AUTHDOMAIN", "type": "string"},
        {"name": "HTTPCODE", "type": "string"},
        {"name": "SOURCEBYTES", "type": "string"},
        {"name": "RESPONSEBYTES", "type": "string"},
        {"name": "ELAPSEDTIME", "type": "string"},
        {"name": "DOMAIN", "type": "string"},
        {"name": "BUCKET", "type": "string"},
        {"name": "OBJECT", "type": "string"}
      ]
    }
    """

    avro_deserializer = AvroDeserializer(schema_str, schema_registry_client)
    string_deserializer = StringDeserializer('utf_8')

    consumer_conf = {
        'bootstrap.servers': bootstrap_servers,
        'key.deserializer': string_deserializer,
        'value.deserializer': avro_deserializer,
        'group.id': group,
        'auto.offset.reset': "earliest"
    }

    consumer = DeserializingConsumer(consumer_conf)
    consumer.subscribe([topic])

    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = consumer.poll(1.0)
            if msg is None:
                continue

            record = msg.value()
            if record is not None:
                if record['OPERATION'] == "POST" and record['DOMAIN'] != "%28none%29":
                    urllistraw = "http://" + record['DOMAIN'] + "/" + record['BUCKET'] + "/" + record['OBJECT']
                    urllist = urllistraw[:-1]
                    print(urllist)
                    r = requests.head(urllist)
                    print(r.headers)
                else:
                    continue
        except KeyboardInterrupt:
            break

    consumer.close()
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer
from confluent_kafka.serialization import SerializationContext, MessageField

schemaRegistryClient = SchemaRegistryClient({"url": "http://*****:*****"})

# The Avro schema string and the AvroDeserializer construction were redacted in the
# original snippet; as in the other "time-series" example above, the deserializer is
# built from that schema and the registry client.
avroDeserializer = AvroDeserializer(schema, schemaRegistryClient)

serializationContext = SerializationContext("time-series", MessageField.VALUE)
deserialized_message = avroDeserializer(message, serializationContext)
if __name__ == '__main__':
    # Read arguments and configurations and initialize
    args = ccloud_lib.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['schema.registry.basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    name_avro_deserializer = AvroDeserializer(ccloud_lib.name_schema,
                                              schema_registry_client,
                                              ccloud_lib.Name.dict_to_name)
    count_avro_deserializer = AvroDeserializer(ccloud_lib.count_schema,
                                               schema_registry_client,
                                               ccloud_lib.Count.dict_to_count)

    # for full list of configurations, see:
    # https://docs.confluent.io/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'sasl.mechanisms': conf['sasl.mechanisms'],
        'security.protocol': conf['security.protocol'],
        'sasl.username': conf['sasl.username'],
        'sasl.password': conf['sasl.password'],
        'key.deserializer': name_avro_deserializer,
        'value.deserializer': count_avro_deserializer,
def create_avro_deserializer(self, topic_name):
    schema_string = self.load_avro_schema_string(topic_name)
    return AvroDeserializer(schema_string, self.registry_client)
if __name__ == '__main__':
    # Read arguments and configurations and initialize
    args = ccloud_lib_rssfeeds.parse_args()
    config_file = args.config_file
    topic = args.topic
    conf = ccloud_lib_rssfeeds.read_ccloud_config(config_file)

    schema_registry_conf = {
        'url': conf['schema.registry.url'],
        'basic.auth.user.info': conf['schema.registry.basic.auth.user.info']
    }
    schema_registry_client = SchemaRegistryClient(schema_registry_conf)

    # schema for value
    value_avro_deserializer = AvroDeserializer(ccloud_lib_rssfeeds.value_schema,
                                               schema_registry_client,
                                               ccloud_lib_rssfeeds.Value.dict_to_value)

    # for full list of configurations, see:
    # https://docs.confluent.io/current/clients/confluent-kafka-python/#deserializingconsumer
    consumer_conf = {
        'bootstrap.servers': conf['bootstrap.servers'],
        'sasl.mechanisms': conf['sasl.mechanisms'],
        'security.protocol': conf['security.protocol'],
        'sasl.username': conf['sasl.username'],
        'sasl.password': conf['sasl.password'],
        # 'key.deserializer': name_avro_deserializer,
        'value.deserializer': value_avro_deserializer,
        'group.id': 'rssfeed-consumer-1',
        'auto.offset.reset': 'earliest'
    }