def test_auto_offset_reset_latest(topic: str) -> None:
    producer = ConfluentProducer(configuration)
    value = uuid.uuid1().hex.encode("utf-8")
    producer.produce(topic, value=value)
    assert producer.flush(5.0) == 0

    consumer = KafkaConsumer(
        {
            **configuration,
            "auto.offset.reset": "latest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "true",
            "group.id": "test-latest",
        }
    )

    consumer.subscribe([topic])

    try:
        consumer.poll(10.0)  # XXX: getting the subscription is slow
    except EndOfStream as error:
        assert error.stream == TopicPartition(topic, 0)
        assert error.offset == 1
    else:
        raise AssertionError('expected EndOfStream error')

    consumer.close()
def __init__(self, configuration: Mapping[str, Any]) -> None:
    self.__configuration = configuration
    self.__producer = ConfluentProducer(configuration)
    self.__shutdown_requested = Event()

    # The worker must execute in a separate thread to ensure that callbacks
    # are fired -- otherwise trying to produce "synchronously" via
    # ``produce(...).result()`` could result in a deadlock.
    self.__result = execute(self.__worker)
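The ``__worker`` callable handed to ``execute`` above is not shown. As a hedged sketch (the class and method names below are illustrative, not the original implementation), such a worker typically just keeps calling ``Producer.poll`` so that delivery callbacks are serviced off the caller's thread, then flushes on shutdown:

from threading import Event, Thread
from confluent_kafka import Producer


class BackgroundProducer:
    """Hypothetical wrapper: services delivery callbacks on a worker thread."""

    def __init__(self, configuration):
        self._producer = Producer(configuration)
        self._shutdown_requested = Event()
        self._thread = Thread(target=self._worker, daemon=True)
        self._thread.start()

    def _worker(self):
        # poll() dispatches delivery report callbacks for produced messages;
        # running it on a dedicated thread keeps callbacks firing even while
        # a caller blocks waiting on a delivery result.
        while not self._shutdown_requested.is_set():
            self._producer.poll(0.1)

    def close(self):
        self._shutdown_requested.set()
        self._thread.join()
        self._producer.flush()  # drain any messages still queued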
def __init__(self, topic, group_id, bootstrap_servers, **kwargs):
    self.topic = topic
    self.group_id = group_id
    self.bootstrap_servers = bootstrap_servers

    config = kwargs.copy()
    config["group.id"] = group_id
    config["bootstrap.servers"] = ",".join(bootstrap_servers)
    log.info("config", extra={"config": config})

    self.producer = ConfluentProducer(config)
def __init__(self, bootstrap_servers: str, topic: str) -> None:
    self._topic = topic
    config = {
        'bootstrap.servers': bootstrap_servers,
        'default.topic.config': {
            'acks': 'all'
        },
        'queue.buffering.max.ms': 50,
        'metadata.request.timeout.ms': 20000,
        'client.id': socket.gethostname(),
        'compression.codec': 'snappy',
        'api.version.request': True,
        'log.connection.close': False,
        # The maximum size of a request in bytes
        'message.max.bytes': MEBIBYTE * 4,
    }
    self._producer = ConfluentProducer(config)
def confluent_producer(self):
    try:
        return self._confluent_producer
    except AttributeError:
        if self._bootstrap_servers:
            self._confluent_producer = ConfluentProducer({
                'bootstrap.servers': self._bootstrap_servers,
                # 'debug': 'topic,broker',
                'queue.buffering.max.ms': 100
            })
            print('Created Kafka producer, bootstrap_servers=%s'
                  % self._bootstrap_servers)
            return self._confluent_producer
        print('Kafka producer: None')
        return None
def custom_init(self):
    self._producer = KafkaProducer(
        bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
    try:
        admin_client = KafkaAdminClient(
            bootstrap_servers=frame_config.KAFKA_BOOTSTRAP_SERVERS)
        admin_client.create_topics([NewTopic(self._queue_name, 10, 1)])
        # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
    except TopicAlreadyExistsError:
        pass
    except Exception as e:
        self.logger.exception(e)
    # Do not close the producer proactively before the program exits; doing so raises an error.
    atexit.register(self.close)
    self._confluent_producer = ConfluentProducer({
        'bootstrap.servers': ','.join(frame_config.KAFKA_BOOTSTRAP_SERVERS)
    })
    self._recent_produce_time = time.time()
def test_commit_log_consumer(topic: str) -> None:
    # XXX: This would be better as an integration test (or at least a test
    # against an abstract Producer interface) rather than a test against a mock.
    commit_log_producer = FakeConfluentKafkaProducer()

    consumer = KafkaConsumerWithCommitLog(
        {
            **configuration,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "true",
            "group.id": "test",
            "session.timeout.ms": 10000,
        },
        commit_log_producer,
        'commit-log',
    )

    consumer.subscribe([topic])

    producer = ConfluentProducer(configuration)
    producer.produce(topic)
    assert producer.flush(5.0) == 0

    message = consumer.poll(10.0)  # XXX: getting the subscription is slow
    assert isinstance(message, Message)

    assert consumer.commit() == {TopicPartition(topic, 0): message.offset + 1}

    assert len(commit_log_producer.messages) == 1
    commit_message = commit_log_producer.messages[0]
    assert commit_message.topic() == 'commit-log'
    assert commit_message.key() == '{}:{}:{}'.format(topic, 0, 'test').encode('utf-8')
    assert commit_message.value() == '{}'.format(message.offset + 1).encode(
        'utf-8')  # offsets are last processed message offset + 1
def test_auto_offset_reset_error(topic: str) -> None:
    producer = ConfluentProducer(configuration)
    value = uuid.uuid1().hex.encode("utf-8")
    producer.produce(topic, value=value)
    assert producer.flush(5.0) == 0

    consumer = KafkaConsumer(
        {
            **configuration,
            "auto.offset.reset": "error",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "true",
            "group.id": "test-error",
        }
    )

    consumer.subscribe([topic])

    with pytest.raises(ConsumerError):
        consumer.poll(10.0)  # XXX: getting the subscription is slow

    consumer.close()
def test_auto_offset_reset_earliest(topic: str) -> None:
    producer = ConfluentProducer(configuration)
    value = uuid.uuid1().hex.encode("utf-8")
    producer.produce(topic, value=value)
    assert producer.flush(5.0) == 0

    consumer = KafkaConsumer(
        {
            **configuration,
            "auto.offset.reset": "earliest",
            "enable.auto.commit": "false",
            "enable.auto.offset.store": "true",
            "enable.partition.eof": "true",
            "group.id": "test-earliest",
        }
    )

    consumer.subscribe([topic])

    message = consumer.poll(10.0)
    assert isinstance(message, Message)
    assert message.offset == 0

    consumer.close()
def test_consumer(topic: str) -> None:
    consumer = KafkaConsumer({
        **configuration,
        "auto.offset.reset": "latest",
        "enable.auto.commit": "false",
        "enable.auto.offset.store": "true",
        "enable.partition.eof": "true",
        "group.id": "test",
        "session.timeout.ms": 10000,
    })

    # TODO: It'd be much nicer if ``subscribe`` returned a future that we could
    # use to wait for assignment, but we'd need to be very careful to avoid
    # edge cases here. It's probably not worth the complexity for now.

    # XXX: There has got to be a better way to do this...
    assignment_callback = mock.MagicMock()
    revocation_callback = mock.MagicMock()

    consumer.subscribe([topic], on_assign=assignment_callback, on_revoke=revocation_callback)

    try:
        consumer.poll(10.0)  # XXX: getting the subscription is slow
    except EndOfStream as error:
        assert error.stream == TopicPartition(topic, 0)
        assert error.offset == 0
    else:
        raise AssertionError('expected EndOfStream error')

    assert assignment_callback.call_args_list == [
        mock.call([TopicPartition(topic, 0)])
    ]

    producer = ConfluentProducer(configuration)
    value = uuid.uuid1().hex.encode("utf-8")
    producer.produce(topic, value=value)
    assert producer.flush(5.0) == 0

    message = consumer.poll(1.0)
    assert isinstance(message, Message)
    assert message.stream == TopicPartition(topic, 0)
    assert message.offset == 0
    assert message.value == value

    try:
        assert consumer.poll(1.0) is None
    except EndOfStream as error:
        assert error.stream == TopicPartition(topic, 0)
        assert error.offset == 1
    else:
        raise AssertionError('expected EndOfStream error')

    assert consumer.commit() == {TopicPartition(topic, 0): message.offset + 1}

    consumer.unsubscribe()

    assert consumer.poll(1.0) is None

    assert revocation_callback.call_args_list == [
        mock.call([TopicPartition(topic, 0)])
    ]

    consumer.close()

    with pytest.raises(RuntimeError):
        consumer.subscribe([topic])

    with pytest.raises(RuntimeError):
        consumer.unsubscribe()

    with pytest.raises(RuntimeError):
        consumer.poll()

    with pytest.raises(RuntimeError):
        consumer.commit()

    consumer.close()
def __init__(
    self,
    topic_name,
    num_partitions=1,
    replication_factor=1,
    cleanup='delete',
    retention=retention_bytes,
    compression='gzip',
    delete='1300',
    file_delete='2000',
    recreate=True,
    avro_prod=False,
):
    """
    :param topic_name: name of the Kafka topic to manage
    :param num_partitions: number of partitions to create the topic with
    :param replication_factor: replication factor for the topic
    :param cleanup: cleanup policy for the topic
    :param retention: retention setting for the topic
    :param compression: compression type used by the producer
    :param delete:
    :param file_delete:
    :param recreate: if True, delete and recreate the topic when it already exists
    :param avro_prod: use an Avro producer (not implemented)
    """
    self.topic_name = topic_name
    self.num_partitions = num_partitions
    self.replication_factor = replication_factor
    self.cleanup = cleanup
    self.retention = retention
    self.compression = compression
    self.delete = delete
    self.file_delete = file_delete
    self.recreate = recreate
    self.avro_prod = avro_prod

    self.kafka_bootstrap = os.getenv('kafka_bootstrap')
    self.client_id = socket.gethostname()
    self.client = AdminClient({"bootstrap.servers": self.kafka_bootstrap})

    self.broker_properties = {
        "bootstrap.servers": self.kafka_bootstrap,
        "client.id": self.client_id,
        "linger.ms": 1000,
        "compression.type": self.compression,
        "batch.num.messages": 1000000,
    }

    # If the topic does not already exist, try to create it
    if self.topic_exists(self.client, self.topic_name):
        logger.info("Topic {} already exists".format(self.topic_name))
        if self.recreate:
            self.delete_topic(self.client, [self.topic_name])
            time.sleep(5)
            logger.info("Topic {} will be created".format(self.topic_name))
            self.create_topic(self.client)
        else:
            logger.info("Topic {} already exists in Kafka Cluster".format(self.topic_name))
    else:
        logger.info("Topic {} will be created".format(self.topic_name))
        self.create_topic(self.client)

    if not self.avro_prod:
        self.producer = ConfluentProducer(self.broker_properties)
    else:
        raise NotImplementedError
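The ``topic_exists``, ``create_topic``, and ``delete_topic`` helpers referenced above are not included in this snippet. A minimal sketch of what they might look like, assuming confluent_kafka's AdminClient API (the free-function signatures below are illustrative and differ from the methods called above):

from confluent_kafka.admin import AdminClient, NewTopic


def topic_exists(client: AdminClient, topic_name: str) -> bool:
    # list_topics() returns cluster metadata, including all known topic names
    return topic_name in client.list_topics(timeout=10).topics


def create_topic(client: AdminClient, topic_name: str,
                 num_partitions: int, replication_factor: int) -> None:
    futures = client.create_topics([
        NewTopic(topic_name, num_partitions=num_partitions,
                 replication_factor=replication_factor)
    ])
    for future in futures.values():
        future.result()  # block until the topic is created, raising on failure


def delete_topic(client: AdminClient, topic_names: list) -> None:
    for future in client.delete_topics(topic_names).values():
        future.result()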
def test_consumer(topic: str) -> None:
    def build_consumer() -> KafkaConsumer:
        return KafkaConsumer(
            {
                **configuration,
                "auto.offset.reset": "earliest",
                "enable.auto.commit": "false",
                "enable.auto.offset.store": "true",
                "enable.partition.eof": "true",
                "group.id": "test",
                "session.timeout.ms": 10000,
            }
        )

    producer = ConfluentProducer(configuration)
    value = uuid.uuid1().hex.encode("utf-8")
    for i in range(2):
        producer.produce(topic, value=value)
    assert producer.flush(5.0) == 0

    consumer = build_consumer()

    def assignment_callback(streams: Sequence[TopicPartition]):
        assignment_callback.called = True
        assert streams == [TopicPartition(topic, 0)]
        assert consumer.tell() == {TopicPartition(topic, 0): 0}
        consumer.seek({TopicPartition(topic, 0): 1})

        with pytest.raises(ConsumerError):
            consumer.seek({TopicPartition(topic, 1): 0})

    def revocation_callback(streams: Sequence[TopicPartition]):
        revocation_callback.called = True
        assert streams == [TopicPartition(topic, 0)]
        assert consumer.tell() == {TopicPartition(topic, 0): 1}

        # Not sure why you'd want to do this, but it shouldn't error.
        consumer.seek({TopicPartition(topic, 0): 0})

    # TODO: It'd be much nicer if ``subscribe`` returned a future that we could
    # use to wait for assignment, but we'd need to be very careful to avoid
    # edge cases here. It's probably not worth the complexity for now.
    consumer.subscribe([topic], on_assign=assignment_callback, on_revoke=revocation_callback)

    message = consumer.poll(10.0)  # XXX: getting the subscription is slow
    assert isinstance(message, Message)
    assert message.stream == TopicPartition(topic, 0)
    assert message.offset == 1
    assert message.value == value

    assert consumer.tell() == {TopicPartition(topic, 0): 2}
    assert getattr(assignment_callback, 'called', False)

    consumer.seek({TopicPartition(topic, 0): 0})
    assert consumer.tell() == {TopicPartition(topic, 0): 0}

    with pytest.raises(ConsumerError):
        consumer.seek({TopicPartition(topic, 1): 0})

    message = consumer.poll(1.0)
    assert isinstance(message, Message)
    assert message.stream == TopicPartition(topic, 0)
    assert message.offset == 0
    assert message.value == value

    assert consumer.commit() == {TopicPartition(topic, 0): message.get_next_offset()}

    consumer.unsubscribe()

    assert consumer.poll(1.0) is None

    assert consumer.tell() == {}

    with pytest.raises(ConsumerError):
        consumer.seek({TopicPartition(topic, 0): 0})

    consumer.close()

    with pytest.raises(RuntimeError):
        consumer.subscribe([topic])

    with pytest.raises(RuntimeError):
        consumer.unsubscribe()

    with pytest.raises(RuntimeError):
        consumer.poll()

    with pytest.raises(RuntimeError):
        consumer.tell()

    with pytest.raises(RuntimeError):
        consumer.seek({TopicPartition(topic, 0): 0})

    with pytest.raises(RuntimeError):
        consumer.commit()

    consumer.close()

    consumer = build_consumer()

    consumer.subscribe([topic])

    message = consumer.poll(10.0)  # XXX: getting the subscription is slow
    assert isinstance(message, Message)
    assert message.stream == TopicPartition(topic, 0)
    assert message.offset == 1
    assert message.value == value

    try:
        assert consumer.poll(1.0) is None
    except EndOfStream as error:
        assert error.stream == TopicPartition(topic, 0)
        assert error.offset == 2
    else:
        raise AssertionError('expected EndOfStream error')

    consumer.close()
def create_producer(self):
    from confluent_kafka import Producer as ConfluentProducer
    self.producer = ConfluentProducer(self.producer_settings)
    return self.producer
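For reference, a hedged usage sketch of a producer created this way (the topic name, settings, and callback below are placeholders, not part of the original snippet): produce a message with a delivery callback, then flush so the callback is invoked before returning.

from confluent_kafka import Producer


def delivery_report(err, msg):
    # Invoked from poll()/flush() with the broker's delivery result.
    if err is not None:
        print('delivery failed: %s' % err)
    else:
        print('delivered to %s [%d] @ %d'
              % (msg.topic(), msg.partition(), msg.offset()))


producer = Producer({'bootstrap.servers': 'localhost:9092'})
producer.produce('example-topic', value=b'payload', callback=delivery_report)
producer.flush(5.0)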
    pluginmsg.metrics.measurement_id = 42
    pluginmsg.metrics.config_version = 1
    pluginmsg.metrics.timestamp = 3000
    for key, val in SimulatedTags.items():
        pluginmsg.metrics.tags[key] = val
    pluginmsg.metrics.values = serializeHTTPMetrics()
    pluginmsg.send_time = 1000
    return pluginmsg.SerializeToString()


def pushKafka(kproducer, topic):
    msg = serializePluginMetrics()
    try:
        kproducer.produce(topic, key=str(random.randint(1, 100)), value=msg)
    except Exception as exc:
        print(exc)


if __name__ == "__main__":
    kproducer = ConfluentProducer({
        'bootstrap.servers': 'localhost:9092',
        # 'debug': 'topic,broker',
        'queue.buffering.max.ms': 100})

    while True:
        print("Pushing plugin metrics to topic plugin.metrics ")
        pushKafka(kproducer, "plugin.metrics")
        time.sleep(5)