def get_consumer_for_topic(self, topic_name, group_id, partition, offset=None):
    """
    Method to instantiate the kafka consumer for the given topic, consumer group and partition
    :param topic_name: topic name
    :param group_id: consumer group id
    :param partition: partition id
    :param offset: optional offset to start consuming from
    :return: consumer instance
    """
    try:
        log.info("Fetching consumer for topic: " + topic_name)
        cache_key = topic_name + "_" + str(partition)
        if cache_key in self.consumer_dict:
            return self.consumer_dict[cache_key]
        conf = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': group_id,
            # 'session.timeout.ms': 1000,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }
        }
        consumer = confluent_kafka.Consumer(**conf)
        if offset is None:
            tp = confluent_kafka.TopicPartition(topic_name, partition)
        else:
            tp = confluent_kafka.TopicPartition(topic_name, partition, offset)
        consumer.assign([tp])
        self.consumer_dict[cache_key] = consumer
    except Exception:
        log.error("Error while setting up the consumer for topic: " + topic_name)
        raise
    return consumer
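# A hypothetical call site for the method above; the wrapper class name
# `KafkaClientWrapper` and the topic/group values are assumed, not from the source.
client = KafkaClientWrapper(bootstrap_servers='localhost:9092')  # assumed wrapper
consumer = client.get_consumer_for_topic('events', 'my-group', partition=0)
# A second call for the same topic/partition returns the cached instance:
assert client.get_consumer_for_topic('events', 'my-group', partition=0) is consumer
msg = consumer.poll(1.0)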
def get_message_batch(kafka_params, topic, partition, keys, low, high, timeout=None):
    """Fetch a batch of kafka messages (keys & values) in given topic/partition

    This will block until messages are available, or timeout is reached.
    """
    import confluent_kafka as ck
    t0 = time.time()
    consumer = ck.Consumer(kafka_params)
    tp = ck.TopicPartition(topic, partition, low)
    consumer.assign([tp])
    out = []
    try:
        while True:
            msg = consumer.poll(0)
            if msg and msg.value() and msg.error() is None:
                if msg.offset() <= high:
                    if keys:
                        out.append({'key': msg.key(), 'value': msg.value()})
                    else:
                        out.append(msg.value())
                if msg.offset() >= high:
                    # reached the end of the requested (inclusive) range
                    break
            else:
                time.sleep(0.1)
                if timeout is not None and time.time() - t0 > timeout:
                    break
    finally:
        consumer.close()
    return out
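# A minimal sketch of calling the helper above; the broker address, topic name,
# and offset range are assumed for illustration. Note that `high` is treated as
# an inclusive offset by the break condition, so this asks for offsets 0..99.
params = {'bootstrap.servers': 'localhost:9092', 'group.id': 'batch-reader'}
values = get_message_batch(params, 'events', partition=0, keys=False,
                           low=0, high=99, timeout=30)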
def __enter__(self):
    self.consumer = confluent_kafka.Consumer(**self.kafka_kwargs)
    if self.frombeginning:
        self.consumer.subscribe([self.topic], on_assign=set_offset_beginning)
    else:
        self.consumer.subscribe([self.topic])
    return self
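# The set_offset_beginning callback referenced above is not shown in this
# snippet; a minimal version, assuming it should rewind each newly assigned
# partition to the start of the log, might look like this:
import confluent_kafka

def set_offset_beginning(consumer, partitions):
    # Rewind every assigned partition before consumption starts.
    for p in partitions:
        p.offset = confluent_kafka.OFFSET_BEGINNING
    consumer.assign(partitions)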
def _create_worker_consumer(self,
                            transport: 'Transport',
                            loop: asyncio.AbstractEventLoop) -> _Consumer:
    conf = self.app.conf
    self._assignor = self.app.assignor
    # XXX partition.assignment.strategy is string
    # need to write C wrapper for this
    # 'partition.assignment.strategy': [self._assignor]
    return confluent_kafka.Consumer({
        'bootstrap.servers': server_list(transport.url, transport.default_port),
        'group.id': conf.id,
        'client.id': conf.broker_client_id,
        'default.topic.config': {
            'auto.offset.reset': 'earliest',
        },
        'enable.auto.commit': False,
        'fetch.max.bytes': conf.consumer_max_fetch_size,
        'request.timeout.ms': int(conf.broker_request_timeout * 1000.0),
        'check.crcs': conf.broker_check_crcs,
        'session.timeout.ms': int(conf.broker_session_timeout * 1000.0),
        'heartbeat.interval.ms': int(conf.broker_heartbeat_interval * 1000.0),
    })
def __init__(self, conf, reset=False):
    """Set @reset to True to begin consuming at start of stream."""
    config = dict()
    self.topic = conf['kafka']['topic']
    config['bootstrap.servers'] = conf['kafka']['address']

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = "smallest"
    default_topic_config['enable.auto.commit'] = True
    config["default.topic.config"] = default_topic_config

    # Set the group ID.
    state = self._get_state_info(conf)
    if not reset and state:
        group_id = state['group_id']
    else:
        group_id = "CG_" + str(uuid.uuid4())
        self._update_state(conf, 'group_id', group_id)
    config["group.id"] = group_id

    # Add SSL stuff
    if conf['kafka'].getboolean('ssl_enable'):
        config["security.protocol"] = 'ssl'
        config["ssl.ca.location"] = conf['kafka']['ca_path']
        config["ssl.certificate.location"] = conf['kafka']['cert_path']
        config["ssl.key.location"] = conf['kafka']['key_path']
        config["ssl.key.password"] = conf['kafka']['password']

    self.consumer = confluent_kafka.Consumer(config)
    self.consumer.subscribe([self.topic])

    p_schema = Utils.load_schema(conf['kafka']['schema'])
    c_schema = Utils.load_schema(conf['kafka']['schema'])
    self.deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)
def __init__(
    self,
    hosts: List[str],
    group_id: str,
    subscription: Optional[List[str]] = None,
    auto_offset: bool = True,
    start_from_beginning_if_no_offset_available: bool = True,
    statistics_interval_ms: int = 1000,
    use_confluent_monitoring_interceptor: bool = False,
    logger: Optional[logging.Logger] = None,
    debug: bool = False,
):
    conf = {
        "bootstrap.servers": ",".join(hosts),
        "group.id": group_id,
        "enable.auto.offset.store": auto_offset,
        "statistics.interval.ms": statistics_interval_ms,
        "error_cb": self.error_callback,
        "stats_cb": self.stats_callback,
        "throttle_cb": self.throttle_callback,
    }
    if start_from_beginning_if_no_offset_available:
        conf["auto.offset.reset"] = "earliest"
    if use_confluent_monitoring_interceptor:
        conf["plugin.library.paths"] = "monitoring-interceptor"
    if debug:
        conf["debug"] = "consumer"

    # Avoid a mutable default argument for the subscription list.
    self.subscription = subscription if subscription is not None else []
    self.logger = logger or logging.getLogger("KafkaConsumer")
    self._kafka_instance = confluent_kafka.Consumer(conf, logger=self.logger)
    self._async_poll = async_wrap(self._kafka_instance.poll)
def confluent_kafka_consumer_performance():
    topic = 'test'
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': '172.20.10.10:9092',
        'group.id': 'test-consumer-group',
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }
    consumer = confluent_kafka.Consumer(**conf)
    print(consumer)

    consumer_start = time.time()
    # This is the same as pykafka: subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1)
        if msg:
            msg_consumed_count += 1
            print(msg)
        if msg_consumed_count >= 10:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    return consumer_timing
def __init__(self, broker, timeout=None, topics=["^ztf_.*"], **consumer_config):
    """Create a Kafka consumer subscribed to the given topics."""
    self._metrics = KafkaMetrics.instance()
    config = {
        "bootstrap.servers": broker,
        "default.topic.config": {"auto.offset.reset": "smallest"},
        "enable.auto.commit": True,
        "receive.message.max.bytes": 2 ** 29,
        "auto.commit.interval.ms": 10000,
        "enable.auto.offset.store": False,
        "group.id": str(uuid.uuid1()),  # config values must be strings
        "enable.partition.eof": False,  # don't emit messages on EOF
        "topic.metadata.refresh.interval.ms": 1000,  # fetch new metadata every second to pick up topics quickly
        # "debug": "all",
        "stats_cb": self._metrics.on_stats_callback,
        "statistics.interval.ms": 10000,
    }
    config.update(**consumer_config)
    self._consumer = confluent_kafka.Consumer(**config)
    self._consumer.subscribe(topics)

    if timeout is None:
        self._poll_interval = 1
        self._poll_attempts = sys.maxsize
    else:
        self._poll_interval = max((1, min((30, timeout))))
        self._poll_attempts = max((1, int(timeout / self._poll_interval)))
    self._timeout = timeout
    self._last_message = None
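# A quick check (not from the original source) of the poll interval/attempt
# arithmetic above: the interval is clamped to [1, 30] seconds and the number
# of attempts is derived from the requested timeout.
import sys

def poll_params(timeout):
    if timeout is None:
        return 1, sys.maxsize
    interval = max(1, min(30, timeout))
    return interval, max(1, int(timeout / interval))

assert poll_params(10) == (10, 1)   # short timeout: one 10 s poll
assert poll_params(75) == (30, 2)   # long timeout: two 30 s polls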
def _create_consumer(consumer_group, consumer_type, initial_offset_reset):
    """
    Creates a kafka consumer for the given consumer group and consumer type.

    :param consumer_group: kafka consumer group name
    :param consumer_type: ConsumerType used to look up the topic/cluster settings
    :param initial_offset_reset: value for 'auto.offset.reset'
    :return: the configured Consumer
    """
    topic_name = ConsumerType.get_topic_name(consumer_type)
    cluster_name = settings.KAFKA_TOPICS[topic_name]["cluster"]
    bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name]["bootstrap.servers"]

    consumer_configuration = {
        "bootstrap.servers": bootstrap_servers,
        "group.id": consumer_group,
        "enable.auto.commit": "false",  # we commit manually
        "enable.auto.offset.store": "true",  # we let the broker keep count of the current offset (when committing)
        "enable.partition.eof": "false",  # stop EOF errors when we read all messages in the topic
        "default.topic.config": {"auto.offset.reset": initial_offset_reset},
    }

    return kafka.Consumer(consumer_configuration)
def test_consumer_confluent(conf: Config):
    consumer = ck.Consumer(**{
        'bootstrap.servers': conf["brokers"],
        'group.id': "confluent-" + str(uuid.uuid1()),
        # 'group.id': "confluent",
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        },
        'enable.auto.commit': 'true'
    })
    consumer.subscribe([conf["topic"]], on_assign=my_on_assign)

    start = datetime.datetime.now()
    max_fetch_messages = 500
    _counter = 0
    while _counter < conf["num_messages"]:
        messages = consumer.consume(num_messages=max_fetch_messages)
        for message in messages:
            _counter += 1
            if _counter >= conf["num_messages"]:
                break
    end = datetime.datetime.now()
    log.debug("Consumed %s messages", _counter)
    return TestResult(start=start, end=end, num_messages=_counter)
def inner(options=None):
    test_name = request.node.name
    topics = [_get_topic_name(_EVENTS_TOPIC_NAME, test_name)]
    options = _kafka_processing_config(test_name, options)
    # look for the servers (it is the only config we are interested in)
    servers = [
        elm['value']
        for elm in options['processing']['kafka_config']
        if elm['name'] == 'bootstrap.servers'
    ]
    if len(servers) < 1:
        raise ValueError(
            "Bad kafka_config, could not find 'bootstrap.servers'.\n"
            "The configuration should have an entry of the format \n"
            "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
        )
    servers = servers[0]
    settings = {
        'bootstrap.servers': servers,
        'group.id': 'test.consumer',
        'enable.auto.commit': True,
        'auto.offset.reset': 'earliest',
    }
    consumer = kafka.Consumer(settings)
    consumer.subscribe(topics)
    return consumer
def poll_kafka(self):
    import confluent_kafka as ck

    consumer = ck.Consumer(self.consumer_params)
    try:
        while not self.stopped:
            out = []
            for partition in range(self.npartitions):
                tp = ck.TopicPartition(self.topic, partition, 0)
                try:
                    low, high = consumer.get_watermark_offsets(tp, timeout=0.1)
                except (RuntimeError, ck.KafkaException):
                    continue
                current_position = self.positions[partition]
                lowest = max(current_position, low)
                if high > lowest:
                    # only emit a batch when there are new offsets to read
                    out.append((self.consumer_params, self.topic,
                                partition, lowest, high - 1))
                    self.positions[partition] = high
            for part in out:
                yield self._emit(part)
            else:
                # no break above, so this always sleeps between polls
                yield gen.sleep(self.poll_interval)
    finally:
        consumer.close()
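# For reference, a standalone sketch of the watermark query used above;
# the broker address and topic name are assumed for illustration:
import confluent_kafka as ck

consumer = ck.Consumer({'bootstrap.servers': 'localhost:9092',
                        'group.id': 'watermark-check'})
tp = ck.TopicPartition('my-topic', 0)
low, high = consumer.get_watermark_offsets(tp, timeout=1.0)
print('partition 0 holds offsets [%d, %d)' % (low, high))
consumer.close()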
def inner(topic: str, options=None):
    topic_name = get_topic_name(topic)
    topics = [topic_name]
    options = processing_config(options)
    # look for the servers (it is the only config we are interested in)
    servers = [
        elm["value"]
        for elm in options["processing"]["kafka_config"]
        if elm["name"] == "bootstrap.servers"
    ]
    if len(servers) < 1:
        raise ValueError(
            "Bad kafka_config, could not find 'bootstrap.servers'.\n"
            "The configuration should have an entry of the format \n"
            "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
        )
    servers = servers[0]
    settings = {
        "bootstrap.servers": servers,
        "group.id": "test-consumer-%s" % uuid.uuid4().hex,
        "enable.auto.commit": True,
        "auto.offset.reset": "earliest",
    }
    consumer = kafka.Consumer(settings)
    consumer.assign([kafka.TopicPartition(t, 0) for t in topics])

    def die():
        consumer.close()

    request.addfinalizer(die)
    return consumer, options, topic_name
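# A hypothetical pytest test using the fixture factory above (the fixture and
# topic names are assumed, not from the source):
def test_event_reaches_topic(events_consumer):
    consumer, options, topic_name = events_consumer("events")
    msg = consumer.poll(timeout=10)
    assert msg is not None and msg.error() is None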
def __init__(self,
             serializer: Serializable,
             kafka_topic: str,
             bootstrap_server: str = "localhost",
             bootstrap_port: int = 9092,
             default_group: str = "default-group",
             *args, **kwargs):
    """Connects to a kafka topic and sets up the ingest

    Args:
        serializer (Serializable): Serializer to convert a message to bytes
            before sending to kafka.
        kafka_topic (str): Name of the kafka topic to subscribe to.
        bootstrap_server (str, optional): Address of the Kafka bootstrap
            server. Defaults to "localhost".
        bootstrap_port (int, optional): Bootstrap server port on which the
            topic is listening for messages. Defaults to 9092.
        default_group (str, optional): Group name for this consumer group.
            Defaults to "default-group".
    """
    self.kafka_topic = kafka_topic
    conf = {
        "bootstrap.servers": bootstrap_server + ":" + str(bootstrap_port),
        "client.id": socket.gethostname(),
        "group.id": default_group
    }
    self.create_topic(topic_name=kafka_topic, conf=conf)  # TODO is this safe?
    self.consumer = confluent_kafka.Consumer(conf)
    self.consumer.subscribe([self.kafka_topic])
    self.running = True
    super().__init__(serializer=serializer, *args, **kwargs)
def poll(self, on_idle=None, timeout=1.0):
    consumer = confluent_kafka.Consumer({
        'bootstrap.servers': self.servers,
        'group.id': self.group,
        'enable.auto.commit': False,
        'enable.partition.eof': False,
        'socket.keepalive.enable': True,
    })
    consumer.subscribe([self.topic])
    atexit.register(consumer.close)

    # Listen for messages.
    while True:
        # Without a timeout, KeyboardInterrupt is ignored.
        message = consumer.poll(timeout=timeout)

        # No message was received before the timeout.
        if not message:
            if callable(on_idle):
                on_idle()
            continue

        if message.error():
            raise confluent_kafka.KafkaException(message.error())

        try:
            self.dispatch(message)
        except Exception as err:
            raise DispatchException(err)
        else:
            consumer.commit(message)
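# Hypothetical usage of the poller above; the enclosing class name, its
# constructor signature, and the dispatch() implementation are all assumed:
poller = Poller(servers='localhost:9092', group='workers', topic='jobs')
poller.poll(on_idle=lambda: print('no new messages'), timeout=1.0)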
def run(runarg, return_dict):
    """run.
    """
    processID = runarg['processID']

    # Configure database connection
    try:
        msl = make_database_connection()
    except Exception as e:
        print('ERROR cannot connect to local database', e)
        sys.stdout.flush()
        return

    # Start consumer and print alert stream
    try:
        consumer = confluent_kafka.Consumer(**runarg['conf'])
        consumer.subscribe([runarg['args'].topic])
    except Exception as e:
        print('ERROR cannot connect to kafka', e)
        sys.stdout.flush()
        return

    # Number of alerts in the batch
    if runarg['args'].maxalert:
        maxalert = runarg['args'].maxalert
    else:
        maxalert = 50000

    nalert_in = nalert_out = nalert_ss = 0
    startt = time.time()
    while nalert_in < maxalert:
        # Here we get the next alert by kafka
        msg = consumer.poll(timeout=settings.KAFKA_TIMEOUT)
        if msg is None:
            break
        if msg.error():
            continue
        if msg.value() is None:
            continue
        else:
            # Apply filter to each alert
            alert = json.loads(msg.value())
            nalert_in += 1
            d = alert_filter(alert, msl)
            nalert_out += d['nalert']
            nalert_ss += d['ss']
            if nalert_in % 1000 == 0:
                print('process %d nalert_in %d nalert_out %d time %.1f' %
                      (processID, nalert_in, nalert_out, time.time() - startt))
                sys.stdout.flush()
                # refresh the database connection every 1000 alerts
                # and make sure everything is committed
                msl.close()
                msl = make_database_connection()

    consumer.close()
    return_dict[processID] = {
        'nalert_in': nalert_in,
        'nalert_out': nalert_out,
        'nalert_ss': nalert_ss
    }
def confluent_kafka_consumer_performance(topic=topic):
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': str(uuid.uuid1()),  # config values must be strings
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }
    consumer = confluent_kafka.Consumer(**conf)
    print("\n>>> Connect Kafka in {} by confluent-kafka-python as consumer".format(bootstrap_servers))

    consumer_start = time.time()
    # This is the same as pykafka: subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1)
        if msg:
            msg_consumed_count += 1
        if msg_consumed_count >= msg_count:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    return consumer_timing
def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext):
    super().__init__(config, ctx)
    self.source_config = config
    if (
        self.is_stateful_ingestion_configured()
        and not self.source_config.platform_instance
    ):
        raise ConfigurationError(
            "Enabling kafka stateful ingestion requires specifying a platform instance."
        )

    self.consumer = confluent_kafka.Consumer(
        {
            "group.id": "test",
            "bootstrap.servers": self.source_config.connection.bootstrap,
            **self.source_config.connection.consumer_config,
        }
    )
    # Use the fully qualified name for SchemaRegistryClient to make it mock patchable for testing.
    self.schema_registry_client = (
        confluent_kafka.schema_registry.schema_registry_client.SchemaRegistryClient(
            {
                "url": self.source_config.connection.schema_registry_url,
                **self.source_config.connection.schema_registry_config,
            }
        )
    )
    self.report = KafkaSourceReport()
    self.known_schema_registry_subjects: List[str] = []
    try:
        self.known_schema_registry_subjects.extend(
            self.schema_registry_client.get_subjects()
        )
    except Exception as e:
        logger.warning(f"Failed to get subjects from schema registry: {e}")
def get_kafka_consumer():
    consumer = None
    try:
        consumer = confluent_kafka.Consumer(**get_config())
    except Exception as e:
        print('Could not create kafka consumer', e, file=sys.stderr)
    return consumer
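# Since the factory above returns None on failure, callers should check the
# result before use; the topic name here is assumed for illustration:
consumer = get_kafka_consumer()
if consumer is None:
    sys.exit(1)
consumer.subscribe(['my-topic'])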
def confluent_kafka_consumer_performance(nums=5000):
    # subscribe() expects a list of topic name strings, not bytes
    topic = 'event_log'
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': '192.168.0.162:9092',
        'group.id': 'zy_consumer',
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }
    consumer = confluent_kafka.Consumer(**conf)

    consumer_start = time.time()
    # This is the same as pykafka: subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while msg_consumed_count < nums:
        print('>>>>>>>>>>>>>>>Starting<<<<<<<<<<<<<<<<<')
        # consume() returns a list of Message objects (possibly empty)
        msgs = consumer.consume(num_messages=100, timeout=1.0)
        for msg in msgs:
            if msg.error() is None:
                print(msg.value())  # value() is a method, not an attribute
                msg_consumed_count += 1

    consumer_timing = time.time() - consumer_start
    consumer.close()
    print('confluent_kafka_consumer cost:{} s'.format(consumer_timing))
def __init__(self, conf=None):
    self.conf = conf or {
        'bootstrap.servers': BROKER,
        'group.id': 'connect',
        'auto.offset.reset': 'latest'
    }
    self.consumer = confluent_kafka.Consumer(**self.conf)
    self.consumer.subscribe([TOPIC])
def __init__(self, config: dict, topics: str) -> None:
    self.log = logging.getLogger()
    self._consumer = confluent_kafka.Consumer(config)
    self._closed = False
    self._consumer.subscribe(topics=topics.split(","))
    # TODO to config
    self.timeout = 1.0
    self._num_messages = 100
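# A hypothetical instantiation of the wrapper above (the class name and broker
# address are assumed); note that topics arrive as a comma-separated string:
config = {'bootstrap.servers': 'localhost:9092', 'group.id': 'workers'}
reader = KafkaReader(config, topics='orders,payments')  # assumed class name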
def __init__(self, configs, topics, callback):
    logging.info('Kafka consumer starting on topics {}...'.format(topics))
    self._consumer = confluent_kafka.Consumer(configs)
    self._topics = topics
    self._callback = callback
    self._cancelled = False
    self._poll_thread = Thread(target=self._poll_loop)
    self._poll_thread.start()
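# The _poll_loop target is not shown in this snippet; a minimal version
# consistent with the constructor above might look like this (assumed):
def _poll_loop(self):
    self._consumer.subscribe(self._topics)
    while not self._cancelled:
        msg = self._consumer.poll(0.1)
        if msg is None:
            continue
        if msg.error():
            logging.error('Consumer error: %s', msg.error())
            continue
        self._callback(msg)
    self._consumer.close()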
def __init__(
        self, kafka_server, group_id, topic, duration, consume_all,
        consumer_schema_filename, producer_schema_filename, auto_offset,
        security_protocol=None, ca_cert=None, cert_location=None,
        key_location=None, key_pass=None,
        session_timeout=_DEFAULT_SESSION_TIMEOUT_MS):
    """Create a simple consumer.

    :param kafka_server: Connection string for bootstrap Kafka server.
    :param group_id: Group ID to use for distributed consumers.
    :param topic: Topic to consume from.
    :param duration: Duration to run for.
    :param consume_all: Whether to consume all available messages.
    :param consumer_schema_filename: Filename for consumer schema.
    :param producer_schema_filename: Filename for producer schema.
    :param auto_offset: Offset reset method to use for consumers.
    :param security_protocol: Security protocol to use ('ssl' or 'plaintext').
    :param ca_cert: Path to the CA certificate (SSL only).
    :param cert_location: Path to the client certificate (SSL only).
    :param key_location: Path to the client key (SSL only).
    :param key_pass: Password for the client key (SSL only).
    :param session_timeout: Consumer session timeout in milliseconds.
    """
    super(Consumer, self).__init__()
    self.kafka_server = kafka_server
    self.group_id = group_id
    self.topic = topic
    self.duration = duration
    self.consume_all = consume_all
    self.consumer_schema_filename = consumer_schema_filename
    self.producer_schema_filename = producer_schema_filename
    self.serializer = KafkaAvroGenericSerializer(self.consumer_schema_filename)
    self.deserializer = KafkaAvroGenericDeserializer(
        self.consumer_schema_filename, self.producer_schema_filename)
    self.auto_offset = auto_offset
    self.consume_timeout = Consumer._DEFAULT_CONSUME_TIMEOUT

    # Handle a sigint shutdown cleanly.
    self._shutdown = False

    config = {}
    config["bootstrap.servers"] = self.kafka_server
    config["group.id"] = self.group_id
    config["session.timeout.ms"] = session_timeout

    if security_protocol:
        if security_protocol.lower() == "ssl":
            config["security.protocol"] = security_protocol
            config["ssl.ca.location"] = ca_cert
            config["ssl.certificate.location"] = cert_location
            config["ssl.key.location"] = key_location
            config["ssl.key.password"] = key_pass
        elif security_protocol.lower() == "plaintext":
            config["security.protocol"] = security_protocol
        else:
            msg = "Unsupported security protocol type for TC APIs: " + security_protocol
            raise ValueError(msg)

    default_topic_config = {}
    default_topic_config["auto.offset.reset"] = self.auto_offset
    config["default.topic.config"] = default_topic_config

    self.consumer = confluent_kafka.Consumer(config)
    self.consumer.subscribe([self.topic])

    self.latency_stats = Utils.Stats(
        1, "End-to-End Latency (including Avro serialization)", "ms")
def run(self, is_shutdown_requested=lambda: False):
    """
    Runs the message processing loop
    """
    logger.debug(
        "Starting kafka consumer for topic:%s with consumer group:%s",
        self.topic_name,
        self.consumer_group,
    )
    consumer = kafka.Consumer(self.consumer_configuration)
    consumer.subscribe([self.topic_name])

    # setup a flag to mark termination signals received, see below why we use an array
    termination_signal_received = [False]

    def termination_signal_handler(_sig_id, _frame):
        """
        Function to use as a hook for SIGINT and SIGTERM

        This signal handler only remembers that the signal was emitted.
        The batch processing loop detects that the signal was emitted
        and stops once the whole batch is processed.
        """
        # We need to use an array so that termination_signal_received is not a
        # local variable assignment, but a lookup in the closure's outer scope.
        termination_signal_received[0] = True

    with set_termination_request_handlers(termination_signal_handler):
        while not (is_shutdown_requested() or termination_signal_received[0]):
            # get up to commit_batch_size messages
            messages = consumer.consume(
                num_messages=self.commit_batch_size,
                timeout=self.max_fetch_time_seconds)
            for message in messages:
                message_error = message.error()
                if message_error is not None:
                    logger.error(
                        "Received message with error on %s: %s",
                        self.topic_name, message_error)
                    raise ValueError(
                        "Bad message received from consumer",
                        self.topic_name, message_error)
                safe_execute(self.process_message, message, _with_transaction=False)
            if len(messages) > 0:
                # we have read some messages in the previous consume, commit the offset
                consumer.commit(asynchronous=False)

    consumer.close()
    logger.debug(
        "Closing kafka consumer for topic:%s with consumer group:%s",
        self.topic_name,
        self.consumer_group,
    )
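# set_termination_request_handlers is not shown in this snippet; a minimal
# context manager consistent with its usage above might look like this (assumed):
import signal
from contextlib import contextmanager

@contextmanager
def set_termination_request_handlers(handler):
    old_int = signal.signal(signal.SIGINT, handler)
    old_term = signal.signal(signal.SIGTERM, handler)
    try:
        yield
    finally:
        # restore the previous handlers on exit
        signal.signal(signal.SIGINT, old_int)
        signal.signal(signal.SIGTERM, old_term)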
def _get_partitions(
    self, topic: Topic, retrieve_last_timestamp: bool,
    get_partition_watermarks: bool = True
) -> List[Partition]:
    assert not (
        retrieve_last_timestamp and not get_partition_watermarks
    ), "Can not retrieve timestamp without partition watermarks"

    config = Config.get_instance().create_confluent_config()
    config.update({
        "group.id": ESQUE_GROUP_ID,
        "topic.metadata.refresh.interval.ms": "250",
    })
    with closing(confluent_kafka.Consumer(config)) as consumer:
        confluent_topic = consumer.list_topics(topic=topic.name).topics[topic.name]
        partitions: List[Partition] = []
        if not get_partition_watermarks:
            return [
                Partition(partition_id, -1, -1, meta.isrs, meta.leader, meta.replicas, None)
                for partition_id, meta in confluent_topic.partitions.items()
            ]
        for partition_id, meta in confluent_topic.partitions.items():
            try:
                low, high = consumer.get_watermark_offsets(
                    TopicPartition(topic=topic.name, partition=partition_id))
            except KafkaException:
                # retry after metadata should be refreshed (also consider small network delays)
                # unfortunately we cannot explicitly cause and wait for a metadata refresh
                time.sleep(1)
                low, high = consumer.get_watermark_offsets(
                    TopicPartition(topic=topic.name, partition=partition_id))

            latest_timestamp = None
            if high > low and retrieve_last_timestamp:
                # read the last message in the partition to get its timestamp
                assignment = [
                    TopicPartition(topic=topic.name, partition=partition_id, offset=high - 1)
                ]
                consumer.assign(assignment)
                msg = consumer.poll(timeout=10)
                if msg is None:
                    logger.warning(
                        f"Due to timeout latest timestamp for topic `{topic.name}` "
                        f"and partition `{partition_id}` is missing."
                    )
                else:
                    # msg.timestamp() returns (timestamp_type, value_in_ms)
                    latest_timestamp = float(msg.timestamp()[1]) / 1000

            partition = Partition(
                partition_id, low, high, meta.isrs, meta.leader, meta.replicas, latest_timestamp)
            partitions.append(partition)
        return partitions
def create_kafka_consumer(self):
    # use the service name as the group id
    consumer_config = {
        "group.id": Config().get("name"),
        "bootstrap.servers": ",".join(self.bootstrap_servers),
        "default.topic.config": {"auto.offset.reset": "smallest"},
    }
    return confluent_kafka.Consumer(**consumer_config)
def run(self):
    '''Process for reading lines from Kafka and feeding them to a
    process_function() function.'''

    logging.info(message_info(129, threading.current_thread().name))

    # Create Kafka client.
    consumer_configuration = {
        'bootstrap.servers': self.config.get('kafka_bootstrap_server'),
        'group.id': self.config.get("kafka_group"),
        'enable.auto.commit': False,
        'auto.offset.reset': 'earliest'
    }
    consumer = confluent_kafka.Consumer(consumer_configuration)
    consumer.subscribe([self.config.get("kafka_topic")])

    # In a loop, get messages from Kafka.
    while True:

        # Get message from Kafka queue.
        # Timeout quickly to allow other co-routines to process.
        kafka_message = consumer.poll(1.0)

        # Handle non-standard Kafka output.
        if kafka_message is None:
            continue
        if kafka_message.error():
            if kafka_message.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                continue
            else:
                logging.error(message_error(722, kafka_message.error()))
                continue

        # Construct and verify Kafka message.
        kafka_message_string = kafka_message.value().strip()
        if not kafka_message_string:
            continue
        if isinstance(kafka_message_string, bytes):
            kafka_message_string = kafka_message_string.decode()
        logging.debug(message_debug(904, threading.current_thread().name, kafka_message_string))
        self.config['counter_processed_messages'] += 1

        # Write message to log.
        logging.info(message_info(101, kafka_message_string))
        consumer.commit()

    consumer.close()
def start(self):
    import confluent_kafka as ck
    if self.stopped:
        self.consumer = ck.Consumer(self.consumer_params)
        self.stopped = False
        tp = ck.TopicPartition(self.topic, 0, 0)

        # blocks for consumer thread to come up
        self.consumer.get_watermark_offsets(tp)
        self.loop.add_callback(self.poll_kafka)
def create_consumer():
    consumer = confluent_kafka.Consumer({
        'bootstrap.servers': KAFKA_ADDR,
        'group.id': 'tasks_1',
        'session.timeout.ms': 6000,
        # 'on_commit': my_commit_callback,
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe(['tasks'])
    return consumer
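# A minimal consume loop built on the factory above; the handle() function is
# a hypothetical stand-in for application logic:
consumer = create_consumer()
try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            raise confluent_kafka.KafkaException(msg.error())
        handle(msg)  # hypothetical message handler
finally:
    consumer.close()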