Example #1
    def get_consumer_for_topic(self, topic_name, group_id, partition, offset=None):
        """
        Method to instantiate the kafka consumer for the given topic, consumer group and partition
        :param topic_name: topic name
        :param group_id: consumer group id
        :param partition: partition id
        :return: consumer instance
        """
        try:
            log.info("Fetching consumer for topic: " + topic_name)
            if topic_name + "_" + str(partition) in self.consumer_dict:
                return self.consumer_dict[topic_name + "_" + str(partition)]
            conf = {'bootstrap.servers': self.bootstrap_servers,
                    'group.id': group_id,
                    # 'session.timeout.ms': 1000,
                    'default.topic.config': {
                        'auto.offset.reset': 'earliest'
                    }
                    }
            consumer = confluent_kafka.Consumer(**conf)

            if offset is None:
                tp = confluent_kafka.TopicPartition(topic_name, partition)
            else:
                tp = confluent_kafka.TopicPartition(topic_name, partition, offset)
            consumer.assign([tp])
            self.consumer_dict[topic_name + "_" + str(partition)] = consumer
        except Exception as e:
            print(e)
            log.error("Error while setting up the consumer for topic: " + topic_name)
            raise e
        return consumer
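A hypothetical call of the method above, assuming an instance `client` of the surrounding class (which provides bootstrap_servers and the consumer_dict cache); the topic, group and `process()` are placeholders:

consumer = client.get_consumer_for_topic('orders', group_id='etl-group', partition=0)
msg = consumer.poll(timeout=1.0)
if msg is not None and msg.error() is None:
    process(msg.value())  # process() is a placeholder for application logic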
Example #2
def get_message_batch(kafka_params, topic, partition, keys, low, high, timeout=None):
    """Fetch a batch of kafka messages (keys & values) in given topic/partition

    This will block until messages are available, or timeout is reached.
    """
    import confluent_kafka as ck
    t0 = time.time()
    consumer = ck.Consumer(kafka_params)
    tp = ck.TopicPartition(topic, partition, low)
    consumer.assign([tp])
    out = []
    try:
        while True:
            msg = consumer.poll(0)
            if msg and msg.value() and msg.error() is None:
                if high >= msg.offset():
                    if keys:
                        out.append({'key':msg.key(), 'value':msg.value()})
                    else:
                        out.append(msg.value())
                if high <= msg.offset():
                    break
            else:
                time.sleep(0.1)
                if timeout is not None and time.time() - t0 > timeout:
                    break
    finally:
        consumer.close()
    return out
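A hedged usage sketch for get_message_batch, assuming a local broker and a topic named 'events' (both placeholders); it fetches the values of offsets 0 through 99 from partition 0 and gives up after ten seconds:

kafka_params = {'bootstrap.servers': 'localhost:9092',  # placeholder broker
                'group.id': 'batch-reader'}              # placeholder group
values = get_message_batch(kafka_params, topic='events', partition=0,
                           keys=False, low=0, high=99, timeout=10)
print('fetched', len(values), 'messages')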
Example #3
 def __enter__(self):
     self.consumer = confluent_kafka.Consumer(**self.kafka_kwargs)
     if self.frombeginning:
         self.consumer.subscribe([self.topic], on_assign=set_offset_beginning)
     else:
         self.consumer.subscribe([self.topic])
     return self
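The on_assign callback set_offset_beginning is referenced above but not shown; a minimal sketch under the assumption that it simply rewinds every newly assigned partition to the start of the log (the real implementation may differ):

import confluent_kafka

def set_offset_beginning(consumer, partitions):
    # on_assign hooks receive the consumer and the list of assigned TopicPartitions;
    # setting each offset to OFFSET_BEGINNING and re-assigning starts from the beginning.
    for partition in partitions:
        partition.offset = confluent_kafka.OFFSET_BEGINNING
    consumer.assign(partitions)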
Example #4
    def _create_worker_consumer(self, transport: 'Transport',
                                loop: asyncio.AbstractEventLoop) -> _Consumer:
        conf = self.app.conf
        self._assignor = self.app.assignor

        # XXX partition.assignment.strategy is a string
        # need to write C wrapper for this
        # 'partition.assignment.strategy': [self._assignor]
        return confluent_kafka.Consumer({
            'bootstrap.servers': server_list(transport.url, transport.default_port),
            'group.id': conf.id,
            'client.id': conf.broker_client_id,
            'default.topic.config': {
                'auto.offset.reset': 'earliest',
            },
            'enable.auto.commit': False,
            'fetch.max.bytes': conf.consumer_max_fetch_size,
            'request.timeout.ms': int(conf.broker_request_timeout * 1000.0),
            'check.crcs': conf.broker_check_crcs,
            'session.timeout.ms': int(conf.broker_session_timeout * 1000.0),
            'heartbeat.interval.ms': int(conf.broker_heartbeat_interval * 1000.0),
        })
Example #5
    def __init__(self, conf, reset=False):
        """Set @reset to True to begin consuming at start of stream."""
        config = dict()
        self.topic = conf['kafka']['topic']
        config['bootstrap.servers'] = conf['kafka']['address']

        default_topic_config = {}
        default_topic_config["auto.offset.reset"] = "smallest"
        default_topic_config['enable.auto.commit'] = True
        config["default.topic.config"] = default_topic_config

        # Set the group ID.
        state = self._get_state_info(conf)
        if not reset and state:
            group_id = state['group_id']
        else:
            group_id = "CG_" + str(uuid.uuid4())
            self._update_state(conf,'group_id', group_id)
        config["group.id"] = group_id

        # Add SSL stuff
        if conf['kafka'].getboolean('ssl_enable'):
            config["security.protocol"] = 'ssl'
            config["ssl.ca.location"] = conf['kafka']['ca_path']
            config["ssl.certificate.location"] = conf['kafka']['cert_path']
            config["ssl.key.location"] = conf['kafka']['key_path']
            config["ssl.key.password"] = conf['kafka']['password']

        self.consumer = confluent_kafka.Consumer(config)
        self.consumer.subscribe([self.topic])

        p_schema = Utils.load_schema(conf['kafka']['schema'])
        c_schema = Utils.load_schema(conf['kafka']['schema'])
        self.deserializer = KafkaAvroGenericDeserializer(c_schema, p_schema)
Example #6
 def __init__(
     self,
     hosts: List[str],
     group_id: str,
     subscription: List[str] = [],
     auto_offset: bool = True,
     start_from_beginning_if_no_offset_available: bool = True,
     statistics_interval_ms: int = 1000,
     use_confluent_monitoring_interceptor: bool = False,
     logger: Optional[logging.Logger] = None,
     debug: bool = False,
 ):
     conf = {
         "bootstrap.servers": ",".join(hosts),
         "group.id": group_id,
         "enable.auto.offset.store": auto_offset,
         "statistics.interval.ms": statistics_interval_ms,
         "error_cb": self.error_callback,
         "stats_cb": self.stats_callback,
         "throttle_cb": self.throttle_callback,
     }
     if start_from_beginning_if_no_offset_available:
         conf["auto.offset.reset"] = "earliest"
     if use_confluent_monitoring_interceptor:
         conf["plugin.library.paths"] = "monitoring-interceptor"
     if debug:
         conf["debug"] = "consumer"
     self.subscription = subscription
     self.logger = logger or logging.getLogger("KafkaConsumer")
     self._kafka_instance = confluent_kafka.Consumer(conf,
                                                     logger=self.logger)
     self._async_poll = async_wrap(self._kafka_instance.poll)
Example #7
def confluent_kafka_consumer_performance():
    topic = 'test'
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': '172.20.10.10:9092',
        'group.id': 'test-consumer-group',
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    consumer = confluent_kafka.Consumer(**conf)
    print(consumer)

    consumer_start = time.time()
    # This is the same as pykafka, subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1)
        if msg:
            msg_consumed_count += 1
            print(msg)

        if msg_consumed_count >= 10:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    return consumer_timing
Example #8
    def __init__(self, broker, timeout=None, topics=["^ztf_.*"], **consumer_config):
        """ """

        self._metrics = KafkaMetrics.instance()
        config = {
            "bootstrap.servers": broker,
            "default.topic.config": {"auto.offset.reset": "smallest"},
            "enable.auto.commit": True,
            "receive.message.max.bytes": 2 ** 29,
            "auto.commit.interval.ms": 10000,
            "enable.auto.offset.store": False,
            "group.id": uuid.uuid1(),
            "enable.partition.eof": False,  # don't emit messages on EOF
            "topic.metadata.refresh.interval.ms": 1000,  # fetch new metadata every second to pick up topics quickly
            # "debug": "all",
            "stats_cb": self._metrics.on_stats_callback,
            "statistics.interval.ms": 10000,
        }
        config.update(**consumer_config)
        self._consumer = confluent_kafka.Consumer(**config)

        self._consumer.subscribe(topics)
        if timeout is None:
            self._poll_interval = 1
            self._poll_attempts = sys.maxsize
        else:
            self._poll_interval = max((1, min((30, timeout))))
            self._poll_attempts = max((1, int(timeout / self._poll_interval)))
        self._timeout = timeout

        self._last_message = None
Example #9
def _create_consumer(consumer_group, consumer_type, initial_offset_reset):
    """
    Creates a kafka consumer based on the
    :param consumer_group:
    :return:
    """
    topic_name = ConsumerType.get_topic_name(consumer_type)
    cluster_name = settings.KAFKA_TOPICS[topic_name]["cluster"]
    bootstrap_servers = settings.KAFKA_CLUSTERS[cluster_name][
        "bootstrap.servers"]

    consumer_configuration = {
        "bootstrap.servers": bootstrap_servers,
        "group.id": consumer_group,
        "enable.auto.commit": "false",  # we commit manually
        "enable.auto.offset.store": "true",  # we let the broker keep count of the current offset (when committing)
        "enable.partition.eof": "false",  # stop EOF errors when we read all messages in the topic
        "default.topic.config": {
            "auto.offset.reset": initial_offset_reset
        },
    }

    return kafka.Consumer(consumer_configuration)
Example #10
def test_consumer_confluent(conf: Config):
    consumer = ck.Consumer(
        **{
            'bootstrap.servers': conf["brokers"],
            'group.id': "confluent-" + str(uuid.uuid1()),
            # 'group.id': "confluent",
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            },
            'enable.auto.commit': 'true'
        })
    consumer.subscribe([conf["topic"]], on_assign=my_on_assign)

    start = datetime.datetime.now()
    max_fetch_messages = 500
    _counter = 0
    while _counter < conf["num_messages"]:
        messages = consumer.consume(num_messages=max_fetch_messages)
        for message in messages:
            _counter += 1
            if _counter >= conf["num_messages"]:
                break
    end = datetime.datetime.now()
    log.debug("Consumed %s messages", _counter)
    return TestResult(start=start, end=end, num_messages=_counter)
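The test passes an on_assign hook named my_on_assign that is not included here; a minimal placeholder that only logs the assignment before accepting it (purely illustrative):

def my_on_assign(consumer, partitions):
    # Invoked by confluent_kafka on rebalance; log what was assigned, then accept it.
    for p in partitions:
        log.debug("Assigned %s [%d] at offset %d", p.topic, p.partition, p.offset)
    consumer.assign(partitions)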
Example #11
    def inner(options=None):
        test_name = request.node.name
        topics = [_get_topic_name(_EVENTS_TOPIC_NAME, test_name)]
        options = _kafka_processing_config(test_name, options)
        # look for the servers (it is the only config we are interested in)
        servers = [
            elm['value'] for elm in options['processing']['kafka_config']
            if elm['name'] == 'bootstrap.servers'
        ]
        if len(servers) < 1:
            raise ValueError(
                "Bad kafka_config, could not find 'bootstrap.servers'.\n"
                "The configuration should have an entry of the format \n"
                "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
            )

        servers = servers[0]

        settings = {
            'bootstrap.servers': servers,
            'group.id': 'test.consumer',
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest',
        }

        consumer = kafka.Consumer(settings)
        consumer.subscribe(topics)

        return consumer
Example #12
    def poll_kafka(self):
        import confluent_kafka as ck
        consumer = ck.Consumer(self.consumer_params)

        try:
            while not self.stopped:
                out = []

                for partition in range(self.npartitions):
                    tp = ck.TopicPartition(self.topic, partition, 0)
                    try:
                        low, high = consumer.get_watermark_offsets(tp,
                                                                   timeout=0.1)
                    except (RuntimeError, ck.KafkaException):
                        continue
                    current_position = self.positions[partition]
                    lowest = max(current_position, low)
                    out.append((self.consumer_params, self.topic, partition,
                                lowest, high - 1))
                    self.positions[partition] = high

                for part in out:
                    yield self._emit(part)

                else:
                    yield gen.sleep(self.poll_interval)
        finally:
            consumer.close()
Example #13
    def inner(topic: str, options=None):
        topic_name = get_topic_name(topic)
        topics = [topic_name]
        options = processing_config(options)
        # look for the servers (it is the only config we are interested in)
        servers = [
            elm["value"]
            for elm in options["processing"]["kafka_config"]
            if elm["name"] == "bootstrap.servers"
        ]
        if len(servers) < 1:
            raise ValueError(
                "Bad kafka_config, could not find 'bootstrap.servers'.\n"
                "The configuration should have an entry of the format \n"
                "{name:'bootstrap.servers', value:'127.0.0.1'} at path 'processing.kafka_config'"
            )

        servers = servers[0]

        settings = {
            "bootstrap.servers": servers,
            "group.id": "test-consumer-%s" % uuid.uuid4().hex,
            "enable.auto.commit": True,
            "auto.offset.reset": "earliest",
        }

        consumer = kafka.Consumer(settings)
        consumer.assign([kafka.TopicPartition(t, 0) for t in topics])

        def die():
            consumer.close()

        request.addfinalizer(die)
        return consumer, options, topic_name
Example #14
    def __init__(self,
                 serializer: Serializable,
                 kafka_topic: str,
                 bootstrap_server: str = "localhost",
                 bootstrap_port: int = 9092,
                 default_group: str = "default-group",
                 *args,
                 **kwargs):
        """Connects to a kafka topic and sets up the ingest

        Args:
            serializer (Serializable): Serializer to convert a message to bytes before sending to kafka.
            kafka_topic (str): Name of the Kafka topic to consume from.
            bootstrap_server (str, optional): Address of the Kafka bootstrap server. Defaults to "localhost".
            bootstrap_port (int, optional): Bootstrap server port on which the topic is listening for messages. Defaults to 9092.
            default_group (str, optional): Group name for this consumer group. Defaults to "default-group".
        """
        self.kafka_topic = kafka_topic
        conf = {
            "bootstrap.servers": bootstrap_server + ":" + str(bootstrap_port),
            "client.id": socket.gethostname(),
            "group.id": default_group
        }

        self.create_topic(topic_name=kafka_topic,
                          conf=conf)  # TODO is this safe?
        self.consumer = confluent_kafka.Consumer(conf)
        self.consumer.subscribe([self.kafka_topic])
        self.running = True
        super().__init__(serializer=serializer, *args, **kwargs)
Example #15
	def poll(self, on_idle=None, timeout=1.0):
		consumer = confluent_kafka.Consumer({
			'bootstrap.servers':       self.servers,
			'group.id':                self.group,
			'enable.auto.commit':      False,
			'enable.partition.eof':    False,
			'socket.keepalive.enable': True,
		})

		consumer.subscribe([self.topic])

		atexit.register(consumer.close)

		# Listen for messages.
		while True:
			# Without a timeout, KeyboardInterrupt is ignored.
			message = consumer.poll(timeout=timeout)

			# No message was received before the timeout.
			if not message:
				if callable(on_idle):
					on_idle()

				continue
		
			if message.error():
				raise confluent_kafka.KafkaException(message.error())
		
			try:
				self.dispatch(message)
			except Exception as err:
				raise DispatchException(err)
			else:
				consumer.commit(message)
Example #16
def run(runarg, return_dict):
    """run.
    """
    processID = runarg['processID']
    # Configure database connection
    try:
        msl = make_database_connection()
    except Exception as e:
        print('ERROR cannot connect to local database', e)
        sys.stdout.flush()
        return

    # Start consumer and print alert stream
    try:
        consumer = confluent_kafka.Consumer(**runarg['conf'])
        consumer.subscribe([runarg['args'].topic])
    except Exception as e:
        print('ERROR cannot connect to kafka', e)
        sys.stdout.flush()
        return

    # Number of alerts in the batch
    if runarg['args'].maxalert:
        maxalert = runarg['args'].maxalert
    else:
        maxalert = 50000

    nalert_in = nalert_out = nalert_ss = 0
    startt = time.time()
    while nalert_in < maxalert:
        # Here we get the next alert by kafka
        msg = consumer.poll(timeout=settings.KAFKA_TIMEOUT)
        if msg is None:
            break
        if msg.error():
            continue
        if msg.value() is None:
            continue
        else:
            # Apply filter to each alert
            alert = json.loads(msg.value())
            nalert_in += 1
            d = alert_filter(alert, msl)
            nalert_out += d['nalert']
            nalert_ss += d['ss']
            if nalert_in % 1000 == 0:
                print('process %d nalert_in %d nalert_out  %d time %.1f' %
                      (processID, nalert_in, nalert_out, time.time() - startt))
                sys.stdout.flush()
                # refresh the database every 1000 alerts
                # make sure everything is committed
                msl.close()
                msl = make_database_connection()

    consumer.close()
    return_dict[processID] = {
        'nalert_in': nalert_in,
        'nalert_out': nalert_out,
        'nalert_ss': nalert_ss
    }
Example #17
def confluent_kafka_consumer_performance(topic=topic):

    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': bootstrap_servers,
        'group.id': uuid.uuid1(),
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    consumer = confluent_kafka.Consumer(**conf)
    print("\n>>> Connect Kafka in {} by confluent-kafka-python as consumer".
          format(bootstrap_servers))

    consumer_start = time.time()
    # This is the same as pykafka, subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while True:
        msg = consumer.poll(1)
        if msg:
            msg_consumed_count += 1

        if msg_consumed_count >= msg_count:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    return consumer_timing
Example #18
    def __init__(self, config: KafkaSourceConfig, ctx: PipelineContext):
        super().__init__(config, ctx)
        self.source_config = config
        if (
            self.is_stateful_ingestion_configured()
            and not self.source_config.platform_instance
        ):
            raise ConfigurationError(
                "Enabling kafka stateful ingestion requires to specify a platform instance."
            )

        self.consumer = confluent_kafka.Consumer(
            {
                "group.id": "test",
                "bootstrap.servers": self.source_config.connection.bootstrap,
                **self.source_config.connection.consumer_config,
            }
        )
        # Use the fully qualified name for SchemaRegistryClient to make it mock patchable for testing.
        self.schema_registry_client = (
            confluent_kafka.schema_registry.schema_registry_client.SchemaRegistryClient(
                {
                    "url": self.source_config.connection.schema_registry_url,
                    **self.source_config.connection.schema_registry_config,
                }
            )
        )
        self.report = KafkaSourceReport()
        self.known_schema_registry_subjects: List[str] = []
        try:
            self.known_schema_registry_subjects.extend(
                self.schema_registry_client.get_subjects()
            )
        except Exception as e:
            logger.warning(f"Failed to get subjects from schema registry: {e}")
Example #19
def get_kafka_consumer():
    consumer = None
    try:
        consumer = confluent_kafka.Consumer(**get_config())
    except Exception as e:
        print('Could not create kafka consumer', e, file=sys.stderr)
    return consumer
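Because get_kafka_consumer returns None when construction fails, callers should guard before subscribing; a short sketch with a placeholder topic name:

consumer = get_kafka_consumer()
if consumer is None:
    sys.exit(1)
consumer.subscribe(['my-topic'])  # 'my-topic' is a placeholder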
Example #20
def confluent_kafka_consumer_performance(nums=5000):

    topic = 'event_log'  # subscribe() expects topic names as str, not bytes
    msg_consumed_count = 0
    conf = {
        'bootstrap.servers': '192.168.0.162:9092',
        'group.id': 'zy_consumer',
        'session.timeout.ms': 6000,
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    }

    consumer = confluent_kafka.Consumer(**conf)

    consumer_start = time.time()
    # This is the same as pykafka, subscribing to a topic will start a background thread
    consumer.subscribe([topic])

    while True:
        print('>>>>>>>>>>>>>>>Starting<<<<<<<<<<<<<<<<<')
        # consume() returns a list of Message objects
        msgs = consumer.consume(num_messages=1)
        if msgs:
            print(msgs[0].value())
            msg_consumed_count += 1

        if msg_consumed_count >= nums:
            break

    consumer_timing = time.time() - consumer_start
    consumer.close()
    print('confluent_kafka_consumer cost:{} s'.format(consumer_timing))
Example #21
 def __init__(self, conf=None):
     self.conf = conf or {
         'bootstrap.servers': BROKER,
         'group.id': 'connect',
         'auto.offset.reset': 'latest'
     }
     self.consumer = confluent_kafka.Consumer(**self.conf)
     self.consumer.subscribe([TOPIC])
Example #22
 def __init__(self, config: dict, topics: str) -> None:
     self.log = logging.getLogger()
     self._consumer = confluent_kafka.Consumer(config)
     self._closed = False
     self._consumer.subscribe(topics=topics.split(","))
     # TODO to config
     self.timeout = 1.0
     self._num_messages = 100
Example #23
 def __init__(self, configs, topics, callback):
     logging.info('Kafka consumer starting on topics {}...'.format(topics))
     self._consumer = confluent_kafka.Consumer(configs)
     self._topics = topics
     self._callback = callback
     self._cancelled = False
     self._poll_thread = Thread(target=self._poll_loop)
     self._poll_thread.start()
Example #24
    def __init__(
            self, kafka_server, group_id, topic, duration, consume_all,
            consumer_schema_filename, producer_schema_filename, auto_offset,
            security_protocol=None, ca_cert=None, cert_location=None,
            key_location=None, key_pass=None, session_timeout=_DEFAULT_SESSION_TIMEOUT_MS):
        """Create a simple consumer.

        :param kafka_server: Connection string for bootstrap Kafka server.
        :param group_id: Group ID to use for distributed consumers.
        :param topic: Topic to consume from.
        :param duration: Duration to run for.
        :param consumer_schema_filename: Filename for consumer schema.
        :param producer_schema_filename: Filename for producer schema.
        :param auto_offset: Offset reset method to use for consumers.
        """
        super(Consumer, self).__init__()
        self.kafka_server = kafka_server
        self.group_id = group_id
        self.topic = topic
        self.duration = duration
        self.consume_all = consume_all
        self.consumer_schema_filename = consumer_schema_filename
        self.producer_schema_filename = producer_schema_filename
        self.serializer = KafkaAvroGenericSerializer(self.consumer_schema_filename)
        self.deserializer = KafkaAvroGenericDeserializer(
                self.consumer_schema_filename, self.producer_schema_filename)
        self.auto_offset = auto_offset
        self.consume_timeout = Consumer._DEFAULT_CONSUME_TIMEOUT

        # Handle a sigint shutdown cleanly.
        self._shutdown = False

        config = {}
        config["bootstrap.servers"] = self.kafka_server
        config["group.id"] = self.group_id
        config["session.timeout.ms"] = session_timeout

        if security_protocol:
            if security_protocol.lower() == "ssl":
                config["security.protocol"] = security_protocol
                config["ssl.ca.location"] = ca_cert
                config["ssl.certificate.location"] = cert_location
                config["ssl.key.location"] = key_location
                config["ssl.key.password"] = key_pass
            elif security_protocol.lower() == "plaintext":
                config["security.protocol"] = security_protocol
            else:
                msg = "Unsupported security protocol type for TC APIs: " + security_protocol
                raise ValueError(msg)

        default_topic_config = {}
        default_topic_config["auto.offset.reset"] = self.auto_offset
        config["default.topic.config"] = default_topic_config

        self.consumer = confluent_kafka.Consumer(config)
        self.consumer.subscribe([self.topic])
        self.latency_stats = Utils.Stats(
                1, "End-to-End Latency (including Avro serialization)", "ms")
Example #25
    def run(self, is_shutdown_requested=lambda: False):
        """
        Runs the message processing loop
        """
        logger.debug(
            "Staring kafka consumer for topic:{} with consumer group:{}",
            self.topic_name,
            self.consumer_group,
        )

        consumer = kafka.Consumer(self.consumer_configuration)
        consumer.subscribe([self.topic_name])

        # setup a flag to mark termination signals received, see below why we use an array
        termination_signal_received = [False]

        def termination_signal_handler(_sig_id, _frame):
            """
            Function to use a hook for SIGINT and SIGTERM

            This signal handler only remembers that the signal was emitted.
            The batch processing loop detects that the signal was emitted
            and stops once the whole batch is processed.
            """
            # We need to use an array so that terminal_signal_received is not a
            # local variable assignment, but a lookup in the closure's outer scope.
            termination_signal_received[0] = True

        with set_termination_request_handlers(termination_signal_handler):
            while not (is_shutdown_requested()
                       or termination_signal_received[0]):
                # get up to commit_batch_size messages
                messages = consumer.consume(
                    num_messages=self.commit_batch_size,
                    timeout=self.max_fetch_time_seconds)

                for message in messages:
                    message_error = message.error()
                    if message_error is not None:
                        logger.error("Received message with error on %s: %s",
                                     self.topic_name, message_error)
                        raise ValueError("Bad message received from consumer",
                                         self.topic_name, message_error)

                    safe_execute(self.process_message,
                                 message,
                                 _with_transaction=False)

                if len(messages) > 0:
                    # we have read some messages in the previous consume, commit the offset
                    consumer.commit(asynchronous=False)

        consumer.close()
        logger.debug(
            "Closing kafka consumer for topic:{} with consumer group:{}",
            self.topic_name,
            self.consumer_group,
        )
Example #26
    def _get_partitions(
            self,
            topic: Topic,
            retrieve_last_timestamp: bool,
            get_partition_watermarks: bool = True) -> List[Partition]:
        assert not (
            retrieve_last_timestamp and not get_partition_watermarks
        ), "Can not retrieve timestamp without partition watermarks"

        config = Config.get_instance().create_confluent_config()
        config.update({
            "group.id": ESQUE_GROUP_ID,
            "topic.metadata.refresh.interval.ms": "250"
        })
        with closing(confluent_kafka.Consumer(config)) as consumer:
            confluent_topic = consumer.list_topics(
                topic=topic.name).topics[topic.name]
            partitions: List[Partition] = []
            if not get_partition_watermarks:
                return [
                    Partition(partition_id, -1, -1, meta.isrs, meta.leader,
                              meta.replicas, None) for partition_id, meta in
                    confluent_topic.partitions.items()
                ]
            for partition_id, meta in confluent_topic.partitions.items():
                try:
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))
                except KafkaException:
                    # retry after metadata should be refreshed (also consider small network delays)
                    # unfortunately we cannot explicitly cause and wait for a metadata refresh
                    time.sleep(1)
                    low, high = consumer.get_watermark_offsets(
                        TopicPartition(topic=topic.name,
                                       partition=partition_id))

                latest_timestamp = None
                if high > low and retrieve_last_timestamp:
                    assignment = [
                        TopicPartition(topic=topic.name,
                                       partition=partition_id,
                                       offset=high - 1)
                    ]
                    consumer.assign(assignment)
                    msg = consumer.poll(timeout=10)
                    if msg is None:
                        logger.warning(
                            f"Due to timeout latest timestamp for topic `{topic.name}` "
                            f"and partition `{partition_id}` is missing.")
                    else:
                        latest_timestamp = float(msg.timestamp()[1]) / 1000
                partition = Partition(partition_id, low, high, meta.isrs,
                                      meta.leader, meta.replicas,
                                      latest_timestamp)
                partitions.append(partition)
        return partitions
Example #27
    def create_kafka_consumer(self):
        # use the service name as the group id
        consumer_config = {
            "group.id": Config().get("name"),
            "bootstrap.servers": ",".join(self.bootstrap_servers),
            "default.topic.config": {"auto.offset.reset": "smallest"},
        }

        return confluent_kafka.Consumer(**consumer_config)
Example #28
    def run(self):
        '''Process for reading lines from Kafka and feeding them to a process_function() function.'''

        logging.info(message_info(129, threading.current_thread().name))

        # Create Kafka client.

        consumer_configuration = {
            'bootstrap.servers': self.config.get('kafka_bootstrap_server'),
            'group.id': self.config.get("kafka_group"),
            'enable.auto.commit': False,
            'auto.offset.reset': 'earliest'
        }
        consumer = confluent_kafka.Consumer(consumer_configuration)
        consumer.subscribe([self.config.get("kafka_topic")])

        # In a loop, get messages from Kafka.

        while True:

            # Get message from Kafka queue.
            # Timeout quickly to allow other co-routines to process.

            kafka_message = consumer.poll(1.0)

            # Handle non-standard Kafka output.

            if kafka_message is None:
                continue
            if kafka_message.error():
                if kafka_message.error().code() == confluent_kafka.KafkaError._PARTITION_EOF:
                    continue
                else:
                    logging.error(message_error(722, kafka_message.error()))
                    continue

            # Construct and verify Kafka message.

            kafka_message_string = kafka_message.value().strip()
            if not kafka_message_string:
                continue
            if isinstance(kafka_message_string, bytes):
                kafka_message_string = kafka_message_string.decode()
            logging.debug(
                message_debug(904,
                              threading.current_thread().name,
                              kafka_message_string))
            self.config['counter_processed_messages'] += 1

            # Write message to log.

            logging.info(message_info(101, kafka_message_string))
            consumer.commit()

        consumer.close()
Example #29
File: sources.py, Project: masums/streamz
    def start(self):
        import confluent_kafka as ck
        if self.stopped:
            self.consumer = ck.Consumer(self.consumer_params)
            self.stopped = False
            tp = ck.TopicPartition(self.topic, 0, 0)

            # blocks for consumer thread to come up
            self.consumer.get_watermark_offsets(tp)
            self.loop.add_callback(self.poll_kafka)
Example #30
def create_consumer():
    consumer = confluent_kafka.Consumer({
        'bootstrap.servers': KAFKA_ADDR,
        'group.id': 'tasks_1',
        'session.timeout.ms': 6000,
        # 'on_commit': my_commit_callback,
        'auto.offset.reset': 'earliest'
    })
    consumer.subscribe(['tasks'])
    return consumer
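A hedged usage sketch for the consumer returned by create_consumer above: poll in a loop, skip error events, print message values, and always close the consumer. Only the 'tasks' subscription comes from the example; the rest is illustrative:

consumer = create_consumer()
try:
    while True:
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue                      # nothing arrived within the timeout
        if msg.error():
            print('consumer error:', msg.error())
            continue
        print(msg.topic(), msg.partition(), msg.offset(), msg.value())
except KeyboardInterrupt:
    pass
finally:
    consumer.close()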