Example #1
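For context, here is a minimal sketch of the module-level imports this snippet appears to assume (the layout is an assumption; the snippet shows only two methods of a larger class, and `log` is taken to be a module-level logger):

import datetime
import logging
import threading

import kafka.common
from kazoo.client import KazooClient
from kazoo.recipe.partitioner import SetPartitioner

log = logging.getLogger(__name__)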
    def _partition(self):
        """Consume messages from kafka

           Consume messages from kafka using the Kazoo SetPartitioner to
           allow multiple consumer processes to negotiate access to the kafka
           partitions
        """

        # KazooClient and SetPartitioner objects need to be instantiated after
        # the consumer process has forked.  Instantiating prior to forking
        # gives the appearance that things are working, but after forking the
        # connection to zookeeper is lost and no state changes are visible.

        if not self._kazoo_client:
            self._kazoo_client = KazooClient(hosts=self._zookeeper_url)
            self._kazoo_client.start()

            state_change_event = threading.Event()

            self._set_partitioner = SetPartitioner(
                self._kazoo_client,
                path=self._zookeeper_path,
                set=self._consumer.fetch_offsets.keys(),
                state_change_event=state_change_event,
                identifier=str(datetime.datetime.now()))

        try:
            while True:
                if self._set_partitioner.failed:
                    raise Exception("Failed to acquire partition")

                elif self._set_partitioner.release:
                    log.info("Releasing locks on partition set {} "
                             "for topic {}".format(self._partitions,
                                                   self._kafka_topic))
                    self._set_partitioner.release_set()

                    self._partitions = []

                elif self._set_partitioner.acquired:
                    if not self._partitions:
                        self._partitions = [p for p in self._set_partitioner]

                        if not self._partitions:
                            log.info("Not assigned any partitions on topic {},"
                                     " waiting for a Partitioner state change".
                                     format(self._kafka_topic))
                            state_change_event.wait()
                            state_change_event.clear()
                            continue

                        log.info("Acquired locks on partition set {} "
                                 "for topic {}".format(self._partitions,
                                                       self._kafka_topic))

                        # Reconstruct the kafka consumer object because the
                        # consumer has no API that allows the set of partitions
                        # to be updated outside of construction.
                        self._consumer.stop()
                        self._consumer = self._create_kafka_consumer(
                            self._partitions)
                        return

                elif self._set_partitioner.allocating:
                    log.info("Waiting to acquire locks on partition set")
                    self._set_partitioner.wait_for_acquire()

        except Exception:
            log.exception('KafkaConsumer encountered fatal exception '
                          'processing messages.')
            raise
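
    # ------------------------------------------------------------------
    # Hypothetical usage sketch (not part of the original example): a
    # forked worker process might negotiate its partition set once and
    # then consume.  The names run() and _handle() are assumptions made
    # purely for illustration.
    # ------------------------------------------------------------------
    def run(self):
        self._partition()            # blocks until a partition set is acquired
        for message in self._consumer:
            self._handle(message)    # placeholder for real message handling
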
    def __iter__(self):
        """Consume messages from kafka using the Kazoo SetPartitioner to
           allow multiple consumer processes to negotiate access to the kafka
           partitions
        """

        # KazooClient and SetPartitioner objects need to be instantiated after
        # the consumer process has forked.  Instantiating prior to forking
        # gives the appearance that things are working, but after forking the
        # connection to zookeeper is lost and no state changes are visible.

        kazoo_client = KazooClient(hosts=self._zookeeper_url)
        kazoo_client.start()

        set_partitioner = SetPartitioner(
            kazoo_client,
            path=self._zookeeper_path,
            set=self._consumer.fetch_offsets.keys())

        consumed_from_kafka = self._statsd.get_counter(
            name='consumed_from_kafka')

        try:
            partitions = []

            while True:
                if set_partitioner.failed:
                    raise Exception("Failed to acquire partition")

                elif set_partitioner.release:
                    log.info("Releasing locks on partition set {} "
                             "for topic {}".format(partitions,
                                                   self._kafka_topic))
                    set_partitioner.release_set()

                    partitions = []

                elif set_partitioner.acquired:
                    if not partitions:
                        partitions = [p for p in set_partitioner]

                        log.info("Acquired locks on partition set {} "
                                 "for topic {}".format(partitions,
                                                       self._kafka_topic))

                        # Refresh the last known offsets again to make sure
                        # that they are the latest after having acquired the
                        # lock. Updates self._consumer.fetch_offsets.
                        self._consumer.fetch_last_known_offsets()

                        # Modify self._consumer.fetch_offsets to hold only the
                        # offsets for the set of Kafka partitions acquired
                        # by this instance of the persister.

                        partitioned_fetch_offsets = {}
                        for p in partitions:
                            partitioned_fetch_offsets[p] = (
                                self._consumer.fetch_offsets[p])

                        self._consumer.fetch_offsets = partitioned_fetch_offsets

                    # When Kafka resizes the partitions it's possible that it
                    # will remove data at our current offset.  When this
                    # happens the next attempt to read from Kafka will generate
                    # an OffsetOutOfRangeError.  We trap this error and seek to
                    # the head of the current Kafka data.  Because this error
                    # only happens when Kafka removes data we're currently
                    # pointing at, we're guaranteed not to read any duplicate
                    # data; however, we will lose any information between our
                    # current offset and the new Kafka head.

                    try:
                        for message in self._consumer:
                            if not set_partitioner.acquired:
                                break
                            consumed_from_kafka += 1

                            log.debug("Consuming message from kafka, "
                                      "partition {}, offset {}".format(
                                          message[0], message[1].offset))

                            yield message
                    except kafka.common.OffsetOutOfRangeError:
                        log.error("Kafka OffsetOutOfRange.  Jumping to head.")
                        self._consumer.seek(0, 0)

                elif set_partitioner.allocating:
                    log.info("Waiting to acquire locks on partition set")
                    set_partitioner.wait_for_acquire()

        except Exception:
            log.exception('KafkaConsumer encountered fatal exception '
                          'processing messages.')
            raise
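
# Hypothetical usage sketch (names assumed; not part of the original example).
# The enclosing class, apparently named KafkaConsumer judging by the log
# messages, yields (partition, OffsetAndMessage) pairs when the underlying
# kafka-python SimpleConsumer was built with partition_info=True, so the
# payload bytes live at raw.message.value.

def drain(consumer):
    """Print every message yielded by a consumer instance defined above."""
    for partition, raw in consumer:
        print("partition {} offset {}: {!r}".format(
            partition, raw.offset, raw.message.value))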