Example #1

# Imports this snippet needs (assuming the legacy kafka-python < 1.0 API);
# g_conf, g_master_logger, is_quit and check_pkg are defined elsewhere in
# the source module.
import os
import random
import time
import traceback

from kafka import KafkaClient, SimpleConsumer
def kafka_pull(message_queue):
    global g_conf
    global g_master_logger
    ret = True
    while True:
        try:
            if is_quit():
                g_master_logger.info("thread quit: [%d]" % os.getpid())
                return True

            broker = random.choice(g_conf["broker_list"])
            g_master_logger.info("use broker is [%s]" % broker)
            partition_set = set([0])

            # client
            client = KafkaClient(broker)
            consumer = SimpleConsumer(
                client,
                g_conf["msg_group_name"],
                g_conf["msg_topic_name"],
                partitions=partition_set,
                auto_commit_every_n=g_conf["auto_commit_every_n"],
                auto_commit_every_t=g_conf["auto_commit_every_t"],
                fetch_size_bytes=g_conf["fetch_size_bytes"],
                buffer_size=g_conf["buffer_size"],
                max_buffer_size=g_conf["max_buffer_size"])

            cnt = 0
            for message in consumer:
                cnt += 1
                if cnt % 10000 == 0:
                    g_master_logger.info("msg consumer cnt is [%d] queue:%u" %
                                         (cnt, message_queue.qsize()))
                if is_quit():
                    consumer.stop()
                    g_master_logger.info("thread fetch msg quit: [%d]" %
                                         os.getpid())
                    break

                value = message.message.value
                if value is None:
                    g_master_logger.warning("value is none, msg is [%s]" %
                                            str(message))
                    continue
                if len(value) == 0:
                    g_master_logger.warning("value len is 0, msg is [%s]" %
                                            str(message))
                    continue
                if not check_pkg(value):
                    continue
                message_queue.put(message)

        except Exception as e:
            g_master_logger.error(
                "work error, exception is [%s], traceback is [%s]" %
                (e, traceback.format_exc()))
            time.sleep(5)
            continue
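A minimal sketch of driving kafka_pull from the consuming side; the queue
bound and the drain loop below are assumptions, not part of the original:

import threading
import Queue  # Python 2 standard-library queue

message_queue = Queue.Queue(maxsize=100000)
puller = threading.Thread(target=kafka_pull, args=(message_queue,))
puller.daemon = True
puller.start()

while not is_quit():
    try:
        message = message_queue.get(timeout=1)
    except Queue.Empty:
        continue
    # hand message.message.value to downstream processing here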
Example #2

# This run() method belongs to a thread-like class whose attributes
# (stopCpu, topic, groupName, parentOj, pid) are set elsewhere;
# get_kafka_hosts is a helper from the surrounding module. Assuming the
# legacy kafka-python API, it needs:
from kafka import KafkaClient, SimpleConsumer
        def run(self):
            client = None
            consumer = None
            try:
                # print("Starting Kafka Client")
                # print("Kafka topic: {}").format(self.topic)
                print get_kafka_hosts()
                client = KafkaClient(hosts=get_kafka_hosts())
                consumer = SimpleConsumer(client=client,
                                          group=self.groupName.encode(
                                              'ascii', 'ignore'),
                                          topic=self.topic,
                                          iter_timeout=5)
                consumer.seek(0, 1)  # whence=1: offset relative to the current position
                print '[Kafka Consumer] START'
                print 'Topic: {}'.format(self.topic)
                print 'Listening incoming message...'
                print '========================================================='
                # print("Listening kafka message...")

                while self.stopCpu is False:
                    for message in consumer.get_messages(count=5, block=False):
                        if self.stopCpu is True:
                            # print("Kafka Consumer Listening Stopped")
                            break

                        if message:
                            offset = message.offset
                            value = message.message.value
                            print 'msg: {0}, offset: {1}'.format(value, offset)

                            if len(value) > 0:
                                # chartdata = []
                                # j_val = json.loads(value)
                                # j_val['offset'] = offset
                                # chartdata.append(j_val)
                                # print("destination => ws"+str(self.pid))
                                # self.parentOj.emit("ws"+str(self.type), chartdata)
                                # self.parentOj.emit(self.topic, value)
                                self.parentOj.emit("ws" + str(self.pid), value)

                print '[Kafka Consumer] STOP'
                print 'Topic: {}'.format(self.topic)
                print 'Stop listening...'
                print '========================================================'
                # print("Listening kafka Stopped")
                consumer.stop()
                client.close()
            except Exception as e:
                print '[Kafka Consumer] ERROR: {}'.format(e)
                if consumer is not None:
                    consumer.stop()
                if client is not None:
                    client.close()
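The same poll-and-emit pattern reduced to a standalone sketch against the
legacy kafka-python API; the broker address, group and topic below are
placeholders:

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient(hosts='localhost:9092')
consumer = SimpleConsumer(client, group='demo-group', topic='demo-topic',
                          iter_timeout=5)
consumer.seek(0, 1)  # start from the current offset
try:
    for message in consumer.get_messages(count=5, block=False):
        print 'msg: {0}, offset: {1}'.format(message.message.value,
                                             message.offset)
finally:
    consumer.stop()
    client.close()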
Example #3

# Imports this class needs; ZKPartitioner and StaticZKPartitioner come from
# the surrounding package, and the rest is an assumption based on the
# legacy kafka-python and kazoo APIs:
import json
import logging

from kafka import KafkaClient, SimpleConsumer
from kafka.common import FailedPayloadsError
from kazoo.client import KazooClient, KazooState
class ZKConsumer(object):

    zk_timeout = 30
    jitter_seconds = 30
    broker_prefix = '/brokers/ids'

    def __init__(
            self,
            zk_hosts,
            group,
            topic,
            nodes,
            zk_handler=None,
            logger=None,
            identifier=None,
            **consumer_kwargs):
        """Creates a Consumer that tracks state in ZooKeeper,
        rebalancing partition ownership as registered consumers change.
        NOTE: this class is intended for version 0.8.1 of Kafka, where offsets
              are managed by Kafka but there is no rebalancing in the protocol.
        """
        if logger is None:
            logger = logging.getLogger('kafka.consumer.ZKConsumer')
        self.logger = logger
        self.identifier = identifier

        if KafkaClient is None:
            raise RuntimeError("Kafka support requires cs.eyrie to be installed with the Kafka extra: install_requires= ['cs.eyrie[Kafka]']")
        self.zk_handler = zk_handler
        self.zk_hosts = zk_hosts
        self.broker_hosts = []

        self.group = group
        self.topic = topic

        self.zk = None
        self.nodes = nodes
        self.client = None
        self.consumer = None
        self.consumer_kwargs = consumer_kwargs

        # This will kick off a cascading sequence to initialize ourselves:
        # 1. Connect to ZK and pull list of Kafka brokers
        # 2. Register ourselves as a consumer in ZK
        # 3. Rebalance partitions across all connected consumers
        self.init_zk()

    def zk_session_watch(self, state):
        self.logger.debug('ZK transitioned to: %s', state)
        if state == KazooState.SUSPENDED:
            if self.consumer is not None:
                self.logger.info('Stopping Kafka consumer')
                self.consumer.stop()
                self.consumer = None
            # Lost connection to ZK; we can't call any methods that would
            # try to contact it (i.e., we can't do self.zkp.finish() )
            self.zkp = None
        elif state == KazooState.CONNECTED:
            self.logger.info('Restarting ZK partitioner')
            self.zk.handler.spawn(self.init_zkp)

    def _zkp_wait(self):
        handler = self.zk.handler
        while 1:
            if self.zkp.failed:
                self.logger.warning("Lost or unable to acquire partition")
                self.stop()
            elif self.zkp.release:
                self.zkp.release_set()
            elif self.zkp.acquired:
                def group_change_proxy(event):
                    self.logger.warn('Connected consumers changed')
                    if self.zkp is None:
                        self.logger.info('Restarting ZK partitioner')
                        handler.spawn(self.init_zkp)
                    elif self.zkp is not None and self.zkp.failed:
                        self.logger.warning("Lost or unable to acquire partition")
                        self.stop()
                    else:
                        self.logger.info('Scheduling ZK partitioner set release')
                        rel_greenlet = handler.spawn(self.zkp.release_set)
                        self.logger.info('Scheduling group re-join')
                        rel_greenlet.link_value(lambda greenlet: self.zkp.join_group())
                if not self.nodes:
                    self.logger.info('Partitioner acquired; setting child watch')
                    result = self.zk.get_children_async(self.zkp._group_path)
                    result.rawlink(group_change_proxy)
                # Break out of while loop to begin consuming events
                break
            elif self.zkp.allocating:
                self.zkp.wait_for_acquire()

    def init_zkp(self):
        if not hasattr(self, 'zkp') or self.zkp is None:
            if self.nodes:
                self.zkp = StaticZKPartitioner(
                    self.zk, self.group, self.topic, self.nodes,
                    partitions_changed_cb=self.init_consumer,
                    logger=self.logger, identifier=self.identifier)
            else:
                self.zkp = ZKPartitioner(
                    self.zk, self.group, self.topic,
                    time_boundary=self.jitter_seconds,
                    partitions_changed_cb=self.init_consumer,
                    logger=self.logger, identifier=self.identifier)

        self._zkp_wait()

    def init_zk(self):
        # TODO: switch to async
        # 1. implement kazoo.interfaces.IHandler in terms of Tornado's IOLoop
        self.zk = KazooClient(hosts=self.zk_hosts, handler=self.zk_handler)
        self.zk.start()
        self.zk.add_listener(self.zk_session_watch)

        @self.zk.ChildrenWatch(self.broker_prefix)
        def broker_change_proxy(broker_ids):
            self.onBrokerChange(broker_ids)

        self.init_zkp()

    def onBrokerChange(self, broker_ids):
        self.broker_hosts = []
        for b_id in broker_ids:
            b_json, zstat = self.zk.get('/'.join([self.broker_prefix, b_id]))
            b_data = json.loads(b_json)
            self.broker_hosts.append('{}:{}'.format(b_data['host'],
                                                    b_data['port']))

        my_partitions = []
        if self.consumer is not None:
            self.logger.warn('Brokers changed, stopping Kafka consumer.')
            my_partitions = self.consumer.offsets.keys()
            self.consumer.stop()
            self.consumer = None
        if self.client is not None:
            self.logger.warn('Brokers changed, stopping Kafka client.')
            self.client.close()
            self.client = None

        if my_partitions:
            msg = 'Brokers changed, queuing restart of Kafka client / consumer.'
            self.logger.warn(msg)
            self.zk.handler.spawn(self.init_consumer, my_partitions)

    def init_consumer(self, my_partitions):
        if self.consumer is None:
            self.logger.warn('Starting Kafka client')
            self.client = KafkaClient(self.broker_hosts,
                                      client_id=self.zkp._identifier)
        elif sorted(my_partitions) != sorted(self.consumer.offsets.keys()):
            self.logger.warn('Partitions changed, restarting Kafka consumer.')
            self.consumer.stop()
        else:
            self.logger.info('Partitions unchanged, not restarting Kafka consumer.')
            return

        self.consumer = SimpleConsumer(self.client, self.group, self.topic,
                                       partitions=my_partitions,
                                       **self.consumer_kwargs)
        self.consumer.provide_partition_info()
        self.logger.info("Consumer connected to Kafka: %s", self.consumer.offsets)

    def stop(self):
        if self.consumer is not None:
            self.logger.info('Stopping Kafka consumer')
            self.consumer.stop()
            self.consumer = None
        if self.client is not None:
            self.logger.info('Stopping Kafka client')
            self.client.close()
            self.client = None
        if self.zk is not None:
            self.logger.info('Stopping ZooKeeper client')
            if self.zkp is not None and not self.zkp.failed:
                self.zkp.finish()
            self.zk.stop()
            self.zkp = None
            self.zk = None

    def commit(self, partitions=None):
        """
        Commit offsets for this consumer

        partitions: list of partitions to commit, default is to commit
                    all of them
        """
        if self.consumer is None:
            return
        self.logger.debug('Begin committing offsets for partitions: %s',
                          partitions if partitions else 'All')
        self.consumer.commit(partitions)
        self.logger.debug('End committing offsets for partitions: %s',
                          partitions if partitions else 'All')

    def pending(self, partitions=None):
        """
        Gets the pending message count

        partitions: list of partitions to check for, default is to check all
        """
        return self.consumer.pending(partitions)

    def provide_partition_info(self):
        """
        Indicates that partition info must be returned by the consumer
        """
        self.consumer.provide_partition_info()

    def seek(self, offset, whence):
        """
        Alter the current offset in the consumer, similar to fseek

        offset: how much to modify the offset
        whence: where to modify it from
                0 is relative to the earliest available offset (head)
                1 is relative to the current offset
                2 is relative to the latest known offset (tail)
        """
        self.consumer.seek(offset, whence)

    def get_messages(self, count=1, block=True, timeout=0.1):
        """
        Fetch the specified number of messages

        count: Indicates the maximum number of messages to be fetched
        block: If True, the API will block till some messages are fetched.
        timeout: If block is True, the function will block for the specified
                 time (in seconds) until count messages is fetched. If None,
                 it will block forever.
        """
        if self.consumer is None:
            return []
        else:
            try:
                messages = self.consumer.get_messages(count, block, timeout)
                if not messages and self.zkp.failed:
                    raise FailedPayloadsError
                return messages
            except FailedPayloadsError as err:
                msg = 'Failed to retrieve payload, restarting consumer'
                self.logger.exception(msg)
                raise err

    def get_message(self, block=True, timeout=0.1, get_partition_info=None):
        return self.consumer.get_message(block, timeout, get_partition_info)

    def _get_message(self, block=True, timeout=0.1, get_partition_info=None,
                     update_offset=True):
        return self.consumer._get_message(block, timeout, get_partition_info,
                                          update_offset)

    def __iter__(self):
        for msg in self.consumer:
            yield msg
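A minimal usage sketch for this class; the ZooKeeper hosts and names are
placeholders, and the tuple unpacking relies on the provide_partition_info()
call made in init_consumer:

consumer = ZKConsumer(
    zk_hosts='zk1:2181,zk2:2181',
    group='demo-group',
    topic='demo-topic',
    nodes=[],              # empty: dynamic rebalancing via ZKPartitioner
    identifier='worker-1')

for partition, message in consumer:   # partition info is enabled above
    handle(message.message.value)     # handle() is a hypothetical callback
consumer.stop()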
Example #4

# Imports for this check method (assuming the legacy kafka-python API);
# read_config, gauge and log are provided by the surrounding agent class:
from collections import defaultdict

from kafka import KafkaClient, SimpleConsumer
from kafka.common import OffsetRequest
    def check(self, instance):
        consumer_groups = self.read_config(instance, 'consumer_groups',
                                           cast=self._validate_consumer_groups)
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')
        full_output = self.read_config(instance, 'full_output', cast=bool)
        dimensions = self.read_config(instance, 'dimensions', cast=dict, optional=True)
        new_dimensions = {'component': 'kafka', 'service': 'kafka'}
        if dimensions is not None:
            new_dimensions.update(dimensions.copy())

        # Connect to Kafka; if this fails there is nothing to clean up yet
        kafka_conn = KafkaClient(kafka_host_ports)
        try:

            # Query Kafka for consumer offsets
            consumer_offsets = {}
            topics = defaultdict(set)
            for consumer_group, topic_partitions in consumer_groups.iteritems():
                for topic, partitions in topic_partitions.iteritems():
                    consumer = SimpleConsumer(kafka_conn, consumer_group, topic)
                    # Remember the topic partitions that we've seen so that we can
                    # look up their broker offsets later
                    topics[topic].update(set(partitions))
                    for partition in partitions:
                        consumer_offsets[(consumer_group, topic, partition)] = consumer.offsets[partition]
                    consumer.stop()

            # Query Kafka for the broker offsets, done in a separate loop so only one query is done
            # per topic even if multiple consumer groups watch the same topic
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = kafka_conn.send_offset_request([
                    OffsetRequest(topic, p, -1, 1) for p in partitions])

                for resp in offset_responses:
                    broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        if full_output:
            for (topic, partition), broker_offset in broker_offsets.items():
                broker_dimensions = new_dimensions.copy()
                broker_dimensions.update({'topic': topic,
                                          'partition': partition})
                self.gauge('kafka.broker_offset',
                           broker_offset,
                           dimensions=broker_dimensions)

        # Report the consumer data
        for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():
            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))
            # Report the consumer offset and lag
            consumer_dimensions = new_dimensions.copy()
            consumer_dimensions['topic'] = topic
            consumer_dimensions['partition'] = partition
            consumer_dimensions['consumer_group'] = consumer_group
            if full_output:
                self.gauge('kafka.consumer_offset',
                           consumer_offset,
                           dimensions=consumer_dimensions)
            self.gauge('kafka.consumer_lag',
                       broker_offset - consumer_offset,
                       dimensions=consumer_dimensions)
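A hypothetical instance configuration for this check; the consumer_groups
shape (group -> topic -> list of partitions) is inferred from the loops
above:

instance = {
    'kafka_connect_str': 'localhost:9092',
    'full_output': True,
    'consumer_groups': {
        'demo-group': {'demo-topic': [0, 1]},
    },
    'dimensions': {'env': 'dev'},
}
check.check(instance)  # check is an instance of the surrounding class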
Example #5

# This block targets the nio framework: KafkaBase, GeneratorBlock, the
# property classes, Signal, spawn and Event are assumed to come from nio
# and its Kafka base block; SimpleConsumer is the legacy kafka-python
# consumer.
class KafkaConsumer(KafkaBase, GeneratorBlock):

    """ A block for consuming Kafka messages
    """
    version = VersionProperty("1.0.0")
    group = StringProperty(title='Group', default="", allow_none=False)
    # use Kafka 'reasonable' value for our own message gathering and
    # signal delivery
    max_msg_count = IntProperty(title='Max message count',
                                default=AUTO_COMMIT_MSG_COUNT,
                                allow_none=False)

    def __init__(self):
        super().__init__()
        self._consumer = None
        self._encoded_group = None
        # message loop maintenance
        self._stop_message_loop_event = None
        self._message_loop_thread = None

    def configure(self, context):
        super().configure(context)

        if not self.group():
            raise ValueError("Group cannot be empty")

        self._encoded_group = self.group().encode()
        self._connect()

    def start(self):
        super().start()
        # start gathering messages
        self._stop_message_loop_event = Event()
        self._message_loop_thread = spawn(self._receive_messages)

    def stop(self):
        # stop gathering messages
        self._stop_message_loop_event.set()
        self._message_loop_thread.join()
        self._message_loop_thread = None

        # disconnect
        self._disconnect()
        super().stop()

    def _parse_message(self, message):
        attrs = dict()
        attrs["magic"] = message.message.magic
        attrs["attributes"] = message.message.attributes
        attrs["key"] = message.message.key
        attrs["value"] = message.message.value

        return Signal(attrs)

    def _receive_messages(self):
        while not self._stop_message_loop_event.is_set():
            try:
                # get kafka messages
                messages = self._consumer.get_messages(
                    count=self.max_msg_count(), block=False)
            except Exception:
                self.logger.exception("Failure getting kafka messages")
                continue

            # if no timeout occurred, parse messages and convert to signals
            if messages:
                signals = []
                for message in messages:
                    # parse and save every signal
                    try:
                        signal = self._parse_message(message)
                    except Exception:
                        self.logger.exception("Failed to parse kafka message:"
                                              " '{0}'".format(message))
                        continue
                    signals.append(signal)

                self.notify_signals(signals)

        self.logger.debug("Exiting message loop")

    def _connect(self):
        super()._connect()
        self._consumer = SimpleConsumer(self._kafka,
                                        self._encoded_group,
                                        self._encoded_topic)

    def _disconnect(self):
        if self._consumer:
            self._consumer.stop()
            self._consumer = None
        super()._disconnect()

    @property
    def connected(self):
        return super().connected and self._consumer is not None
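A standalone sketch of the same fetch-and-parse step outside the nio
framework, against the legacy kafka-python API; broker, group and topic are
placeholders:

from kafka import KafkaClient, SimpleConsumer

client = KafkaClient(hosts='localhost:9092')
consumer = SimpleConsumer(client, b'demo-group', b'demo-topic')
for message in consumer.get_messages(count=10, block=False):
    attrs = {
        'magic': message.message.magic,
        'attributes': message.message.attributes,
        'key': message.message.key,
        'value': message.message.value,
    }
    print(attrs)  # the block wraps this dict in a Signal instead
consumer.stop()
client.close()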
Example #6

# Imports for this run() method (an assumption based on the calls below);
# get_es_hosts and get_kafka_hosts are helpers defined elsewhere:
import json
import traceback

from elasticsearch import Elasticsearch
from kafka import KafkaClient, SimpleConsumer
        def run(self):
            client = None
            consumer = None
            try:
                print("Starting Kafka Client")
                print("Kafka topic: {}").format(self.topic)

                hosts = get_es_hosts()
                print "es hosts: ", hosts
                es = Elasticsearch(hosts=hosts)
                # ES_HOST = {"host": "localhost", "port": 9200}
                # es = Elasticsearch(hosts=[ES_HOST])

                # host_list = get_all_host('KAFKA')
                hosts = get_kafka_hosts()
                print "kafka hosts: ", hosts
                client = KafkaClient(hosts=hosts)
                # print "kafka client group name: "+self.groupName
                consumer = SimpleConsumer(client, self.groupName, self.topic)
                # seek(-5, 2): whence=2 is relative to the latest offset,
                # so start five messages before the tail
                consumer.seek(-5, 2)
                print("Listening kafka message...")

                while self.stopCpu is False:
                    # for message in consumer.get_messages(count=5, block=False):
                    for message in consumer:
                        if self.stopCpu is True:
                            print("Kafka Consumer Listening Stopped")
                            break

                        if message:
                            print "Consuming kafka message: ", message
                            value = message.message.value
                            try:
                                json_value = json.loads(value)
                                offset = message.offset
                                json_value['data'][0]['offset'] = offset
                                value = json.dumps(json_value)
                                print "Publishing data: ", value

                                doc = json.dumps(json_value['data'][0])
                                if len(doc) > 0:
                                    es.index(index='kafka',
                                             doc_type=self.topic,
                                             id=offset,
                                             body=doc)

                                if len(value) > 0:
                                    self.parentOj.emit("ws" + str(self.pid),
                                                       value)
                            except Exception as e:
                                traceback.print_exc()
                                print "Skipping invalid message"
            except Exception as e:
                traceback.print_exc()
            finally:
                print("Listening kafka Stopped")
                if consumer is not None:
                    print "Stopping consumer ..."
                    consumer.stop()
                if client is not None:
                    print "Closing client ..."
                    client.close()
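The loop above assumes each Kafka message value is a JSON document with a
'data' list; a value shaped like this hypothetical one would pass the
parser and be indexed into Elasticsearch:

import json

example_value = json.dumps({
    'data': [
        {'metric': 'cpu.user', 'value': 12.5, 'timestamp': 1400000000},
    ],
})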
Example #7

# Imports for this variant of the check (assuming the legacy kafka-python
# API); read_config, gauge and log come from the surrounding agent class:
from collections import defaultdict

from kafka import KafkaClient, SimpleConsumer
from kafka.common import KafkaError, OffsetRequest
    def check(self, instance):
        consumer_groups = self.read_config(instance, 'consumer_groups',
                                           cast=self._validate_consumer_groups)
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')
        full_output = self.read_config(instance, 'full_output', cast=bool)
        dimensions = self.read_config(instance, 'dimensions', cast=dict, optional=True)
        new_dimensions = {'component': 'kafka', 'service': 'kafka'}
        if dimensions is not None:
            new_dimensions.update(dimensions.copy())

        # Connect to Kafka; if this fails there is nothing to clean up yet
        kafka_conn = KafkaClient(kafka_host_ports)
        try:

            # Query Kafka for consumer offsets
            consumer_offsets = {}
            topics = defaultdict(set)
            for consumer_group, topic_partitions in consumer_groups.iteritems():
                for topic, partitions in topic_partitions.iteritems():
                    consumer = SimpleConsumer(kafka_conn, consumer_group, topic)
                    # Remember the topic partitions that we've seen so that we can
                    # look up their broker offsets later
                    topics[topic].update(set(partitions))
                    for partition in partitions:
                        try:
                            consumer_offsets[(consumer_group, topic, partition)] = consumer.offsets[partition]
                        except KeyError:
                            # don't stop the consumer here; it is still
                            # needed for the remaining partitions and is
                            # stopped after the loop
                            self.log.error('Error fetching consumer offset for {} partition {}'.format(topic, partition))
                    consumer.stop()

            # Query Kafka for the broker offsets, done in a separate loop so only one query is done
            # per topic even if multiple consumer groups watch the same topic
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = []
                for p in partitions:
                    try:
                        response = kafka_conn.send_offset_request([OffsetRequest(topic, p, -1, 1)])
                        offset_responses.append(response[0])
                    except KafkaError as e:
                        self.log.error("Error fetching broker offset: {}".format(e))

                for resp in offset_responses:
                    broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        if full_output:
            for (topic, partition), broker_offset in broker_offsets.items():
                broker_dimensions = new_dimensions.copy()
                broker_dimensions.update({'topic': topic,
                                          'partition': partition})
                self.gauge('kafka.broker_offset',
                           broker_offset,
                           dimensions=broker_dimensions)

        # Report the consumer data
        for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():
            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))
            # Report the consumer offset and lag
            consumer_dimensions = new_dimensions.copy()
            consumer_dimensions['topic'] = topic
            consumer_dimensions['partition'] = partition
            consumer_dimensions['consumer_group'] = consumer_group
            if full_output:
                self.gauge('kafka.consumer_offset',
                           consumer_offset,
                           dimensions=consumer_dimensions)
            # broker_offset may be missing if the offset request failed above
            if broker_offset is not None:
                self.gauge('kafka.consumer_lag',
                           broker_offset - consumer_offset,
                           dimensions=consumer_dimensions)
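A worked example of the lag computation: with a hypothetical latest broker
offset of 1042 and a committed consumer offset of 1000 for one partition,
the check reports a lag of 42:

broker_offset = 1042     # latest offset from OffsetRequest(topic, p, -1, 1)
consumer_offset = 1000   # offset committed by the consumer group
print broker_offset - consumer_offset  # kafka.consumer_lag -> 42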