Example No. 1
class ConsumerTask(object):
    def __init__(self, conf, topic_name):
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        self.running = False

    def print_assignment(self, consumer, partition):
        print('Assignment: ', partition)

    def run(self):
        self.consumer.subscribe([self.topic_name],
                                on_assign=self.print_assignment)

        try:
            while True:
                msg = self.consumer.poll(10)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    print('%% %s [%d] at offset %d with key %s:\n' %
                          (msg.topic(), msg.partition(), msg.offset(),
                           str(msg.key())))
                    print(msg.value())
        except KeyboardInterrupt:
            sys.stderr.write("%% Aborted by user\n")
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
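A minimal driver for the ConsumerTask above might look like the following sketch; the broker address, group id and topic name are assumptions rather than values taken from the original example.

# Hypothetical usage of the ConsumerTask above; the configuration values and
# the topic name are assumptions.
conf = {
    'bootstrap.servers': 'localhost:9092',   # assumed broker address
    'group.id': 'example-group',             # assumed consumer group
    'auto.offset.reset': 'earliest',
}
task = ConsumerTask(conf, 'example-topic')
task.run()   # blocks until interrupted, then unsubscribes and closes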
Example No. 2
class PubSubConsumerKafka(PubSubConsumer):
    def __init__(self,
                 conf: dict,
                 topic_names: List[str],
                 client_name: str,
                 group_name: str,
                 logger: logging.Logger = None):
        self.client_name = client_name
        self.__consumer_settings = {
            'bootstrap.servers': conf['bootstrap.servers'],
            'group.id': group_name,
            'client.id': client_name,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        }
        self.__consumer = None
        self.__set_consumer(topic_names)
        self.__logger = logger  # kept under a separate name so the __log() method is not shadowed

    def __set_consumer(self, topic_names: List[str]):
        if (len(topic_names) > 0):
            if (self.__consumer is None):
                self.__consumer = Consumer(self.__consumer_settings)
            #print(f"DEBUG: __set_consumer: {topic_names}")
            self.__consumer.subscribe(topic_names)
        else:
            if (self.__consumer is not None):
                self.__consumer.unsubscribe()

    def __log(self, msg: str):
        if (self.__logger is not None):
            self.__logger.debug(msg)

    def reset_topics(self, topic_names: List[str]):
        self.__set_consumer(topic_names)

    # returns a pair (topic, msg)
    def poll(self, timeout=None) -> (str, object):
        if (timeout is None):
            timeout = 1

        if (self.__consumer is None):
            return (None, None)

        msg = self.__consumer.poll(timeout)
        if (msg is None):
            return (None, None)
        elif not msg.error():
            #print(f"DEBUG: PubSubConsumerKafka: Message Received: {msg.value()[0:256]} ...")
            return (msg.topic(), jsonpickle.decode(msg.value()))
        elif msg.error().code() == KafkaError._PARTITION_EOF:
            self.__log(
                f"End of partition reached {msg.topic()}/{msg.partition()}")
            return (None, None)
        else:
            self.__log(f"Error occured: {msg.error().str()}")
            return (None, None)
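The wrapper above can be polled in a simple loop; a sketch, assuming the broker address, topic name and logger:

# Hypothetical usage of PubSubConsumerKafka; the broker address, topic and
# logger are assumptions.
import logging

consumer = PubSubConsumerKafka(
    {'bootstrap.servers': 'localhost:9092'},   # assumed broker address
    ['events'],                                # assumed topic
    client_name='example-client',
    group_name='example-group',
    logger=logging.getLogger('example'))

while True:
    topic, payload = consumer.poll(timeout=1)
    if topic is not None:
        print(f'{topic}: {payload}')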
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should thorw a RuntimeError
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert 'Consumer closed' == str(ex.value)
Example No. 4
class ConsumerTask(object):

    def __init__(self, is_bluemix, conf, topic_name):
        try:
            from confluent_kafka import Consumer
        except ImportError:
            from confluent_kafka_prebuilt import Consumer
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        self.running = False

    @asyncio.coroutine
    def run(self):
        print('The consumer has started')
        self.consumer.subscribe([self.topic_name])
        while self.running:
            msg = self.consumer.poll(1)
            if msg is not None and msg.error() is None:
                print('Message consumed: topic={0}, partition={1}, offset={2}, key={3}, value={4}'.format(
                    msg.topic(),
                    msg.partition(),
                    msg.offset(),
                    msg.key().decode('utf-8'),
                    msg.value().decode('utf-8')))
            else:
                print('No messages consumed')
            yield from asyncio.sleep(2)
        self.consumer.unsubscribe()
        self.consumer.close()
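Because run() is a legacy generator-based coroutine, it has to be scheduled on an asyncio event loop; one possible driver, with the configuration values and topic assumed:

# Hypothetical driver for the coroutine-style ConsumerTask above; the
# configuration and topic are assumptions, and task.stop() is scheduled so
# the loop eventually exits.
import asyncio

task = ConsumerTask(False, {'bootstrap.servers': 'localhost:9092',   # assumed
                            'group.id': 'example-group'}, 'example-topic')
loop = asyncio.get_event_loop()
loop.call_later(30, task.stop)          # stop consuming after ~30 seconds
loop.run_until_complete(task.run())     # run() unsubscribes and closes on exit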
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should thorw a RuntimeError
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.subscribe(['test'])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unsubscribe()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.poll()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.consume()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assign([TopicPartition('test', 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.unassign()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.assignment()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.commit()
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.committed([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.position([TopicPartition("test", 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        c.seek([TopicPartition("test", 0, 0)])
    assert ex.match('Consumer closed')

    with pytest.raises(RuntimeError) as ex:
        lo, hi = c.get_watermark_offsets(TopicPartition("test", 0))
    assert ex.match('Consumer closed')
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb (err, partitions):
        pass

    kc = Consumer({'group.id':'test', 'socket.timeout.ms':'100',
                   'session.timeout.ms': 1000, # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke (consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = list(map(lambda p: TopicPartition("test", p), range(0,100,3)))
    kc.assign(partitions)

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT


    kc.close()
class MessagehubStreamingAdapterConfluent(StreamingDataAdapter):
    def __init__(self, topic, username, password, prod=True):
        caLocation = '/etc/ssl/cert.pem'
        if not os.path.exists(caLocation):
            caLocation = '/etc/pki/tls/cert.pem'
        conf = {
            'client.id': 'pixieapp.client.id',
            'group.id': 'pixieapp.group',
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'PLAIN',
            'ssl.ca.location': caLocation,
            "bootstrap.servers": ','.join(["kafka0{}-{}.messagehub.services.us-south.bluemix.net:9093".format(i, "prod01" if prod else "stage1") for i in range(1,6)]),
            "sasl.username": username,
            "sasl.password": password,
            'api.version.request': True
        }
        self.consumer = Consumer(conf)
        self.consumer.subscribe([topic])
        self.schema = {}
        self.sampleDocCount = 0
        
    def close(self):
        self.consumer.unsubscribe()
        self.consumer.close() 
        
    def tryCast(self, value, t):
        try:
            return t(value)
        except:
            return None
        
    def inferType(self, value):
        if isinstance(value, string_types):
            value = self.tryCast(value, int) or self.tryCast(value, long) or self.tryCast(value, float) or value
        return "integer" if value.__class__==int else "float" if value.__class__ == float else "string"
        
    def inferSchema(self, eventJSON):
        if self.sampleDocCount > 20:
            return
        for key,value in iteritems(eventJSON):
            if not key in self.schema:
                self.schema[key] = self.inferType(value)
        self.sampleDocCount = self.sampleDocCount + 1 
    
    def doGetNextData(self):
        msgs = []
        msg = self.consumer.poll(1)
        if msg is not None and msg.error() is None:
            jsonValue = json.loads(msg.value())
            self.inferSchema(json.loads(msg.value()))
            msgs.append(jsonValue)
        return msgs
    
    def close(self):
        self.consumer.close()
Example No. 8
def repl():
    c = Consumer(settings)
    c.subscribe(topics)
    try:
        while True:
            if not red.ping():
                time.sleep(1)
                continue
            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue
            # Found message
            elif not msg.error():
                # Try to handle
                if msg.topic() == u'add_build':
                    result = add_build(msg.value())
                elif msg.topic() == u'delete_build':
                    result = delete_build(msg.value())
                elif msg.topic() == u'add_user':
                    result = add_user(msg.value())
                elif msg.topic() == u'delete_user':
                    result = delete_user(msg.value())
                elif msg.topic() == u'add_build_component':
                    result = add_build_component(msg.value())
                elif msg.topic() == u'remove_build_component':
                    result = remove_build_component(msg.value())
                elif msg.topic() == u'add_decoration':
                    result = add_decoration(msg.value())
                elif msg.topic() == u'remove_decoration':
                    result = remove_decoration(msg.value())
                elif msg.topic() == u'remove_all_decorations':
                    result = remove_all_decorations(msg.value())
                if result:
                    pprint('Success ' + msg.value())
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe(topics)
                    print('Error Occurred Adding to Redis')
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(msg.error().str()))
            time.sleep(1)

    except KeyboardInterrupt:
        pass

    finally:
        c.close()
def test_multiple_close_does_not_throw_exception():
    """ Calling Consumer.close() multiple times should not throw Runtime Exception
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    c.unsubscribe()
    c.close()
    c.close()
Example No. 10
class Consumer:
    def __init__(self, bootstrap_servers: str, topic: str,
                 group_id: str) -> None:
        config = {
            'bootstrap.servers': bootstrap_servers,
            # Where to consume from after a reset
            # "latest" is the end of the topic, "earliest" is the beginning
            'default.topic.config': {
                'auto.offset.reset': 'latest'
            },
            'metadata.request.timeout.ms': 20000,
            'enable.auto.commit': False,
            'group.id': group_id,
            'api.version.request': True,
            'fetch.wait.max.ms': 100,
            'log.connection.close': False,
            # This logger will log messages originating from non-Python code
            'logger': get_logger('librdkafka'),
            # Max number of bytes per partition returned by the server
            'max.partition.fetch.bytes': MEBIBYTE * 5,
            'statistics.interval.ms': 15000,
            'queued.max.messages.kbytes': 1024 * 64,
        }
        self._consumer = ConfluentConsumer(config)
        self._consumer.subscribe([topic])

    def consume(self) -> str:
        while True:
            msg = self._consumer.poll(1.0)

            if msg is None:
                continue
            if msg.error() is None:
                log.debug(f'Received message: {msg.value().decode("utf-8")}')
                return msg.value().decode('utf-8')
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                log.error(
                    f'Failed to consume from topic, continuing... '
                    f'Reason: {KafkaException(msg.error())}', )
            else:
                log.debug('Reached end of topic, waiting for new messages...')

    def commit(self) -> None:
        self._consumer.commit()

    def close(self) -> None:
        self._consumer.unsubscribe()
        self._consumer.close()
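Because 'enable.auto.commit' is False in that configuration, the caller is expected to commit after processing; a minimal sketch, with the connection details and the processing function assumed:

# Hypothetical usage of the Consumer wrapper above; bootstrap server, topic,
# group id and handle() are assumptions.
consumer = Consumer('localhost:9092', 'example-topic', 'example-group')
try:
    while True:
        payload = consumer.consume()   # blocks until a message is decoded
        handle(payload)                # hypothetical processing function
        consumer.commit()              # commit only after successful handling
finally:
    consumer.close()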
Example No. 11
def test_multiple_close_throw_exception():
    """ Calling Consumer.close() multiple times should throw Runtime Exception
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.close()
    assert 'Consumer already closed' == str(ex.value)
def test_multiple_close_throw_exception():
    """ Calling Consumer.close() multiple times should throw Runtime Exception
    """
    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.close()
    assert 'Consumer already closed' == str(ex.value)
Example No. 13
def test_store_offsets():
    """ Basic store_offsets() tests """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    try:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._UNKNOWN_PARTITION

    c.unsubscribe()
    c.close()
Example No. 14
def test_calling_store_offsets_after_close_throws_erro():
    """ calling store_offset after close should throw RuntimeError """

    c = Consumer({
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100
    })

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    assert 'Consumer closed' == str(ex.value)
def test_store_offsets():
    """ Basic store_offsets() tests """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])

    try:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._UNKNOWN_PARTITION

    c.unsubscribe()
    c.close()
Example No. 16
class ConsumerTask(object):
    def __init__(self, conf, topic_name):
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True
        self._observers = []

    def stop(self):
        self.running = False

    def print_assignment(self, consumer, partition):
        print('Assignment - subscribing to topic: ', partition)

    def register_observer(self, observer):
        self._observers.append(observer)

    def notify_observers(self, *args, **kwargs):
        for observer in self._observers:
            observer.notify(self, *args, **kwargs)

    def run(self):
        self.consumer.subscribe([self.topic_name],
                                on_assign=self.print_assignment)

        try:
            while True:
                msg = self.consumer.poll(1)
                if msg is None:
                    continue
                if msg.error():
                    raise KafkaException(msg.error())
                else:
                    sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                                     (msg.topic(), msg.partition(),
                                      msg.offset(), str(msg.key())))
                    #print(msg.value())
                    self.notify_observers(msg.topic())

                    #could add something here that will tell the widget / UI to go to Object Storage
        except KeyboardInterrupt:
            sys.stderr.write("%% Aborted by user\n")
        finally:
            self.consumer.unsubscribe()
            self.consumer.close()
def test_calling_store_offsets_after_close_throws_erro():
    """ calling store_offset after close should throw RuntimeError """

    c = Consumer({'group.id': 'test',
                  'enable.auto.commit': True,
                  'enable.auto.offset.store': False,
                  'socket.timeout.ms': 50,
                  'session.timeout.ms': 100})

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    with pytest.raises(RuntimeError) as ex:
        c.store_offsets(offsets=[TopicPartition("test", 0, 42)])
    assert 'Consumer closed' == str(ex.value)

    with pytest.raises(RuntimeError) as ex:
        c.offsets_for_times([TopicPartition("test", 0)])
    assert 'Consumer closed' == str(ex.value)
Example No. 18
class ConsumerTask(object):
    def __init__(self, conf, topic_name):
        self.consumer = Consumer(conf)
        self.topic_name = topic_name
        self.running = True

    def stop(self):
        self.running = False

    @asyncio.coroutine
    def run(self):
        print('The consumer has started')
        self.consumer.subscribe([self.topic_name])
        while self.running:
            msg = self.consumer.poll(1)
            if msg is not None and msg.error() is None:
                pprint("Message consumed: offset={0}, value={1}".format(
                    msg.offset(), msg.value()))
            else:
                print('No messages consumed')
                yield from asyncio.sleep(2)
        self.consumer.unsubscribe()
        self.consumer.close()
Example No. 19
def repl():
    c = Consumer(settings)
    c.subscribe([topic])
    db.connect()
    try:
        while True:
            if not db.ping():
                db.connect()
                continue
            msg = c.poll(0.1)
            # No message present
            if msg is None:
                continue
            # Found a message
            elif not msg.error():
                # Try to insert
                result = insertArmor(msg.value())
                if result:
                    pprint('Added Successfully ' + msg.value())
                    c.commit()
                else:
                    c.unsubscribe()
                    c.subscribe([topic])
                    print('Error Occurred Adding to Cassandra')
            elif msg.error().code() == KafkaError._PARTITION_EOF:
                print('End of partition reached {0}/{1}'.format(
                    msg.topic(), msg.partition()))
            else:
                print('Error occurred: {0}'.format(msg.error().str()))
            time.sleep(1)

    except KeyboardInterrupt:
        pass

    finally:
        c.close()
Example No. 20
class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers":
            "PLAINTEXT://localhost:9092,PLAINTEXT://localhost:9093,PLAINTEXT://localhost:9094",
            "group.id": "00",
        }

        if is_avro:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
        logger.info(f"{self.topic_name_pattern} subscribed!")

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = 0
        consumer.assign(partitions)
        logger.info("partitions assigned for %s", self.topic_name_pattern)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        message = self.consumer.poll(1.0)
        if message is None:
            logger.debug("no message received by consumer %s",
                         self.topic_name_pattern)
            return 0
        elif message.error() is not None:
            logger.info(f"error from consumer {message.error()}")
            return 0
        else:
            self.message_handler(message)
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unsubscribe()
        logger.info(f"unsubscribed from {self.topic_name_pattern}")
        self.consumer.close()
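One way to drive the asynchronous consume() loop above is tornado's IOLoop (the class appears to rely on tornado's gen.sleep); the topic pattern and message handler below are assumptions.

# Hypothetical driver for the KafkaConsumer above using tornado; the topic
# pattern and handler are assumptions.
from tornado.ioloop import IOLoop

def handle(message):
    print(message.topic(), message.value())

consumer = KafkaConsumer('^com.example.*', handle, is_avro=False)
try:
    IOLoop.current().run_sync(consumer.consume)   # runs until interrupted
except KeyboardInterrupt:
    pass
finally:
    consumer.close()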
Example No. 21
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(
        map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0],
                                          timeout=0.5,
                                          cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions
                if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._TRANSPORT)

    kc.close()
Example No. 22
    stop_consumer.subscribe(['kill_all_consumers'])

    producer_conf = {'bootstrap.servers': args.broker}
    producer = Producer(**producer_conf)

    while True:
        consumer.subscribe([args.topic])
        stop_msg = stop_consumer.poll(timeout=1.0)
        if stop_msg is not None:
            if stop_msg.value() == b"STOP":
                print("Received STOP message")
                break

        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())
        else:
            # Proper message
            sys.stderr.write(
                '%% %s [%d] at offset %d with key %s:\n' %
                (msg.topic(), msg.partition(), msg.offset(), str(msg.key())))
        consumer.unsubscribe()
        msg_data = json.loads(msg.value())
        print(f'Doing job {msg_data["id"]}')
        sleep(int(msg_data['job_length']))
        report_job_done(producer, msg_data['id'])

    consumer.close()
Example No. 23
class BusConsumer:
    def __init__(self, groupid=None):

        # Pre-shared credentials
        # self.credentials = json.load(open('bus_credentials.json'))

        self.credentials = load_credentials.LoadCredentials.load_bus_credentials(
        )

        # Construct required configuration
        self.configuration = {
            'client.id': 'VAL_consumer',
            'group.id': 'VAL_consumer_group',
            'bootstrap.servers':
            ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        if groupid is not None:
            self.configuration["group.id"] = groupid

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

        self.default_topics = [TOP803, TOP030]

    def listen(self, performed_action, topics=None):
        # Topics should be a list of topic names e.g. ['topic1', 'topic2']
        if topics is None:
            topics = self.default_topics

        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            logger.error("Error @ BusConsumer.listen()")
            logger.debug(str(type(e)) + str(e))
            return False
        logger.info("listener subscribed successfully to topics:" +
                    str(topics))

        # Initiate a loop for continuous listening
        while self.listening:
            msg = self.consumer.poll(0)

            # If a message is received and it is not an error message
            if msg is not None and msg.error() is None:

                # Add incoming message to requests database
                try:
                    message_text = msg.value().decode('utf-8')
                except:
                    message_text = msg.value()

                performed_action(message_text)

            # TODO: check if it works ok with the sleep .5
            time.sleep(0.5)

        # Unsubscribe and close consumer
        self.consumer.unsubscribe()
        self.consumer.close()

    def stop(self):
        self.listening = False

    @staticmethod
    def __load_dummy_messages():
        """ Load Vicenza messages and add them to the message queue with a small delay between each insertion """
        import random
        import filter_messages

        max_delay = 0.01  # delay in the range [0, max_delay] from uniform distribution

        vic_messages = filter_messages.simulateData()
        for m in vic_messages:
            logger.debug("writing TOP101 message to queue")
            message_queue.MessageQueue.put_message(
                m)  # Note: pass it by value, not reference!
            ValidatorThreadHandler.init_validator()
            time.sleep(random.random() * max_delay)

        ValidatorThreadHandler.join_validation_thread()

    @staticmethod
    def __continuously_add_fake_TOP101():
        import random
        fake_msg = dict()
        fake_msg['body'] = dict()
        fake_msg['body']['spam'] = False
        fake_msg['body']['incidentID'] = random.randint(0, 1000000)
        p = 1
        while True:
            if random.random() > 1:
                p += 1
                fake_msg['body'][
                    'incidentID'] = p  # random.randint(0, 1000000)
                print("message in queue. ID: ", p)
                message_queue.MessageQueue.put_message(
                    {'body': {
                        'spam': False,
                        'incidentID': p
                    }})
            ValidatorThreadHandler.init_validator()

    @staticmethod
    def __load_TOP030():
        """ load TOP030 messages from local file and put them in the message queue """
        import random
        import filter_messages

        max_delay = .1  # delay in the range [0, max_delay] from uniform distribution

        vic_messages = filter_messages.get030()
        for m in vic_messages:
            logger.debug("writing TOP030 message to queue")
            message_queue.MessageQueue.put_message(
                m)  # Note: pass it by value, not reference!
            ValidatorThreadHandler.init_validator()
            time.sleep(random.random() * max_delay)

        ValidatorThreadHandler.join_validation_thread()
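listen() blocks until stop() clears the flag, so one plausible arrangement is to run it on a worker thread; the callback, topic name and timings below are assumptions.

# Hypothetical driver for BusConsumer.listen(); the callback, topic and the
# worker-thread arrangement are assumptions.
import threading
import time

def handle_message(text):
    print('received:', text[:80])

bus = BusConsumer()
worker = threading.Thread(target=bus.listen,
                          args=(handle_message, ['TOP803']),   # assumed topic name
                          daemon=True)
worker.start()
time.sleep(30)    # consume for a while
bus.stop()        # listen() then unsubscribes and closes the consumer
worker.join()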
Example No. 24
class KafkaReader(MQReader):
    @staticmethod
    def decode(args):
        k, v = args
        return b2s(k), int(v)

    @staticmethod
    def get_value(msg):
        return msg.value()

    @staticmethod
    def get_token(msg):
        return {f'{msg.topic()}.{msg.partition()}': msg.offset() + 1}

    def __init__(self, topics, group_id='group-1', client_id='default',
                 bootstrap_servers=BOOTSTRAP_SERVERS,
                 is_bootstrap=True, is_resume=True):
        """从 Kafka 读取数据
        :param topics: list, kafka topics
        :param group_id: str, kafka topics group_id
        :param client_id: str, kafka topics client_id
        :param bootstrap_servers: kafka host
        :param is_bootstrap: 是否全量读取
        :param is_resume: 是否断点续传
        """
        if not isinstance(topics, (list, tuple)):
            topics = [topics]
        self.topics = topics
        self.group_id = group_id
        self.client_id = client_id

        super().__init__(f'kafka:{self.client_id}:{self.group_id}',
                         is_bootstrap=is_bootstrap, is_resume=is_resume)

        self.config = {
            'client.id': self.client_id,
            'group.id': self.group_id,
            'bootstrap.servers': bootstrap_servers,
            'broker.version.fallback': BROKER_VERSION,
            'compression.type': COMPRESSION_TYPE,
            'enable.auto.commit': True,
            'auto.offset.reset': 'earliest' if is_bootstrap else 'latest',
            'on_commit': self.on_commit,
        }
        self.consumer = Consumer(self.config)

    def on_commit(self, err, partitions):
        for part in partitions:
            if err is not None:
                logger.error(f'Message delivery failed: {err}')
                logger.error(
                    f'topic={part.topic} partition={part.partition} '
                    f'offset={part.offset}'
                )
            else:
                # logger.info(
                #     f'topic={part.topic} partition={part.partition} '
                #     f'offset={part.offset}'
                # )
                key = f'{part.topic}.{part.partition}'

                if part.offset != OFFSET_INVALID:
                    self.resume_token[key] = part.offset

    def read(self):
        """从 kafka 读取数据
        https://github.com/confluentinc/confluent-kafka-python/issues/201
        :return:
        """
        # Reset offsets when partitions are assigned
        def on_assign(consumer, partitions):
            consumer.assign(partitions)
            for part in partitions:
                key = f'{part.topic}.{part.partition}'
                part.offset = self.resume_token.get(key) or 0
            consumer.commit(offsets=partitions, asynchronous=False)

        # Subscribe (restoring saved offsets when resuming)
        if self.is_resume:
            self.consumer.subscribe(self.topics, on_assign=on_assign)
        else:
            self.consumer.subscribe(self.topics)
        logger.info(f'Kafka consumer subscribe {self.topics}')

        while True:
            try:
                msg = self.consumer.poll(1)
            except RuntimeError as e:
                logger.error(f'RuntimeError:{e}')
                break
            except KeyboardInterrupt:
                logger.error('KeyboardInterrupt')
                break

            if msg is None:
                continue
            if msg.error():
                logger.error(msg.error())
                continue

            # logger.info(f'{msg.topic()} {msg.partition()} {msg.offset()}')
            yield msg

        # Unsubscribe
        try:
            self.consumer.unsubscribe()
        except RuntimeError:
            pass
        logger.info(f'Kafka reader {self.topics} unsubscribe')

    def disconnect(self):
        try:
            self.consumer.close()
        except RuntimeError:
            pass
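The reader above exposes consumption as a generator; a sketch of iterating it, with the topic name assumed and the module-level defaults reused:

# Hypothetical usage of KafkaReader; the topic name is an assumption and the
# module defaults (BOOTSTRAP_SERVERS, etc.) are reused.
reader = KafkaReader(['example-topic'])
try:
    for msg in reader.read():                 # yields confluent_kafka Message objects
        print(KafkaReader.get_token(msg),     # {'topic.partition': next offset}
              KafkaReader.get_value(msg))     # raw message payload
finally:
    reader.disconnect()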
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
Example No. 26
File: kafka.py Project: Appva/snuba
class KafkaConsumer(Consumer[TopicPartition, int, bytes]):
    """
    The behavior of this consumer differs slightly from the Confluent
    consumer during rebalancing operations. Whenever a partition is assigned
    to this consumer, offsets are *always* automatically reset to the
    committed offset for that partition (or if no offsets have been committed
    for that partition, the offset is reset in accordance with the
    ``auto.offset.reset`` configuration value.) This causes partitions that
    are maintained across a rebalance to have the same offset management
    behavior as a partition that is moved from one consumer to another. To
    prevent uncommitted messages from being consumed multiple times,
    ``commit`` should be called in the partition revocation callback.

    The behavior of ``auto.offset.reset`` also differs slightly from the
    Confluent consumer as well: offsets are only reset during initial
    assignment or subsequent rebalancing operations. Any other circumstances
    that would otherwise lead to preemptive offset reset (e.g. the consumer
    tries to read a message that is before the earliest offset, or the
    consumer attempts to read a message that is after the latest offset) will
    cause an exception to be thrown, rather than resetting the offset, as
    this could lead to chunks of messages being replayed or skipped, depending
    on the circumstances. This also means that if the committed offset is no
    longer available (such as when reading older messages from the log and
    those messages expire, or reading newer messages from the log and the
    leader crashes and partition ownership fails over to an out-of-date
    replica), the consumer will fail-stop rather than reset to the value of
    ``auto.offset.reset``.
    """

    # Set of logical offsets that do not correspond to actual log positions.
    # These offsets should be considered an implementation detail of the Kafka
    # consumer and not used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(self, configuration: Mapping[str, Any]) -> None:
        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest)
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest)
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error)
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer({
            **configuration, "auto.offset.reset":
            "error"
        })

        self.__offsets: MutableMapping[TopicPartition, int] = {}

        self.__state = KafkaConsumerState.CONSUMING

    def __resolve_partition_offset_earliest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[str],
        on_assign: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[TopicPartition]], None]] = None,
    ) -> None:
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        def assignment_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        assignment.append(
                            self.__resolve_partition_starting_offset(
                                partition))
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[TopicPartition, int] = {
                    TopicPartition(i.topic, i.partition): i.offset
                    for i in assignment
                }
                self.__seek(offsets)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(list(offsets.keys()))
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def revocation_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            streams = [
                TopicPartition(i.topic, i.partition) for i in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(streams)
            finally:
                for stream in streams:
                    try:
                        self.__offsets.pop(stream)
                    except KeyError:
                        # If there was an error during assignment, this stream
                        # may have never been added to the offsets mapping.
                        logger.warning(
                            "failed to delete offset for unknown stream: %r",
                            stream)

                self.__state = KafkaConsumerState.CONSUMING

        self.__consumer.subscribe(topics,
                                  on_assign=assignment_callback,
                                  on_revoke=revocation_callback)

    def unsubscribe(self) -> None:
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        self.__consumer.unsubscribe()

    def poll(self, timeout: Optional[float] = None) -> Optional[KafkaMessage]:
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        message: Optional[ConfluentMessage] = self.__consumer.poll(
            *[timeout] if timeout is not None else [])
        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfStream(
                    TopicPartition(message.topic(), message.partition()),
                    message.offset(),
                )
            elif code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            else:
                raise ConsumerError(str(error))

        result = KafkaMessage(
            TopicPartition(message.topic(), message.partition()),
            message.offset(),
            message.value(),
        )

        self.__offsets[result.stream] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[TopicPartition, int]:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        return self.__offsets

    def __seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign([
                ConfluentTopicPartition(stream.topic, stream.partition, offset)
                for stream, offset in offsets.items()
            ])
        else:
            for stream, offset in offsets.items():
                self.__consumer.seek(
                    ConfluentTopicPartition(stream.topic, stream.partition,
                                            offset))

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[TopicPartition, int]) -> None:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError("cannot seek on unassigned streams")

        self.__seek(offsets)

    def commit(self) -> Mapping[TopicPartition, int]:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        result: Optional[Sequence[ConfluentTopicPartition]] = None

        retries_remaining = 3
        while result is None:
            try:
                result = self.__consumer.commit(asynchronous=False)
                assert result is not None
            except KafkaException as e:
                if not e.args[0].code() in (
                        KafkaError.REQUEST_TIMED_OUT,
                        KafkaError.NOT_COORDINATOR_FOR_GROUP,
                        KafkaError._WAIT_COORD,
                ):
                    raise

                if not retries_remaining:
                    raise

                logger.warning(
                    "Commit failed: %s (%d retries remaining)",
                    str(e),
                    retries_remaining,
                )
                retries_remaining -= 1
                time.sleep(1)

        offsets: MutableMapping[TopicPartition, int] = {}

        for value in result:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``TopicPartition`` objects returned by ``commit``.
            # These are an implementation detail of the Kafka Consumer, so we
            # don't expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if value.offset in self.LOGICAL_OFFSETS:
                continue

            assert value.offset >= 0, "expected non-negative offset"
            offsets[TopicPartition(value.topic,
                                   value.partition)] = value.offset

        return offsets

    def close(self, timeout: Optional[float] = None) -> None:
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED
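The class docstring above recommends committing from the partition revocation callback to avoid re-consuming uncommitted messages; a minimal sketch of that pattern against this wrapper, with the configuration values and topic name assumed:

# Hypothetical commit-on-revoke pattern for the KafkaConsumer wrapper above;
# the configuration values and topic name are assumptions.
consumer = KafkaConsumer({
    'bootstrap.servers': 'localhost:9092',   # assumed broker address
    'group.id': 'example-group',             # assumed consumer group
    'auto.offset.reset': 'earliest',
})

def on_revoke(partitions):
    # Commit the current positions before partitions are handed to another
    # consumer, so already-processed messages are not replayed.
    consumer.commit()

consumer.subscribe(['events'], on_revoke=on_revoke)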
Example No. 27
def consumer_seek_to_end_of_topic(consumer: Consumer, data_topic: str):
    consumer.unsubscribe()
    sleep(1)
    # Resubscribe at end of topic
    consumer.subscribe([data_topic])
Example No. 28
class BusConsumer:
    def __init__(self):

        # Pre-shared credentials
        self.credentials = json.load(open('bus_credentials.json'))

        # Construct required configuration
        self.configuration = {
            'client.id': 'CRCL_consumer',
            'group.id': 'CRCL_consumer_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

    def listen(self, topics):
        # Topics should be a list of topic names e.g. ['topic1', 'topic2']

        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print(e)
            return False

        # Initiate a loop for continuous listening
        while self.listening:
            msg = self.consumer.poll(0)

            # If a message is received and it is not an error message
            if msg is not None and msg.error() is None:
                # print('Message consumed: topic={0}, partition={1}, offset={2}, key={3}, value={4}'.format(
                #     msg.topic(),
                #     msg.partition(),
                #     msg.offset(),
                #     msg.key().decode('utf-8'),
                #     msg.value().decode('utf-8')))

                # print('Message consumed: topic={0}, partition={1}, offset={2}, key={3}'.format(
                #     msg.topic(),
                #     msg.partition(),
                #     msg.offset(),
                #     msg.key().decode('utf-8')))

                # print("RECEIVED: " + msg.topic())

                # Add incoming message to requests database
                try:
                    message_text = msg.value().decode('utf-8')
                except:
                    message_text = msg.value()

                self.submit_message_to_database(message_text)

            # Sleep for a while
            # asyncio.sleep(0.43)

        # Unsubscribe and close consumer
        self.consumer.unsubscribe()
        self.consumer.close()

    def stop(self):
        self.listening = False

    def submit_message_to_database(self, message):

        try:
            con = sqlite3.connect(self.database)

            with con:
                cur = con.cursor()
                cur.execute('INSERT INTO requests (message) VALUES (?)', (message,))

            cur.close()

        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False
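# Hypothetical usage of the BusConsumer above: listen() blocks until stop() is called, so it is
# typically run in a background thread. The topic names and the one-minute window below are
# illustrative assumptions, not part of the original example.
import threading
import time

if __name__ == '__main__':
    bus_consumer = BusConsumer()
    worker = threading.Thread(target=bus_consumer.listen,
                              args=(['topic1', 'topic2'],),
                              daemon=True)
    worker.start()
    time.sleep(60)        # consume for a minute
    bus_consumer.stop()   # the poll loop exits, then the consumer unsubscribes and closes
    worker.join()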
Example No. 29
0
class KafkaConsumer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        # Consumer configuration. NOTE: linger.ms, acks, retries, message.max.bytes and
        # batch.num.messages are producer-side settings; a consumer ignores them (librdkafka
        # typically logs a warning about them).
        self.broker_properties = {
            "group.id": "group1",
            "bootstrap.servers": common.BROKER_URL,
            "linger.ms": 1000,
            "acks": 1,
            "retries": 3,
            "message.max.bytes": 4 * 4096,
            "batch.num.messages": 10
        }

        # Create the Consumer, using the appropriate type (Avro-aware or plain).
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = common.SCHEMA_REGISTRY_URL
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # If the consumer is configured with `offset_earliest`, move each assigned partition
        # back to the beginning of its log before completing the assignment.
        if self.offset_earliest:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # Poll Kafka for a single message, handling any errors; return 1 if a message was
        # processed and 0 otherwise.

        message = self.consumer.poll(1.0)

        if message is None:
            logger.debug("_consume: no message received")
            return 0
        elif message.error() is not None:
            logger.error(f"_consume met error {message.error()}")
            return 0
        else:
            try:
                logger.info(message.value())
                self.message_handler(message)
            except KeyError as e:
                logger.error(f"_consume: message handling failed: {e}")

        return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unsubscribe()
        self.consumer.close()
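# A minimal usage sketch for the KafkaConsumer above. consume() awaits tornado.gen.sleep, so it
# assumes a Tornado IOLoop drives it; the topic name and handler below are illustrative only.
from tornado import ioloop


def print_message(message):
    print(message.value())


def run_consumer():
    consumer = KafkaConsumer("com.example.topic", print_message, is_avro=False)
    try:
        ioloop.IOLoop.current().run_sync(consumer.consume)
    except KeyboardInterrupt:
        consumer.close()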
Example No. 30
0
class BusConsumer:
    def __init__(self):

        # Pre-shared credentials
        self.credentials = json.load(open('bus_credentials.json'))

        # Construct required configuration
        self.configuration = {
            'client.id': 'bus_logger',
            'group.id': 'bus_logger_group',
            'bootstrap.servers': ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'log.sqlite'

    def listen(self, topics):
        # Topics should be a list of topic names e.g. ['topic1', 'topic2']

        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print(e)
            return False

        # Initiate a loop for continuous listening
        while self.listening:
            msg = self.consumer.poll(1)

            # If a message is received and it is not an error message
            if msg is not None and msg.error() is None:

                # Add incoming message to requests database
                try:
                    topic = msg.topic()
                except Exception:
                    topic = "Undefined"

                try:
                    offset = str(msg.offset())
                except Exception:
                    offset = "Undefined"

                try:
                    message_text = msg.value().decode('utf-8')
                except (UnicodeDecodeError, AttributeError):
                    message_text = msg.value()

                # self.submit_message_to_sqlite_database(topic, message_text, offset)
                self.submit_message_to_mysql_database(topic, message_text, offset)

            # Sleep for a while. Calling asyncio.sleep() without awaiting it has no effect in
            # this synchronous loop, so a blocking sleep is used instead.
            time.sleep(0.43)

        # Unsubscribe and close consumer
        self.consumer.unsubscribe()
        self.consumer.close()

    def stop(self):
        self.listening = False

    def submit_message_to_sqlite_database(self, topic, message, offset):

        # Get UTC time as string
        timestamp = datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S.%f")

        try:
            con = sqlite3.connect(self.database)

            with con:
                cur = con.cursor()
                cur.execute('INSERT INTO requests (topic, message, timestamp, offset) VALUES (?, ?, ?, ?)', (topic, message, timestamp, offset))

            cur.close()

            # print('# Message logged:' + timestamp + " - Topic: " + topic)

        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False

        # con = sqlite3.connect(self.database)
        #
        # with con:
        #     cur = con.cursor()
        #     cur.execute('INSERT INTO requests (message) VALUES (?)', (message,))
        #
        # cur.close()

    def submit_message_to_mysql_database(self, topic, message, offset):

        # Get UTC time as string
        timestamp = datetime.utcnow().strftime("%Y/%m/%d %H:%M:%S.%f")

        # Connect to the database
        connection = pymysql.connect(host='localhost',
                                     user='******',
                                     password='******',
                                     db='bus_log',
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)

        try:
            with connection.cursor() as cur:

                cur.execute('INSERT INTO messages (topic, message, timestamp, offset) VALUES (%s, %s, %s, %s)', (topic, message, timestamp, offset))

            # connection is not autocommit by default. So you must commit to save
            # your changes.
            connection.commit()

            print('# Message logged:' + timestamp + " - Topic: " + topic)

        finally:
            connection.close()

    def empty_mysql_database(self):
        # Connect to the database
        connection = pymysql.connect(host='localhost',
                                     user='******',
                                     password='******',
                                     db='bus_log',
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)

        try:
            with connection.cursor() as cur:

                cur.execute('DELETE FROM messages')

            # connection is not autocommit by default. So you must commit to save
            # your changes.
            connection.commit()

        finally:
            connection.close()

    def empty_sqlite_database(self):
        try:
            con = sqlite3.connect(self.database)

            with con:
                cur = con.cursor()
                cur.execute('DELETE FROM messages')

            cur.close()

            print("Database was cleared")

        except sqlite3.Error as e:
            print("Error %s:" % e.args[0])
            return False
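# The logger above assumes that a 'requests' table exists in log.sqlite and a 'messages' table
# exists in the MySQL 'bus_log' database. A hypothetical one-off setup for the SQLite side
# (column types are assumptions inferred from the INSERT statement above):
import sqlite3


def create_sqlite_log_table(database='log.sqlite'):
    con = sqlite3.connect(database)
    with con:
        con.execute(
            'CREATE TABLE IF NOT EXISTS requests '
            '("topic" TEXT, "message" TEXT, "timestamp" TEXT, "offset" TEXT)'
        )
    con.close()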
Example No. 31
0
class KafkaConsumer(Consumer[TPayload]):
    """
    The behavior of this consumer differs slightly from the Confluent
    consumer during rebalancing operations. Whenever a partition is assigned
    to this consumer, offsets are *always* automatically reset to the
    committed offset for that partition (or if no offsets have been committed
    for that partition, the offset is reset in accordance with the
    ``auto.offset.reset`` configuration value.) This causes partitions that
    are maintained across a rebalance to have the same offset management
    behavior as a partition that is moved from one consumer to another. To
    prevent uncommitted messages from being consumed multiple times,
    ``commit`` should be called in the partition revocation callback.

    The behavior of ``auto.offset.reset`` also differs slightly from the
    Confluent consumer as well: offsets are only reset during initial
    assignment or subsequent rebalancing operations. Any other circumstances
    that would otherwise lead to preemptive offset reset (e.g. the consumer
    tries to read a message that is before the earliest offset, or the
    consumer attempts to read a message that is after the latest offset) will
    cause an exception to be thrown, rather than resetting the offset, as
    this could lead to chunks of messages being replayed or skipped, depending
    on the circumstances. This also means that if the committed offset is no
    longer available (such as when reading older messages from the log and
    those messages expire, or reading newer messages from the log and the
    leader crashes and partition ownership fails over to an out-of-date
    replica), the consumer will fail-stop rather than reset to the value of
    ``auto.offset.reset``.
    """

    # Set of logical offsets that do not correspond to actual log positions.
    # These offsets should be considered an implementation detail of the Kafka
    # consumer and not used publicly.
    # https://github.com/confluentinc/confluent-kafka-python/blob/443177e1c83d9b66ce30f5eb8775e062453a738b/tests/test_enums.py#L22-L25
    LOGICAL_OFFSETS = frozenset(
        [OFFSET_BEGINNING, OFFSET_END, OFFSET_STORED, OFFSET_INVALID])

    def __init__(
        self,
        configuration: Mapping[str, Any],
        codec: Codec[KafkaPayload, TPayload],
        *,
        commit_retry_policy: Optional[RetryPolicy] = None,
    ) -> None:
        if commit_retry_policy is None:
            commit_retry_policy = NoRetryPolicy()

        auto_offset_reset = configuration.get("auto.offset.reset", "largest")
        if auto_offset_reset in {"smallest", "earliest", "beginning"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_earliest)
        elif auto_offset_reset in {"largest", "latest", "end"}:
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_latest)
        elif auto_offset_reset == "error":
            self.__resolve_partition_starting_offset = (
                self.__resolve_partition_offset_error)
        else:
            raise ValueError(
                "invalid value for 'auto.offset.reset' configuration")

        if (as_kafka_configuration_bool(
                configuration.get("enable.auto.commit", "true")) is not False):
            raise ValueError(
                "invalid value for 'enable.auto.commit' configuration")

        if (as_kafka_configuration_bool(
                configuration.get("enable.auto.offset.store", "true"))
                is not False):
            raise ValueError(
                "invalid value for 'enable.auto.offset.store' configuration")

        # NOTE: Offsets are explicitly managed as part of the assignment
        # callback, so preemptively resetting offsets is not enabled.
        self.__consumer = ConfluentConsumer({
            **configuration, "auto.offset.reset":
            "error"
        })

        self.__codec = codec

        self.__offsets: MutableMapping[Partition, int] = {}
        self.__staged_offsets: MutableMapping[Partition, int] = {}
        self.__paused: Set[Partition] = set()

        self.__commit_retry_policy = commit_retry_policy

        self.__state = KafkaConsumerState.CONSUMING

    def __resolve_partition_offset_earliest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       low)

    def __resolve_partition_offset_latest(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        low, high = self.__consumer.get_watermark_offsets(partition)
        return ConfluentTopicPartition(partition.topic, partition.partition,
                                       high)

    def __resolve_partition_offset_error(
            self,
            partition: ConfluentTopicPartition) -> ConfluentTopicPartition:
        raise ConsumerError("unable to resolve partition offsets")

    def subscribe(
        self,
        topics: Sequence[Topic],
        on_assign: Optional[Callable[[Mapping[Partition, int]], None]] = None,
        on_revoke: Optional[Callable[[Sequence[Partition]], None]] = None,
    ) -> None:
        """
        Subscribe to topics. This replaces a previous subscription.

        This method does not block. The subscription may not be fulfilled
        immediately: instead, the ``on_assign`` and ``on_revoke`` callbacks
        are called when the subscription state changes with the updated
        assignment for this consumer.

        If provided, the ``on_assign`` callback is called with a mapping of
        partitions to their offsets (at this point, the working offset and the
        committed offset are the same for each partition) on each subscription
        change. Similarly, the ``on_revoke`` callback (if provided) is called
        with a sequence of partitions that are being removed from this
        consumer's assignment. (This callback does not include the offsets,
        as the working offset and committed offset may differ, in some cases
        by a substantial margin.)

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        def assignment_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.ASSIGNING

            try:
                assignment: MutableSequence[ConfluentTopicPartition] = []

                for partition in self.__consumer.committed(partitions):
                    if partition.offset >= 0:
                        assignment.append(partition)
                    elif partition.offset == OFFSET_INVALID:
                        assignment.append(
                            self.__resolve_partition_starting_offset(
                                partition))
                    else:
                        raise ValueError("received unexpected offset")

                offsets: MutableMapping[Partition, int] = {
                    Partition(Topic(i.topic), i.partition): i.offset
                    for i in assignment
                }
                self.__seek(offsets)

                # Ensure that all partitions are resumed on assignment to avoid
                # carrying over state from a previous assignment.
                self.__consumer.resume([
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in offsets.items()
                ])

                for partition in offsets:
                    self.__paused.discard(partition)
            except Exception:
                self.__state = KafkaConsumerState.ERROR
                raise

            try:
                if on_assign is not None:
                    on_assign(offsets)
            finally:
                self.__state = KafkaConsumerState.CONSUMING

        def revocation_callback(
                consumer: ConfluentConsumer,
                partitions: Sequence[ConfluentTopicPartition]) -> None:
            self.__state = KafkaConsumerState.REVOKING

            partitions = [
                Partition(Topic(i.topic), i.partition) for i in partitions
            ]

            try:
                if on_revoke is not None:
                    on_revoke(partitions)
            finally:
                for partition in partitions:
                    # Staged offsets are deleted during partition revocation to
                    # prevent later committing offsets for partitions that are
                    # no longer owned by this consumer.
                    if partition in self.__staged_offsets:
                        logger.warning(
                            "Dropping staged offset for revoked partition (%r)!",
                            partition,
                        )
                        del self.__staged_offsets[partition]

                    try:
                        self.__offsets.pop(partition)
                    except KeyError:
                        # If there was an error during assignment, this
                        # partition may have never been added to the offsets
                        # mapping.
                        logger.warning(
                            "failed to delete offset for unknown partition: %r",
                            partition,
                        )

                    self.__paused.discard(partition)

                self.__state = KafkaConsumerState.CONSUMING

        self.__consumer.subscribe(
            [topic.name for topic in topics],
            on_assign=assignment_callback,
            on_revoke=revocation_callback,
        )

    def unsubscribe(self) -> None:
        """
        Unsubscribe from topics.

        Raises an ``InvalidState`` exception if called on a closed consumer.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        self.__consumer.unsubscribe()

    def poll(self,
             timeout: Optional[float] = None) -> Optional[Message[TPayload]]:
        """
        Return the next message available to be consumed, if one is
        available. If no message is available, this method will block up to
        the ``timeout`` value before returning ``None``. A timeout of
        ``0.0`` represents "do not block", while a timeout of ``None``
        represents "block until a message is available (or forever)".

        Calling this method may also invoke subscription state change
        callbacks.

        This method may also raise an ``EndOfPartition`` error (a subtype of
        ``ConsumerError``) when the consumer has reached the end of a
        partition that it is subscribed to and no additional messages are
        available. The ``partition`` attribute of the raised exception
        specifies which partition has reached its end. (Since this
        consumer is multiplexing a set of partitions, this exception does not
        mean that *all* of the partitions that the consumer is subscribed to
        do not have any messages, just that it has reached the end of one of
        them. This also does not mean that additional messages won't be
        available in future poll calls.) Not every backend implementation
        supports this feature or is configured to raise in this scenario.

        Raises an ``InvalidState`` exception if called on a closed consumer.

        Raises a ``TransportError`` for various other consumption-related
        errors.
        """
        if self.__state is not KafkaConsumerState.CONSUMING:
            raise InvalidState(self.__state)

        message: Optional[ConfluentMessage] = self.__consumer.poll(
            *[timeout] if timeout is not None else [])
        if message is None:
            return None

        error: Optional[KafkaError] = message.error()
        if error is not None:
            code = error.code()
            if code == KafkaError._PARTITION_EOF:
                raise EndOfPartition(
                    Partition(Topic(message.topic()), message.partition()),
                    message.offset(),
                )
            elif code == KafkaError._TRANSPORT:
                raise TransportError(str(error))
            else:
                raise ConsumerError(str(error))

        headers: Optional[Headers] = message.headers()
        result = Message(
            Partition(Topic(message.topic()), message.partition()),
            message.offset(),
            self.__codec.decode(
                KafkaPayload(
                    message.key(),
                    message.value(),
                    headers if headers is not None else [],
                )),
            datetime.utcfromtimestamp(message.timestamp()[1] / 1000.0),
        )

        self.__offsets[result.partition] = result.get_next_offset()

        return result

    def tell(self) -> Mapping[Partition, int]:
        """
        Return the read offsets for all assigned partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        return self.__offsets

    def __validate_offsets(self, offsets: Mapping[Partition, int]) -> None:
        invalid_offsets: Mapping[Partition, int] = {
            partition: offset
            for partition, offset in offsets.items() if offset < 0
        }

        if invalid_offsets:
            raise ConsumerError(f"invalid offsets: {invalid_offsets!r}")

    def __seek(self, offsets: Mapping[Partition, int]) -> None:
        self.__validate_offsets(offsets)

        if self.__state is KafkaConsumerState.ASSIGNING:
            # Calling ``seek`` on the Confluent consumer from an assignment
            # callback will throw an "Erroneous state" error. Instead,
            # partition offsets have to be initialized by calling ``assign``.
            self.__consumer.assign([
                ConfluentTopicPartition(partition.topic.name, partition.index,
                                        offset)
                for partition, offset in offsets.items()
            ])
        else:
            for partition, offset in offsets.items():
                self.__consumer.seek(
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset))

        self.__offsets.update(offsets)

    def seek(self, offsets: Mapping[Partition, int]) -> None:
        """
        Change the read offsets for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError("cannot seek on unassigned partitions")

        self.__seek(offsets)

    def pause(self, partitions: Sequence[Partition]) -> None:
        """
        Pause the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if set(partitions) - self.__offsets.keys():
            raise ConsumerError("cannot pause unassigned partitions")

        self.__consumer.pause([
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ])

        self.__paused.update(partitions)

        # XXX: Seeking to a specific partition offset and immediately pausing
        # that partition causes the seek to be ignored for some reason.
        self.seek({
            partition: offset
            for partition, offset in self.__offsets.items()
            if partition in partitions
        })

    def resume(self, partitions: Sequence[Partition]) -> None:
        """
        Resume the consumption of messages for the provided partitions.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if set(partitions) - self.__offsets.keys():
            raise ConsumerError("cannot resume unassigned partitions")

        self.__consumer.resume([
            ConfluentTopicPartition(partition.topic.name, partition.index)
            for partition in partitions
        ])

        for partition in partitions:
            self.__paused.discard(partition)

    def paused(self) -> Sequence[Partition]:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        return [*self.__paused]

    def stage_offsets(self, offsets: Mapping[Partition, int]) -> None:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        if offsets.keys() - self.__offsets.keys():
            raise ConsumerError(
                "cannot stage offsets for unassigned partitions")

        self.__validate_offsets(offsets)

        # TODO: Maybe log a warning if these offsets exceed the current
        # offsets, since that's probably a side effect of an incorrect usage
        # pattern?
        self.__staged_offsets.update(offsets)

    def __commit(self) -> Mapping[Partition, int]:
        if self.__state in {
                KafkaConsumerState.CLOSED, KafkaConsumerState.ERROR
        }:
            raise InvalidState(self.__state)

        result: Optional[Sequence[ConfluentTopicPartition]]

        if self.__staged_offsets:
            result = self.__consumer.commit(
                offsets=[
                    ConfluentTopicPartition(partition.topic.name,
                                            partition.index, offset)
                    for partition, offset in self.__staged_offsets.items()
                ],
                asynchronous=False,
            )
        else:
            result = []

        assert result is not None  # synchronous commit should return result immediately

        self.__staged_offsets.clear()

        offsets: MutableMapping[Partition, int] = {}

        for value in result:
            # The Confluent Kafka Consumer will include logical offsets in the
            # sequence of ``Partition`` objects returned by ``commit``. These
            # are an implementation detail of the Kafka Consumer, so we don't
            # expose them here.
            # NOTE: These should no longer be seen now that we are forcing
            # offsets to be set as part of the assignment callback.
            if value.offset in self.LOGICAL_OFFSETS:
                continue

            assert value.offset >= 0, "expected non-negative offset"
            offsets[Partition(Topic(value.topic),
                              value.partition)] = value.offset

        return offsets

    def commit_offsets(self) -> Mapping[Partition, int]:
        """
        Commit staged offsets for all partitions that this consumer is
        assigned to. The return value of this method is a mapping of
        partitions with their committed offsets as values.

        Raises an ``InvalidState`` if called on a closed consumer.
        """
        return self.__commit_retry_policy.call(self.__commit)

    def close(self, timeout: Optional[float] = None) -> None:
        """
        Close the consumer. This stops consuming messages, *may* commit
        staged offsets (depending on the configuration), and ends its
        subscription.

        Raises a ``InvalidState`` if the consumer is unable to be closed
        before the timeout is reached.
        """
        try:
            self.__consumer.close()
        except RuntimeError:
            pass

        self.__state = KafkaConsumerState.CLOSED

    @property
    def closed(self) -> bool:
        return self.__state is KafkaConsumerState.CLOSED
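# The class docstring above recommends committing in the partition revocation callback so that
# uncommitted messages are not replayed after a rebalance. A minimal sketch of that pattern,
# built only from the public methods shown here (the processing step is a placeholder):
def run_consumer_with_rebalance_safe_commits(consumer: KafkaConsumer, topic: Topic) -> None:
    def on_revoke(partitions):
        # Flush staged offsets before ownership of the partitions moves elsewhere.
        consumer.commit_offsets()

    consumer.subscribe([topic], on_revoke=on_revoke)

    while not consumer.closed:
        message = consumer.poll(1.0)
        if message is None:
            continue
        # ... handle the decoded message payload here ...
        consumer.stage_offsets({message.partition: message.get_next_offset()})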
Example No. 32
0
class BusConsumer:
    def __init__(self):

        # Pre-shared credentials
        # self.credentials = json.load(open('bus_credentials.json'))

        self.credentials = bus.load_credentials.LoadCredentials.load_bus_credentials()

        # Construct required configuration
        self.configuration = {
            'client.id': 'KB_consumer',
            'group.id': 'KB_consumer_group',
            'bootstrap.servers':
            ','.join(self.credentials['kafka_brokers_sasl']),
            'security.protocol': 'SASL_SSL',
            'ssl.ca.location': '/etc/ssl/certs',
            'sasl.mechanisms': 'PLAIN',
            'sasl.username': self.credentials['api_key'][0:16],
            'sasl.password': self.credentials['api_key'][16:48],
            'api.version.request': True
        }

        self.consumer = Consumer(self.configuration)

        self.listening = True

        self.database = 'messages.sqlite'

    def listen(self, topics):
        # Topics should be a list of topic names e.g. ['topic1', 'topic2']

        self.listening = True

        # Subscribe to topics
        try:
            self.consumer.subscribe(topics)
        except Exception as e:
            print("Error @ BusConsumer.listen()")
            print(e)
            return False

        # Initiate a loop for continuous listening
        while self.listening:
            msg = self.consumer.poll(0)

            # If a message is received and it is not an error message
            if msg is not None and msg.error() is None:

                # Add incoming message to requests database
                try:
                    message_text = msg.value().decode('utf-8')
                except (UnicodeDecodeError, AttributeError):
                    message_text = msg.value()

                self.submit_message_process(message_text)

        # Unsubscribe and close consumer
        self.consumer.unsubscribe()
        self.consumer.close()

    def stop(self):
        self.listening = False

    def submit_message_process(self, message):
        znGen.generateTopic()
Example No. 33
0
class KafkaConsumer:
    """Defines the base kafka consumer class"""

    consumer_group_counter = 0
    
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        #
        # Configure the broker properties below. Make sure to reference the project README
        # and use the Host URL for Kafka and Schema Registry!
        #
        
        KafkaConsumer.consumer_group_counter = KafkaConsumer.consumer_group_counter + 1
        
        if (self.offset_earliest):
            self.broker_properties = {
                CTAConstants.MAP_KEY_BOOTSTRAP_SERVERS: CTAConstants.BOOTSTRAP_SERVERS,
                "group.id": f"{CTAConstants.CONSUMER_GRP_ID_PRFX}-{KafkaConsumer.consumer_group_counter}",
                "default.topic.config": {"auto.offset.reset": "earliest"}
            }
        else:
            self.broker_properties = {
                CTAConstants.MAP_KEY_BOOTSTRAP_SERVERS: CTAConstants.BOOTSTRAP_SERVERS,
                "group.id": f"{CTAConstants.CONSUMER_GRP_ID_PRFX}-{KafkaConsumer.consumer_group_counter}",
                "default.topic.config": {"auto.offset.reset": "latest"}
            }
        
        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties["schema.registry.url"] = CTAConstants.SCHEMA_REGISTRY_HOST
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        #
        # Configure the AvroConsumer and subscribe to the topics. Make sure to think about
        # how the `on_assign` callback should be invoked.
        #
        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)
        
        logger.info(f"Instantiated consumer and subscribed: ({self.topic_name_pattern})")

    
    # Called back on assign of partition(s) to this Consumer.        
    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""
        # If the topic is configured to use `offset_earliest` set the partition offset to
        # the beginning or earliest
        logger.info("on_assign")
        if (self.offset_earliest):
            for partition in partitions:
                #partition.offset(Offset.OFFSET_BEGINNING)
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""
        # Poll Kafka for messages. Make sure to handle any errors or exceptions.
        # Additionally, make sure you return 1 when a message is processed, and 0 when no message
        # is retrieved.
        logger.debug(f"In _consume({self.topic_name_pattern})")
        
        try:
            msg = self.consumer.poll(timeout=self.consume_timeout)
            if (msg is None):
                logger.debug("No msg in topic yet.")
                return 0
            else:
                if (msg.error() is None):
                    logger.debug("Got msg.")
                    self.message_handler(msg)
                else:
                    # handle error.
                    error = msg.error()
                    logger.error(f"Error in consumer:{self.topic_name_pattern} while consuming msgs. Err code: {error.code()}, error-name: {error.name()}, error.str:{error.str()}" )
                    
                return 1
        except RuntimeError as re:
            logger.error(f"Runtime error in consumer:{self.topic_name_pattern}. Err msg: {re}")
            return 0

    def close(self):
        """Cleans up any open kafka consumers"""
        # Cleanup the kafka consumer
        self.consumer.unassign()
        self.consumer.unsubscribe()
        self.consumer.close()
Example No. 34
0
class KafkaConsumer(BaseKafkaConsumer):
    def __init__(self, config):
        self._config = config["consumer"]
        self.assign_offset_end = self._config.get("assign_offset_end", False)
        conf = self._config["conf"]
        conf.setdefault("group.id", str(uuid.uuid1()))
        self.autocommit_enabled = conf.get("enable.auto.commit", True)
        internal_log_path = self._config.get("internal_log_path")
        conf["error_cb"] = self._error_callback
        if internal_log_path:
            debug_logger = logging.getLogger("debug_consumer")
            timestamp = time.strftime("_%d%m%Y_")
            debug_logger.addHandler(
                logging.FileHandler("{}/kafka_consumer_debug{}{}.log".format(
                    internal_log_path, timestamp, os.getpid())))
            conf["logger"] = debug_logger
        self._consumer = Consumer(**conf)

    @staticmethod
    def on_assign_offset_end(consumer, partitions):
        for p in partitions:
            p.offset = OFFSET_END
        KafkaConsumer.on_assign_log(consumer, partitions)
        consumer.assign(partitions)

    @staticmethod
    def on_coop_assign_offset_end(consumer, partitions):
        for p in partitions:
            p.offset = OFFSET_END
        KafkaConsumer.on_assign_log(consumer, partitions)
        consumer.incremental_assign(partitions)

    @staticmethod
    def on_assign_log(consumer, partitions):
        log_level = "WARNING"
        for p in partitions:
            if p.error:
                log_level = "ERROR"
        params = {
            "partitions":
            str(list([str(partition) for partition in partitions or []])),
            log_const.KEY_NAME:
            log_const.KAFKA_ON_ASSIGN_VALUE,
            "log_level":
            log_level
        }
        log("KafkaConsumer.subscribe<on_assign>: assign %(partitions)s %(log_level)s",
            params=params,
            level=log_level)

    def subscribe(self, topics=None):
        topics = topics or list(self._config["topics"].values())

        self._consumer.subscribe(
            topics,
            on_assign=self.get_on_assign_callback()
            if self.assign_offset_end else KafkaConsumer.on_assign_log)

    def get_on_assign_callback(self):
        if "cooperative" in self._config["conf"].get(
                "partition.assignment.strategy", ""):
            callback = KafkaConsumer.on_coop_assign_offset_end
        else:
            callback = KafkaConsumer.on_assign_offset_end
        return callback

    def unsubscribe(self):
        self._consumer.unsubscribe()

    def poll(self):
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            return self._process_message(msg)

    def consume(self, num_messages: int = 1):
        messages = self._consumer.consume(num_messages=num_messages,
                                          timeout=self._config["poll_timeout"])
        for msg in messages:
            yield self._process_message(msg)

    def commit_offset(self, msg):
        if msg is not None:
            if self.autocommit_enabled:
                self._consumer.store_offsets(msg)
            else:
                # 'asynchronous' replaces the legacy 'async' keyword argument of commit()
                self._consumer.commit(msg, asynchronous=True)

    def get_msg_create_time(self, mq_message):
        timestamp_type, timestamp = mq_message.timestamp()
        return timestamp if timestamp_type is not TIMESTAMP_NOT_AVAILABLE else None

    def _error_callback(self, err):
        params = {
            "error": str(err),
            log_const.KEY_NAME: log_const.EXCEPTION_VALUE
        }
        log("KafkaConsumer: Error: %(error)s", params=params, level="WARNING")
        monitoring.got_counter("kafka_consumer_exception")

    # noinspection PyMethodMayBeStatic
    def _process_message(self, msg: KafkaMessage):
        err = msg.error()
        if err:
            if err.code() == KafkaError._PARTITION_EOF:
                return None
            else:
                monitoring.got_counter("kafka_consumer_exception")
                params = {
                    "code": err.code(),
                    "pid": os.getpid(),
                    "topic": msg.topic(),
                    "partition": msg.partition(),
                    "offset": msg.offset(),
                    log_const.KEY_NAME: log_const.EXCEPTION_VALUE
                }
                log(
                    "KafkaConsumer Error %(code)s at pid %(pid)s: topic=%(topic)s partition=[%(partition)s] "
                    "reached end at offset %(offset)s\n",
                    params=params,
                    level="WARNING")
                raise KafkaException(err)

        if msg.value():
            if msg.headers() is None:
                msg.set_headers([])
            return msg

    def close(self):
        self._consumer.close()
        log(f"consumer to topics {self._config['topics']} closed.")
Example No. 35
0
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({
        'group.id': 'test',
        'socket.timeout.ms': '100',
        'session.timeout.ms': 1000,  # Avoid close() blocking too long
        'on_commit': dummy_commit_cb
    })

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"],
                 on_assign=dummy_assign_revoke,
                 on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    partitions = list(
        map(lambda p: TopicPartition("test", p), range(0, 100, 3)))
    kc.assign(partitions)

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT,
                                    KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        offsets = kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
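# The test above registers a no-op on_commit callback. A slightly more informative callback
# sketch with the same (err, partitions) signature; the print formatting is illustrative only:
def logging_commit_cb(err, partitions):
    if err is not None:
        print('Commit failed: %s' % err)
    else:
        for p in partitions:
            print('Committed %s [%d] @ %d' % (p.topic, p.partition, p.offset))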
Example No. 36
0
class ConsumerServer:
    """Defines the base kafka consumer class"""
    def __init__(
        self,
        topic_name_pattern,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers": "PLAINTEXT://localhost:9093",
            "group.id": "GRP.0",
            "max.poll.interval.ms": 600000
        }

        self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)

    def on_assign(self, consumer, partitions):
        """Callback for when topic assignment takes place"""

        if self.offset_earliest is True:
            for partition in partitions:
                partition.offset = confluent_kafka.OFFSET_BEGINNING

        logger.info("partitions assigned for %s", self.topic_name_pattern)
        self.consumer.assign(partitions)

    async def consume(self):
        """Asynchronously consumes data from kafka topic"""
        while True:
            num_results = 1
            while num_results > 0:
                num_results = self._consume()
            await gen.sleep(self.sleep_secs)

    def _consume(self):
        """Polls for a message. Returns 1 if a message was received, 0 otherwise"""

        message = self.consumer.poll(self.consume_timeout)
        if message is None:
            logger.debug("No message received on topic %s",
                         self.topic_name_pattern)
            return 0
        elif message.error() is not None:
            logger.error(
                f"Error in receiving message from topic {self.topic_name_pattern}: {message.error()}"
            )
            return 1
        else:
            logger.debug(
                f"Received message from topic {self.topic_name_pattern}:\n {message.value()}"
            )
            return 1

    def close(self):
        """Cleans up any open kafka consumers"""
        self.consumer.unsubscribe()
        self.consumer.close()
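# A minimal synchronous usage sketch of the ConsumerServer above; the topic name is an
# assumption, and _consume() is driven directly instead of through the Tornado-based consume().
import time

if __name__ == "__main__":
    server = ConsumerServer("com.example.stations", offset_earliest=True)
    try:
        while True:
            if server._consume() == 0:
                time.sleep(server.sleep_secs)
    except KeyboardInterrupt:
        server.close()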