Example #1
from confluent_kafka.avro import AvroConsumer


def create_kafka_consumer(group_id='nsb-salgsdvh',
                          autoCommit=False,
                          environment=None):
    username = '******'
    if environment == 'PROD':
        password = dbutils.secrets.get('KAFKA_ENTUR_PROD', 'password')
        config = {
            'bootstrap.servers': 'bootstrap.prod-ext.kafka.entur.io:9095',
            'group.id': group_id,
            'enable.auto.commit': autoCommit,
            'auto.offset.reset': 'earliest',
            'schema.registry.url':
            'http://schema-registry.prod-ext.kafka.entur.io:8001',
            'security.protocol': 'SASL_SSL',
            'sasl.mechanism': 'SCRAM-SHA-512',
            'sasl.username': username,
            'sasl.password': password
        }

    elif environment == 'TEST':
        password = dbutils.secrets.get('KAFKA_ENTUR', 'password')
        config = {
            'bootstrap.servers': 'bootstrap.test-ext.kafka.entur.io:9095',
            'group.id': group_id,
            'enable.auto.commit': autoCommit,
            'auto.offset.reset': 'earliest',
            'schema.registry.url':
            'http://schema-registry.test-ext.kafka.entur.io:8001',
            'security.protocol': 'SASL_SSL',
            'sasl.mechanism': 'SCRAM-SHA-512',
            'sasl.username': username,
            'sasl.password': password
        }
    else:
        raise ValueError("environment must be 'PROD' or 'TEST'")

    c = AvroConsumer(config)

    return c
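A consumer returned by this factory would typically be driven by a poll loop like the sketch below. The topic name is hypothetical, and dbutils assumes a Databricks runtime:

consumer = create_kafka_consumer(environment='TEST')
consumer.subscribe(['some-topic'])
try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        print(msg.value())
finally:
    consumer.close()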
Example #2
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            'bootstrap.servers': 'PLAINTEXT://localhost:9094',
            'group.id': topic_name_pattern,
            'default.topic.config': {
                'auto.offset.reset': 'earliest'
            }
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        # Subscribe to the topic, wiring up the on_assign rebalance callback
        # (see the sketch after this example).
        self.consumer.subscribe([topic_name_pattern], on_assign=self.on_assign)
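The subscribe call above references an on_assign callback that is not shown in this excerpt. A minimal sketch of what it might look like, assuming the surrounding class and its offset_earliest flag:

    def on_assign(self, consumer, partitions):
        # Rebalance callback: rewind each newly assigned partition when
        # earliest offsets were requested, then hand the assignment back.
        from confluent_kafka import OFFSET_BEGINNING

        if self.offset_earliest:
            for partition in partitions:
                partition.offset = OFFSET_BEGINNING
        consumer.assign(partitions)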
Example #3
    def __init__(
        self,
        topic_name_pattern,
        message_handler,
        is_avro=True,
        offset_earliest=False,
        sleep_secs=1.0,
        consume_timeout=0.1,
    ):
        """Creates a consumer object for asynchronous use"""
        self.topic_name_pattern = topic_name_pattern
        self.message_handler = message_handler
        self.sleep_secs = sleep_secs
        self.consume_timeout = consume_timeout
        self.offset_earliest = offset_earliest

        self.broker_properties = {
            "bootstrap.servers":
            "PLAINTEXT://localhost:9092,"
            "PLAINTEXT://localhost:9093,"
            "PLAINTEXT://localhost:9094",
            "group.id": self.topic_name_pattern,
            "auto.offset.reset": "earliest" if offset_earliest else "latest"
        }

        # Create the Consumer, using the appropriate type.
        if is_avro is True:
            self.broker_properties[
                "schema.registry.url"] = "http://localhost:8081"
            self.consumer = AvroConsumer(self.broker_properties)
        else:
            self.consumer = Consumer(self.broker_properties)

        self.consumer.subscribe([self.topic_name_pattern],
                                on_assign=self.on_assign)
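The sleep_secs and consume_timeout settings above imply a polling loop. One plausible shape for it, written as an async method on the class (a sketch, not the original project's code; requires `import asyncio` at module level):

    async def _consume(self):
        while True:
            message = self.consumer.poll(self.consume_timeout)
            if message is None or message.error():
                # Nothing usable this round; back off before polling again.
                await asyncio.sleep(self.sleep_secs)
                continue
            self.message_handler(message)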
Example #4
def consume(topic, conf):
    """
        Consume User records
    """
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    print("Consuming user records from topic {} with group {}. ^c to exit.".
          format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=record_schema)
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)

            # There were no messages on the queue, continue polling
            if msg is None:
                continue

            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = User(msg.value())
            print("name: {}\n\tfavorite_number: {}\n\tfavorite_color: {}\n".
                  format(record.name, record.favorite_number,
                         record.favorite_color))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
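record_schema and the User wrapper are defined elsewhere in the source script. A plausible minimal version of the wrapper, for reference:

class User:
    """Thin wrapper exposing the Avro-decoded dict as attributes."""

    def __init__(self, value):
        self.name = value.get("name")
        self.favorite_number = value.get("favorite_number")
        self.favorite_color = value.get("favorite_color")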
Example #5
def confluent_kafka_consumer(args):

    msg_consumed_count = 0
    conf = {'bootstrap.servers': args.bootstrap_servers,
            'group.id': str(uuid.uuid1()),
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'latest'
            },
            'schema.registry.url': args.schema_registry}

    consumer = AvroConsumer(conf)
    consumer.subscribe([args.topic])

    msg = None
    while True:

        try:
            msg = consumer.poll(1)
        except SerializerError as e:
            print('Message deserialization failed for {}: {}'.format(msg, e))
            break

        if msg is None:
            continue

        if msg.error():
            print("AvroConsumer error: {}".format(msg.error()))
            continue

        msg_consumed_count += 1
        nlp_processing(msg.value())

        if msg_consumed_count >= int(args.msg_count):
            break

    consumer.close()
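A sketch of the command-line wiring this function expects; the flag names are assumptions inferred from the attributes used above, and nlp_processing must be defined in the same module:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--bootstrap-servers', dest='bootstrap_servers',
                    default='localhost:9092')
parser.add_argument('--schema-registry', dest='schema_registry',
                    default='http://localhost:8081')
parser.add_argument('--topic', default='test')
parser.add_argument('--msg-count', dest='msg_count', default='100')
args = parser.parse_args()
confluent_kafka_consumer(args)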
Example #6
def consume(conf, schema_record):
    """
        Consume MetadataChangeEvent records
    """
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    # NOTE: `topic` is expected to be defined at module level in the source
    # script.
    print(
        "Consuming MetadataChangeEvent records from topic {} with group {}. ^c to exit."
        .format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=avro.load(schema_record))
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)

            # There were no messages on the queue, continue polling
            if msg is None:
                continue

            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = MetadataChangeEvent(msg.value())
            print("avro_event: {}\n\t".format(record.value))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
Example #7
def testAvroConsumer(topic):
    config = {
        'bootstrap.servers': "localhost:9092",
        'group.id': 'groupid',
        'schema.registry.url': 'http://127.0.0.1:8081'
    }
    c = AvroConsumer(config)
    c.subscribe([topic])
    running = True
    msg = None
    while running:
        try:
            msg = c.poll(10)
            if msg:
                if not msg.error():
                    print(msg.value())
                elif msg.error().code() != KafkaError._PARTITION_EOF:
                    print(msg.error())
                    running = False
        except SerializerError as e:
            print("Message deserialization failed for %s: %s" % (msg, e))
            running = False

    c.close()
Example #8
def consume(topic, conf):
    """
        Consume User records
    """
    from confluent_kafka.avro import AvroConsumer
    from confluent_kafka.avro.serializer import SerializerError

    print("Consuming user records from topic {} with group {}. ^c to exit.".format(topic, conf["group.id"]))

    c = AvroConsumer(conf, reader_value_schema=record_schema)
    c.subscribe([topic])

    while True:
        try:
            msg = c.poll(1)

            # There were no messages on the queue, continue polling
            if msg is None:
                continue

            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            record = User(msg.value())
            print("name: {}\n\tfavorite_number: {}\n\tfavorite_color: {}\n".format(
                record.name, record.favorite_number, record.favorite_color))
        except SerializerError as e:
            # Report malformed record, discard results, continue polling
            print("Message deserialization failed {}".format(e))
            continue
        except KeyboardInterrupt:
            break

    print("Shutting down consumer..")
    c.close()
Example #9
class AvroConsumer:

    DEFAULT_CONFIG = {
        'api.version.request': True,
        'client.id': socket.gethostname(),
        'default.topic.config': {
            'auto.offset.reset': 'latest'
        },
        'enable.auto.commit': False,
        'fetch.error.backoff.ms': 0,
        'fetch.wait.max.ms': 100,
        'fetch.min.bytes': 1000,
        'log.connection.close': False,
        'log.thread.name': False,
        'session.timeout.ms': 6000,
        'statistics.interval.ms': 15000,
        'queued.max.messages.kbytes': '10485',
        'fetch.message.max.bytes': '10485',
    }

    def __init__(
        self, config, get_message: Callable = get_message,
        error_handler: Callable = default_error_handler
    ) -> None:
        stop_on_eof = config.pop('stop_on_eof', False)
        poll_timeout = config.pop('poll_timeout', 0.1)
        self.non_blocking = config.pop('non_blocking', False)

        self.config = {**self.DEFAULT_CONFIG, **config}
        self.config['error_cb'] = get_callback(
            config.pop('error_cb', None), default_error_cb
        )
        self.config['stats_cb'] = get_callback(
            config.pop('stats_cb', None), default_stats_cb
        )
        self.topics = self._get_topics(self.config)

        logger.info("Initializing consumer", config=self.config)
        self.consumer = ConfluentAvroConsumer(self.config)
        self.consumer.subscribe(self.topics)

        self._generator = self._message_generator()

        self._get_message = partial(
            get_message, consumer=self.consumer, error_handler=error_handler,
            timeout=poll_timeout, stop_on_eof=stop_on_eof
        )

    def __getattr__(self, name):
        return getattr(self.consumer, name)

    def __iter__(self):
        return self

    def __next__(self):
        try:
            return next(self._generator)
        except EndOfPartition:
            raise StopIteration

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, tb):
        # the only reason a consumer exits is when an
        # exception is raised.
        #
        # close down the consumer cleanly accordingly:
        #  - stops consuming
        #  - commit offsets (only on auto commit)
        #  - leave consumer group
        logger.info("Closing consumer")
        self.consumer.close()

    def _message_generator(self):
        while True:
            message = self._get_message()
            if message is None:
                if self.non_blocking:
                    yield None
                continue

            statsd.increment(f'{base_metric}.consumer.message.count.total')
            yield Message(message)

    def _get_topics(self, config):
        topics = config.pop('topics', None)
        assert topics is not None, "You must subscribe to at least one topic"

        if not isinstance(topics, list):
            topics = [topics]

        return topics

    @property
    def is_auto_commit(self):
        return self.config.get('enable.auto.commit', True)
Example #10
class Consumer:
    def __init__(self,
                 broker,
                 schema_registry,
                 topic=None,
                 logging_enabled=False,
                 group_id=None,
                 auto_commit=True):
        """
        Initialiser for Confluent Consumer using AvroConsumer. 
        Each consumer can only be subscribed to one topic 
        Parameters
        ----------
        broker: str
            The URL of the broker (example: 'localhost:9092')
        schema_registry: str
            The URL of the confluent Schema Registry endpoint (example: 'http://localhost:8081')
        topic: str
            The topic to subscribe too
        logger: Logger object, Optional
            The logger object which will be used to log messages if provided
        groupId: str, Optional
            An optional groupId which can be used to loadbalance consumers default is "asgard"
        """
        if group_id is None:
            new_hash = hashlib.sha1()
            new_hash.update(str(time.time()).encode("utf-8"))
            group_id = new_hash.hexdigest()

        self.__consumer = AvroConsumer({
            "bootstrap.servers": broker,
            "group.id": group_id,
            "schema.registry.url": schema_registry,
            "enable.auto.commit": auto_commit
        })
        self.__consumer_non_avro = KafkaConsumer({
            "bootstrap.servers": broker,
            "group.id": group_id + "0",
            "enable.auto.commit": auto_commit
        })
        self.auto_commit = auto_commit
        if not auto_commit:
            self.consumed_messages = PriorityQueue()
        if topic is not None:
            self.subscribe_to_topic(topic)
        else:
            self.topic = None
        if logging_enabled:
            self.logger = logging.getLogger(__name__)
        else:
            self.logger = None

    def consume(self, timeout=1):
        """
        Method to consume and return message if exists and can be deserialized
        Returns
        -------
        str
            The recieved message payload as a string
        None
            No message has been recieved or an error has occured
        """
        if self.topic is not None:
            msg = None
            non_avro = False
            try:
                msg = self.__consumer.poll(timeout)
            except SerializerError as e:
                try:
                    msg = self.__consumer_non_avro.poll(timeout)
                    non_avro = True
                except Exception as e:
                    self.__log_msg(
                        "Message deserialization has failed {}: {}".format(
                            msg, e),
                        "See the following stack trace",
                        f"{traceback.format_exc()}",
                        delimiter="\n",
                        level="ERROR")
            except RuntimeError as e:
                self.__log_msg(
                    "The consumer has been closed and cannot receive messages",
                    level="ERROR")
            except Exception as e:
                self.__log_msg("An unknown error has occurred {}".format(e),
                               "See the following stack trace",
                               f"{traceback.format_exc()}",
                               delimiter="\n",
                               level="ERROR")

            if msg is not None:
                if msg.error():
                    self.__log_msg("AvroConsumer error: {}".format(
                        msg.error()),
                                   level="ERROR")
                else:
                    if not self.auto_commit:
                        self.consumed_messages.put_nowait(msg)
                    if non_avro:
                        data_to_be_returned = json.loads(msg.value().decode())
                    else:
                        data_to_be_returned = msg.value()
                    return data_to_be_returned
        else:
            raise ValueError("Consumer is currently not subscribed to a topic")

    def __enter__(self):
        return self.__consumer

    def __exit__(self, *args):
        self.close()

    def __log_msg(
        self,
        *messages,
        level="NOTSET",
        delimiter=" ",
    ):
        levels = {
            "CRITICAL": logging.CRITICAL,
            "ERROR": logging.ERROR,
            "WARNING": logging.WARNING,
            "INFO": logging.INFO,
            "DEBUG": logging.DEBUG,
            "NOTSET": logging.NOTSET
        }
        msg = delimiter.join(messages)
        if self.logger is not None:
            if level not in levels:
                raise ValueError(
                    f"level {level} is not valid must be one of {list(levels.keys())}"
                )
            self.logger.log(levels[level], msg)
        else:
            print(f"{level}: {msg}")

    def commit(self, asynchronous=True):
        if not self.auto_commit and not self.consumed_messages.empty():
            msg = self.consumed_messages.get_nowait()
            self.__consumer.commit(msg, asynchronous=asynchronous)

    def list_topics(self, topic=None, timeout=1):
        try:
            metadata = self.__consumer.list_topics(topic, timeout)
            topics = metadata.topics
            return list(topics.keys())
        except Exception as e:
            self.__log_msg(
                f"An unknown error has occurred when trying to list topics: {e}",
                level="ERROR")
            if self.logger is not None:
                self.logger.debug(e)

    def check_if_topic_exists(self, topic, timeout=1):
        topic_list = self.list_topics(timeout=timeout)
        if topic_list is not None:
            return topic in topic_list

    def subscribe_to_topic(self, topic):
        try:
            self.__consumer_non_avro.subscribe([topic],
                                               on_assign=self.__assign)
            self.__consumer.subscribe([topic], on_assign=self.__assign)
            self.topic = topic
            return True
        except Exception as e:
            self.__log_msg(
                "An unknown error {}".format(e),
                "occurred while trying to subscribe to topic {}".format(topic),
                delimiter=" ",
                level="ERROR")
            return False

    def __assign(self, consumer, partitions):
        for p in partitions:
            p.offset = consumer.get_watermark_offsets(p)[1] - 1
        self.__consumer.assign(partitions)
        self.__consumer_non_avro.assign(partitions)

    def close(self):
        """
        Close the consumer, Once called this object cannot be reused
        """
        self.__consumer.close()
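A usage sketch for the wrapper above; the broker and registry URLs are placeholders:

consumer = Consumer('localhost:9092', 'http://localhost:8081',
                    topic='my_topic', auto_commit=False)
try:
    payload = consumer.consume(timeout=1)
    if payload is not None:
        print(payload)
        consumer.commit()  # manual commit because auto_commit=False
finally:
    consumer.close()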
Example #11
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
import time
from concurrent.futures import ThreadPoolExecutor, wait
import boto3
from config import conf
from datetime import datetime

KAFKA_BROKER_URL = conf.KAFKA_CONFIG["bootstrap.servers"]
SCHEMA_REGISTRY_URL = conf.KAFKA_CONFIG["schema.registry.url"]

c = AvroConsumer({
    'bootstrap.servers': KAFKA_BROKER_URL,
    'group.id': 'testGrp_2',
    'schema.registry.url': SCHEMA_REGISTRY_URL
})

c.subscribe(['userPratilipiEventAggregated'])

pool = ThreadPoolExecutor(20)

dynamodb = boto3.resource('dynamodb', region_name=conf.AWS_REGION)

table = dynamodb.Table('user_pratilipi')


def pushEventToDB(msg):
    data = msg["data"]
    pratilipi_id = msg["pratilipiId"]
    date = datetime.fromtimestamp(data["readTime"] /
Example #12
class KafkaConsumer:
    def __init__(self,
                 kafka_brokers="",
                 scram_username="",
                 scram_password="",
                 topic_name="",
                 schema_registry_url="",
                 autocommit=True):
        self.kafka_brokers = kafka_brokers
        self.scram_username = scram_username
        self.scram_password = scram_password
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
            'security.protocol': 'SASL_SSL',
            'sasl.mechanisms': 'SCRAM-SHA-512',
            'sasl.username': self.scram_username,
            'sasl.password': self.scram_password,
            'ssl.ca.location': os.environ['PEM_CERT'],
            'schema.registry.ssl.ca.location': os.environ['PEM_CERT']
        }
        # Print the configuration
        print("--- This is the configuration for the Avro consumer: ---")
        print(options)
        print("---------------------------------------------------")
        # Create the Avro consumer
        self.consumer = AvroConsumer(options)
        # Subscribe to the topic
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print(
            '[Message] - Next message consumed from {} partition: [{}] at offset {} with key {} and value {}'
            .format(msg.topic(), msg.partition(), msg.offset(), msg.key(),
                    msg.value()))

    # Polls for next event
    def pollNextEvent(self):
        # Poll for messages
        msg = self.consumer.poll(timeout=10.0)
        # Validate the returned message
        if msg is None:
            print("[INFO] - No new messages on the topic")
        elif msg.error():
            if "PARTITION_EOF" in str(msg.error()):
                print("[INFO] - End of partition")
            else:
                print("[ERROR] - Consumer error: {}".format(msg.error()))
        else:
            # Print the message
            self.traceResponse(msg)

    def close(self):
        self.consumer.close()
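A usage sketch for this class; every connection value is a placeholder, and PEM_CERT must point at the broker's CA certificate because prepareConsumer reads it from the environment:

import os

os.environ.setdefault('PEM_CERT', '/tmp/ca.pem')  # placeholder path
consumer = KafkaConsumer(kafka_brokers='broker:9093',
                         scram_username='user',
                         scram_password='secret',
                         topic_name='orders',
                         schema_registry_url='https://registry:8081')
consumer.prepareConsumer(groupID='pythonconsumers')
consumer.pollNextEvent()
consumer.close()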
Example #13
from pprint import pprint

from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

# get saved keys
import generalconfig as cfg

confluentKey = cfg.pwd['confluentKey']
confluentSecret = cfg.pwd['confluentSecret']
confluentSchemaRegistryKey = cfg.pwd['confluentSchemaRegistryKey']
confluentSchemaRegistrySecret = cfg.pwd['confluentSchemaRegistrySecret']

c = AvroConsumer({
    'bootstrap.servers': "pkc-41973.westus2.azure.confluent.cloud:9092",
    'security.protocol': 'SASL_SSL',
    'sasl.mechanism': 'PLAIN',
    'sasl.username': confluentKey,
    'sasl.password': confluentSecret,
    'schema.registry.url': 'https://psrc-4r0k9.westus2.azure.confluent.cloud',
    'schema.registry.basic.auth.credentials.source': 'USER_INFO',
    'schema.registry.basic.auth.user.info':
    f'{confluentSchemaRegistryKey}:{confluentSchemaRegistrySecret}',
    'group.id': '1',
})

c.subscribe(['covid'])

for i in range(10):
    try:
        msg = c.poll(timeout=20)

    except SerializerError as e:
        print("Message deserialization failed, skipping bad message.")
        continue

Example #14
import json

import requests
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError


def dump(obj):
    for attr in dir(obj):
        print("obj.%s = %r" % (attr, getattr(obj, attr)))


def get_schema_id(topic):
    response = requests.get(
        'http://35.223.91.93:8081/subjects/{}/versions/latest'.format(topic))
    return json.loads(response.text)


c = AvroConsumer({
    'bootstrap.servers': '10.128.0.8,10.128.0.9,10.128.0.10',
    'schema.registry.url': 'http://35.223.91.93:8081',
    'group.id': 'groupid'
})

c.subscribe(['redshift'])

while True:
    try:
        msg = c.poll(1)

    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue
Example #15
#!/usr/bin/env python
from confluent_kafka import avro
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

conf = {
    "bootstrap.servers": "10.227.52.244:31090,10.227.52.244:31091,10.227.52.244:31092",
    "schema.registry.url": "http://10.227.52.244:30553",
    "group.id": "testeiei"
}
topic="ingester"

key_schema = avro.load("./schemas/{}-key.avsc".format(topic))
value_schema = avro.load("./schemas/{}-value.avsc".format(topic))
# c = AvroConsumer(conf, reader_key_schema=key_schema, reader_value_schema=value_schema)
c = AvroConsumer(conf)
c.subscribe([topic])

while True:
    try:
        msg = c.poll(1)
        # There were no messages on the queue, continue polling
        if msg is None:
            print(".")
            continue

        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        print(msg.key(), msg.value())
    except SerializerError as e:
Example #16
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka import OFFSET_BEGINNING
from elasticsearch import Elasticsearch

es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
es.indices.create(index='prgs1min', ignore=400)

c = AvroConsumer({
    'default.topic.config': {
        'auto.offset.reset': 'earliest'
    },
    'bootstrap.servers': 'ip address:9092',
    'group.id': 'test-consumer-group',
    'schema.registry.url': 'http://localhost:8081'
})


def my_assign(consumer, partitions):
    for p in partitions:
        p.offset = OFFSET_BEGINNING
    print('assign', partitions)
    consumer.assign(partitions)


c.subscribe(['PRGS1MIN'], on_assign=my_assign)
running = True

while running:
    try:
Example #17
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

c = AvroConsumer({
    'bootstrap.servers': 'kafka1:9092',
    'auto.offset.reset': 'earliest',
    'group.id': 'local',
    'schema.registry.url': 'http://kafka-schema-registry:8081'
})
c.subscribe(['my_topic2'])
running = True
while running:
    try:
        msg = c.poll(10)
        if msg:
            if not msg.error():
                print(msg.value())
            elif msg.error().code() != KafkaError._PARTITION_EOF:
                print(msg.error())
                running = False
    except SerializerError as e:
        print("Message deserialization failed for %s: %s" % (msg, e))
        running = False

c.close()
Example #18
 def setup(self):
     self.consumer = AvroConsumer(self.get_consumer_settings())
     self.consumer.subscribe([self.get_topic_name()])
Example #19
class KafkaWorker(BaseWorker):
    topic_name = None
    consumer_name = None
    consumer_settings = {}
    commit_on_complete = False
    async_commit = True
    poll_timeout = 0
    auto_offset_reset = 'earliest'
    consumer = None
    last_message = None

    def setup(self):
        self.consumer = AvroConsumer(self.get_consumer_settings())
        self.consumer.subscribe([self.get_topic_name()])

    def teardown(self):
        if self.consumer:
            self.consumer.close()

    def get_topic_name(self):
        return self.topic_name or utils.config_missing('topic name')

    def get_consumer_name(self):
        return self.consumer_name or utils.generate_random_consumer_name()

    def get_consumer_settings(self):
        default_settings = {
            'group.id': self.get_consumer_name(),
            'default.topic.config': {'auto.offset.reset': self.auto_offset_reset},
            'enable.auto.commit': False,
            'bootstrap.servers': utils.get_broker_url(),
            'schema.registry.url': utils.get_schema_registry_url(),
            'session.timeout.ms': 10000,
            'heartbeat.interval.ms': 1000,
            'api.version.request': True,
        }
        return utils.generate_client_settings(default_settings, self.consumer_settings)

    def poll(self):
        message = self.consumer.poll(timeout=self.poll_timeout)
        if message is not None:
            self.last_message = message
        return message

    def get_partitions(self):
        partitions = self.consumer.assignment()
        if not partitions:
            self.poll()
            partitions = self.consumer.assignment()
        return partitions

    def get_current_offsets(self):
        return self.consumer.position(self.get_partitions())

    def reset_consumer_offsets(self, offset):
        self.consumer.assign([TopicPartition(tp.topic, tp.partition, offset)
                              for tp in self.get_partitions()])

    def seek_to_timestamp(self, timestamp):
        timestamp_ms = dt_to_unix_ms(timestamp)
        partitions = self.get_partitions()
        for tp in partitions:
            tp.offset = timestamp_ms
        partitions = self.consumer.offsets_for_times(partitions)
        self.consumer.assign(partitions)

    def handle(self):
        message = self.poll()

        if message is None:
            self.wait()

        elif message.error():
            if message.error().code() == KafkaError._PARTITION_EOF:
                self.partition_eof(message)

            else:
                raise KafkaException(message.error())

        else:
            self._consume(message)

            if self.commit_on_complete:
                self.commit()

        self.done()

    def commit(self):
        if not self.consumer_settings.get('enable.auto.commit'):
            # `async` became a reserved word; confluent-kafka uses `asynchronous`.
            self.consumer.commit(asynchronous=self.async_commit)

    def _consume(self, message):
        self.consume_message(MessageValue(message))

    def consume_message(self, message):
        pass

    def partition_eof(self, message):
        pass
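A sketch of a concrete worker built on this base class; the topic and handler are hypothetical:

class PurchaseWorker(KafkaWorker):
    topic_name = 'purchases'
    commit_on_complete = True

    def consume_message(self, message):
        # `message` arrives as a MessageValue wrapper around the decoded record.
        print(message)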
Example #20
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError

import json, ast

c = AvroConsumer({
    'bootstrap.servers': "temple.di.uoa.gr:9092",
    'group.id': 'groupid',
    'schema.registry.url': "http://temple.di.uoa.gr:8081",
    'auto.offset.reset': 'latest'
})

c.subscribe(["FusionAlert"])

while True:
    try:
        msg = c.poll(10)

    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue

    if msg.error():
        if msg.error().code() == KafkaError._PARTITION_EOF:
            continue
        else:
            print(msg.error())
Example #21
 def setup(self):
     self.consumer = AvroConsumer(self.get_consumer_settings())
     self.consumer.subscribe([self.get_topic_name()])
Example #22
 def __init__(self, brokers, group, schema_registry_url):
     self.avro_consumer = AvroConsumer({
         'bootstrap.servers': brokers,
         'group.id': group,
         'auto.offset.reset': 'earliest',
         'schema.registry.url': schema_registry_url})
Example #23
    def create_consumer(
        self,
        group_id=None,
        server="127.0.0.1",
        port="9092",
        enable_auto_commit=True,
        auto_offset_reset="latest",
        schema_registry_url=None,
        auto_create_topics=True,
        key_deserializer=None,
        value_deserializer=None,
        legacy=True,
        **kwargs
    ):
        """Create Kafka Consumer and returns its `group_id` as string.

        Keyword Arguments:
        - ``server``: (str): IP address / domain, that the consumer should
            contact to bootstrap initial cluster metadata.
            Default: `127.0.0.1`.
        - ``port`` (int): Port number. Default: `9092`.
        - ``group_id`` (str or uuid.uuid4() if not set) : name of the consumer group
            to join for dynamic partition assignment (if enabled), and to use for fetching and
            committing offsets. If None, unique string is generated  (via uuid.uuid4())
            and offset commits are disabled. Default: `None`.
        - ``auto_offset_reset`` (str): A policy for resetting offsets on
            OffsetOutOfRange errors: `earliest` will move to the oldest
            available message, `latest` will move to the most recent. Any
            other value will raise the exception. Default: `latest`.
        - ``enable_auto_commit`` (bool): If true the consumer's offset will be
            periodically committed in the background. Default: `True`.
        - ``schema_registry_url`` (str): *required* for Avro Consumer.
            Full URL to avro schema endpoint.
        - ``auto_create_topics`` (bool): Consumers no longer trigger auto creation of topics,
            will be removed in future release. Default: `True`.
        - ``legacy`` (bool): Activate SerializingConsumer if 'False' else
            AvroConsumer (legacy) is used. Will be removed when confluent-kafka will deprecate this.
            Default: `True`.

        Note:
        Configuration parameters are described in more detail at
        https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md \n
        """
        if group_id is None:
            group_id = str(uuid.uuid4())

        if schema_registry_url and legacy:
            consumer = AvroConsumer({
                'bootstrap.servers': '{}:{}'.format(server, port),
                'group.id': group_id,
                'enable.auto.commit': enable_auto_commit,
                'allow.auto.create.topics': auto_create_topics,
                'auto.offset.reset': auto_offset_reset,
                'schema.registry.url': schema_registry_url,
                **kwargs})
        elif not legacy:
            consumer = DeserializingConsumer({
                'bootstrap.servers': '{}:{}'.format(server, port),
                'group.id': group_id,
                'enable.auto.commit': enable_auto_commit,
                'auto.offset.reset': auto_offset_reset,
                'key.deserializer': key_deserializer,
                'value.deserializer': value_deserializer,
                **kwargs})
        else:
            consumer = Consumer({
                'bootstrap.servers': '{}:{}'.format(server, port),
                'group.id': group_id,
                'enable.auto.commit': enable_auto_commit,
                'allow.auto.create.topics': auto_create_topics,
                'auto.offset.reset': auto_offset_reset,
                **kwargs})

        self.consumers[group_id] = consumer
        return group_id
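A usage sketch, assuming the enclosing keyword library (here called ConsumerLibrary, a hypothetical name) keeps the self.consumers mapping shown above:

lib = ConsumerLibrary()
group_id = lib.create_consumer(schema_registry_url='http://127.0.0.1:8081')
consumer = lib.consumers[group_id]
consumer.subscribe(['my-topic'])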
Example #24
def run_consumer(group_id: str, broker_url: str, registry_url: str,
                 subscription_name: str, apiestas_url: str):
    logger.info(
        f"Kafka consumer listening to {broker_url} and subscribed to {subscription_name}"
    )
    consumer = AvroConsumer({
        'group.id': group_id,
        'bootstrap.servers': broker_url,
        'schema.registry.url': registry_url
    })
    consumer.subscribe([subscription_name])
    while True:
        try:
            msg = consumer.poll(10)

        except SerializerError as e:
            logger.error("Message deserialization failed for {}: {}".format(
                msg, e))
            break

        if msg is None:
            continue

        if msg.error():
            logger.error("AvroConsumer error: {}".format(msg.error()))
            continue

        # Find surebets
        doc = json.loads(msg.value())
        parse_mongo_dates(doc)
        try:
            match = MatchInDB(**doc)
        except ValidationError as e:
            logger.error(
                f"Error generating model from Kafka event. Event: {msg.value()}"
            )
            continue

        # We do not process surebets that were created less than one minute ago
        if not (match.surebets and datetime.utcnow() -
                min(surebet.created_at
                    for surebet in match.surebets) < timedelta(minutes=1)):
            surebets = SureBetsFinder(match.bets).find_all()
            if surebets:
                logger.info(
                    f"{len(surebets)} surebets found for match '{match.slug}'")
                data = json.dumps(list(map(lambda x: x.dict(), surebets)))
                try:
                    response = requests.post(
                        f"{apiestas_url}/api/matches/{match.slug}/surebets",
                        data=data)
                    if response.status_code != 200:
                        logger.error(
                            f"There was an error submitting the surebet. Data: {data}"
                        )
                except Exception:
                    logger.error(
                        f"There was an error submitting the surebet. Data: {data}",
                        exc_info=True)
            else:
                logger.info(f"No surebets found for match '{match.slug}'")

    consumer.close()

Example #25
    zookeeperServer = hostname
    zookeeperPort = 2185
    zookeeper = zookeeperServer + ":" + str(zookeeperPort)

    schemaRegistryServer = hostname
    schemaRegistryPort = 8081

    topic = 'my_topic'

    SCHEMA_REGISTRY_URL = 'http://' + schemaRegistryServer + ':' + str(
        schemaRegistryPort)

    print """\nINFO: Kakfa Connection Details:

        Kafka Broker : %s
        Zookeeper    : %s
        Topic        : %s  """ % (kafkaBroker, zookeeper, topic)

    conf = {
        'bootstrap.servers': kafkaBroker,
        'group.id': 'mygroup',
        'schema.registry.url': SCHEMA_REGISTRY_URL
    }

    print "\nINFO: Create Client obj for Kafka Connection"

    avroConsume_client = AvroConsumer(conf)

    consume_messages(avroConsume_client, topic=topic)
Example #26
 "name": "Student",
 "fields": [
     {"name": "first_name", "type": ["null", "string"], "default": null, "doc": "First name of the student"},
     {"name": "last_name", "type": ["null", "string"], "default": null, "doc": "Last name of the student"},
     {"name": "class", "type": "int", "default": 1, "doc": "Class of the student"}
 ]
}
"""

value_schema = avro.loads(value_schema_str)

c = AvroConsumer(
    {
        'bootstrap.servers':
        'peter-kafka01.foo.bar,peter-kafka02.foo.bar,peter-kafka03.foo.bar',
        'group.id': 'python-groupid02',
        'auto.offset.reset': 'earliest',
        'schema.registry.url': 'http://peter-kafka03.foo.bar:8081'
    },
    reader_value_schema=value_schema)

c.subscribe(['peter-avro2'])

while True:
    try:
        msg = c.poll(10)

    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break
Example #27
from confluent_kafka.avro import AvroConsumer
import phonenumbers
from phonenumbers.phonenumberutil import region_code_for_number
import pandas as pd
import pycountry
c = AvroConsumer({
    'bootstrap.servers': '10.254.34.155:9092',
    'group.id': 'test-grupa',
    'schema.registry.url': 'http://10.254.34.155:8081',
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
})
c.subscribe(['tel_buildset_fer'])


def translated():
    return True


def get_df():

    while True:
        try:
            first_msg = c.poll(10)
            mapa = first_msg.value()
            print(mapa)
            mapa['CODE'] = region_code_for_number(
                phonenumbers.parse('+' + str(int(mapa['CALLEE']))))
            callee_country = pycountry.countries.get(
                alpha_2=region_code_for_number(
Example #28

if __name__ == '__main__':

    default_group_name = "default-consumer-group"

    # Push messages to Transactions Topic
    # producer = AvroProducer(bootstrap_servers=KAFKA_BROKER_URL, value_serializer=lambda value: json.dumps(value).encode())

    consumer_config = {
        "bootstrap.servers": KAFKA_BROKER_URL,
        "schema.registry.url": SCHEMA_REGISTRY_URL,
        "group.id": default_group_name,
        "auto.offset.reset": "earliest"}

    consumer = AvroConsumer(consumer_config)
    # Consumer: Read from Transactions Topic
    print("Created Consumer")

    consumer.subscribe([TRANSACTIONS_TOPIC])
    print(f"Consumer subscribed to {TRANSACTIONS_TOPIC}")
    
    raw_messages = []

    # As we read messages pushed from producer to the consumer - classify: 
    while True:
        try:
            message = consumer.poll(5)
            print(f"Polled for message: {message}")
        except SerializerError as e:
            # print(f"Exception while trying to poll messages: {e}")
Example #29
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError


c = AvroConsumer({
    'bootstrap.servers': 'localhost:32772,localhost:32773,localhost:32774',
    'group.id': 'avro-consumer',
    'schema.registry.url': 'http://localhost:8081',
    'auto.offset.reset': 'earliest'
})

c.subscribe(['my_topic'])

while True:
    try:
        msg = c.poll(1.0)

    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        continue

    if msg.error():
        print("AvroConsumer error: {}".format(msg.error()))
        continue

    print(msg.value())

c.close()
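Newer confluent-kafka releases deprecate AvroConsumer. A rough equivalent of the consumer above using DeserializingConsumer, offered as a sketch:

from confluent_kafka import DeserializingConsumer
from confluent_kafka.schema_registry import SchemaRegistryClient
from confluent_kafka.schema_registry.avro import AvroDeserializer

registry = SchemaRegistryClient({'url': 'http://localhost:8081'})
consumer = DeserializingConsumer({
    'bootstrap.servers': 'localhost:32772,localhost:32773,localhost:32774',
    'group.id': 'avro-consumer',
    'auto.offset.reset': 'earliest',
    'value.deserializer': AvroDeserializer(registry),
})
consumer.subscribe(['my_topic'])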
Example #30
from confluent_kafka import KafkaError, avro
from confluent_kafka.avro import AvroConsumer, AvroProducer
from confluent_kafka.avro.serializer import SerializerError
from confluent_kafka import TopicPartition
from math import ceil
from calculation import calculate_average, calculate_ranking
from schema import key_schema_avg_str, key_schema_rank_str, value_schema_avg_str, value_schema_rank_str
import datetime

c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'join-ksql',
    'schema.registry.url': 'http://0.0.0.0:8081'
    })

# c.assign([Partition])

c.subscribe(['students_result_source'])

value_schema_avg = avro.loads(value_schema_avg_str)
key_schema_avg = avro.loads(key_schema_avg_str)
value_schema_rank = avro.loads(value_schema_rank_str)
key_schema_rank = avro.loads(key_schema_rank_str)

producer_avg = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
    'schema.registry.url': 'http://0.0.0.0:8081'
}, default_key_schema=key_schema_avg, default_value_schema=value_schema_avg)

producer_rank = AvroProducer({
    'bootstrap.servers': 'localhost:9092',
Example #31
from confluent_kafka import KafkaError
from confluent_kafka.avro import AvroConsumer
from confluent_kafka.avro.serializer import SerializerError


c = AvroConsumer({
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'groupid2',
    'schema.registry.url': 'http://127.0.0.1:8081',
    'default.topic.config': {
        'auto.offset.reset': 'smallest'
    }
})


c.subscribe(['passenger2'])

while True:
    try:
        msg = c.poll(1)

    except SerializerError as e:
        print("Message deserialization failed for {}: {}".format(msg, e))
        break

    if msg is None:
        print("no message")
        continue

    if msg.error():
        print("AvroConsumer error: {}".format(msg.error()))
Example #32
def avro_consumer(urls, topics, uav_name):

    c = AvroConsumer(urls)
    c.subscribe(topics)

    check_time = 0
    msges = []
    c_topic = ""
    loop = len(topics)

    while True:
        try:
            msg = c.poll(10)
        except SerializerError as e:
            # print("Message deserialization failed for {}: {}".format(msg, e))
            break

        if msg is None:
            continue

        if msg.error():
            # print("AvroConsumer error: {}".format(msg.error()))
            continue

        m = msg.value()

        if m["header"]['sourceSystem'] == uav_name:

            if check_time == 0 or check_time == m["header"]["time"]:
                c.unsubscribe()
                check_time = m["header"]["time"]
                c_topic = msg.topic()
                d = topics.index(c_topic)
                del topics[d]
                msges.append(msg)
                loop = loop - 1
                if loop == 0:
                    break
                c.subscribe(topics)

    c.close()

    # return the list of consumed avro messages (one for each topic - same timestamp)
    return msges
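Despite the parameter name, urls is passed straight to AvroConsumer, so it must be a full configuration dict. A usage sketch with placeholder values:

conf = {
    'bootstrap.servers': 'localhost:9092',
    'group.id': 'uav-collector',
    'schema.registry.url': 'http://localhost:8081',
}
messages = avro_consumer(conf, ['uav-position', 'uav-status'], 'uav-1')
for m in messages:
    print(m.topic(), m.value())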
Example #33
class KafkaConsumer:
    def __init__(self,
                 kafka_env='LOCAL',
                 kafka_brokers="",
                 kafka_apikey="",
                 topic_name="",
                 schema_registry_url="",
                 autocommit=True):
        self.kafka_env = kafka_env
        self.kafka_brokers = kafka_brokers
        self.kafka_apikey = kafka_apikey
        self.topic_name = topic_name
        self.schema_registry_url = schema_registry_url
        self.kafka_auto_commit = autocommit

    # See https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
    def prepareConsumer(self, groupID="pythonconsumers"):
        options = {
            'bootstrap.servers': self.kafka_brokers,
            'group.id': groupID,
            'auto.offset.reset': 'earliest',
            'schema.registry.url': self.schema_registry_url,
            'enable.auto.commit': self.kafka_auto_commit,
        }
        if (self.kafka_env != 'LOCAL' and self.kafka_env != 'MINIKUBE'):
            options['security.protocol'] = 'SASL_SSL'
            options['sasl.mechanisms'] = 'PLAIN'
            options['sasl.username'] = '******'
            options['sasl.password'] = self.kafka_apikey
        if (self.kafka_env == 'OCP'):
            options['ssl.ca.location'] = os.environ['PEM_CERT']
            options['schema.registry.ssl.ca.location'] = os.environ['PEM_CERT']
        print("This is the configuration for the consumer:")
        print(options)
        self.consumer = AvroConsumer(options)
        self.consumer.subscribe([self.topic_name])

    def traceResponse(self, msg):
        print(
            '@@@ pollNextOrder {} partition: [{}] at offset {} with key {}:\n\tvalue: {}'
            .format(msg.topic(), msg.partition(), msg.offset(), msg.key(),
                    msg.value()))

    def pollNextEvent(self, keyID, keyname):
        gotIt = False
        while not gotIt:
            try:
                msg = self.consumer.poll(timeout=10.0)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(
                    msg, e))
                break
            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                if "PARTITION_EOF" in str(msg.error()):
                    gotIt = True
                continue
            self.traceResponse(msg)
            if (msg.key()[keyname] == keyID):
                gotIt = True

    def close(self):
        self.consumer.close()
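A usage sketch for the class above; the key field name and ID are hypothetical, and a LOCAL environment skips the SASL settings:

consumer = KafkaConsumer(kafka_env='LOCAL',
                         kafka_brokers='localhost:9092',
                         topic_name='orders',
                         schema_registry_url='http://localhost:8081')
consumer.prepareConsumer()
consumer.pollNextEvent('order-123', 'orderID')
consumer.close()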