Example #1
def __init__(self, kafkaHost=None, kafkaGroup=None, kafkaTopic=None,
             consumerType=NON_CONSUMER, consumerPartitions=None,
             producerType=NON_PRODUCER, producerPartitions=None):
    self.kafkaHost = kafkaHost
    self.kafkaGroup = kafkaGroup
    self.kafkaTopic = kafkaTopic
    # Avoid mutable default arguments: a shared list default would leak
    # state between instances.
    self.consumerPartitions = consumerPartitions or []
    self.producerPartitions = producerPartitions or []
    self.connect(kafkaHost)
    try:
        # Note: later kafka-python releases renamed the legacy `async`
        # kwarg to `async_send` for Python 3.7 compatibility.
        if producerType == self.SIMPLE_PRODUCER:
            self.producer = SimpleProducer(self.kafkaClient, async=False,
                                           req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.FIXED_PRODUCER:
            self.producer = FixedProducer(self.kafkaClient, self.producerPartitions[0],
                                          async=False, req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.USER_PRODUCER:
            self.producer = UserProducer(self.kafkaClient, async=False,
                                         req_acks=KeyedProducer.ACK_NOT_REQUIRED)
        elif producerType == self.NON_PRODUCER:
            self.producer = None
        else:
            raise ValueError("wrong producer type {}".format(producerType))

        if consumerType == self.SIMPLE_CONSUMER:
            if not self.consumerPartitions:
                self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup, self.kafkaTopic)
            else:
                self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup,
                                               self.kafkaTopic, partitions=self.consumerPartitions)
            logger.debug('consumer is listening on {}@{}'.format(self.kafkaTopic, self.consumerPartitions))
        elif consumerType == self.NON_CONSUMER:
            self.consumer = None
        else:
            raise ValueError("wrong consumer type {}".format(consumerType))

    except Exception as e:
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
        self.consumer = None
        self.producer = None
        self.kafkaClient = None
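
A minimal usage sketch for this constructor, assuming the enclosing class is called KafkaAgent (a hypothetical name; the fragment does not show it) and that connect() sets up self.kafkaClient:

# Hypothetical class name and broker address, for illustration only.
agent = KafkaAgent(
    kafkaHost='localhost:9092',
    kafkaGroup='my-group',
    kafkaTopic='my-topic',
    consumerType=KafkaAgent.SIMPLE_CONSUMER,
    producerType=KafkaAgent.SIMPLE_PRODUCER,
)
if agent.consumer is not None:
    for msg in agent.consumer.get_messages(count=10):
        print(msg.message.value)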
Example #2
def assert_kafka(self, expected_file_name):
    #print("reading server "+config.KAFKA_SERVER+" on topic:"+config.KAFKA_TOPIC)
    kafka_client = KafkaClient(config.KAFKA_SERVER)
    #SimpleConsumer takes its timeout in seconds, hence 1: long enough for all
    #messages to appear, without hanging too long.
    consumer = SimpleConsumer(kafka_client, b"my_group", config.KAFKA_TOPIC.encode("utf8"),
                              iter_timeout=1)
    #seek(1, 0) means: start from the beginning (the 0) but skip 1 message from
    #that index (the first msg). We bypass the first message since it is only
    #used to autostart the topic.
    consumer.seek(1, 0)
    actual = ""
    for msg in consumer:
        #the linefeed at the end is not strictly needed, but it makes for more
        #readable error reports
        actual += msg.message.value.decode('utf8') + "\n"
    expected = pkg_resources.resource_string(__name__, expected_file_name).decode('utf8')
    t_assert.equal(actual, expected)
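
For context, a sketch of the producer side that such a test assumes, using the same legacy kafka-python API (the broker address and payloads are illustrative); the first "autostart" message is the one the consumer skips with seek(1, 0):

from kafka import KafkaClient, SimpleProducer

kafka_client = KafkaClient('localhost:9092')   # illustrative address
producer = SimpleProducer(kafka_client)
topic = config.KAFKA_TOPIC.encode('utf8')
# The first message merely autostarts the topic; the consumer above skips it.
producer.send_messages(topic, b'autostart')
producer.send_messages(topic, b'first real line', b'second real line')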
Example #3
def set_consumer_partition(self, consumerPartitions):
    if not consumerPartitions:
        logger.warning('consumer partitions cannot be empty')
        return

    # Commit offsets and stop the old consumer before switching partitions.
    if self.consumer:
        self.consumer.commit()
        self.consumer.stop()
        self.consumer = None
    self.consumerPartitions = consumerPartitions
    try:
        self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup,
                                       self.kafkaTopic, partitions=self.consumerPartitions)
    except KafkaError as e:
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
        # A KafkaError usually means a broken connection; try to re-establish it.
        self.reconnect()
    except Exception as e:
        logger.warning('Exception {}'.format(e))
        logger.debug(traceback.format_exc())
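
A short usage sketch, reusing the hypothetical KafkaAgent instance from Example #1:

# Move the consumer to partitions 0 and 2 of the configured topic.
agent.set_consumer_partition([0, 2])
assert agent.consumerPartitions == [0, 2]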
Example #4
def test_handler():
    topic = '%s-mutations' % (uuid.uuid1().hex,)

    codec = BinaryCodec(Message)

    client = KafkaClient('kafka')
    producer = SimpleProducer(client)
    writer = KafkaWriter(producer, topic, codec)

    inputs = list(transaction)
    writer.push(inputs)

    consumer = SimpleConsumer(client, 'test', topic, auto_offset_reset='smallest')

    # map() is lazy in Python 3; materialize it so the assertion compares
    # values rather than an iterator against a list.
    outputs = list(map(
        codec.decode,
        map(
            operator.attrgetter('message.value'),
            consumer.get_messages(count=3),
        ),
    ))

    assert outputs == inputs
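
Stripped of the codec and writer abstractions, the same round trip can be sketched with the legacy kafka-python API alone (broker address, topic, and payloads are illustrative):

from kafka import KafkaClient, SimpleProducer, SimpleConsumer

client = KafkaClient('localhost:9092')            # illustrative address
producer = SimpleProducer(client)
inputs = [b'a', b'b', b'c']
producer.send_messages(b'round-trip-test', *inputs)

consumer = SimpleConsumer(client, b'test', b'round-trip-test',
                          auto_offset_reset='smallest')
outputs = [m.message.value for m in consumer.get_messages(count=3)]
assert outputs == inputs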
Example #5
def _mp_consume(client, group, topic, message_queue, size, events,
                **consumer_options):
    """
    A child process worker which consumes messages based on the
    notifications given by the controller process

    NOTE: Ideally, this should have been a method inside the Consumer
    class. However, multiprocessing module has issues in windows. The
    functionality breaks unless this function is kept outside of a class
    """

    # Initial interval for retries in seconds.
    interval = 1
    while not events.exit.is_set():
        try:
            # Make the child processes open separate socket connections
            client.reinit()

            # We will start consumers without auto-commit. Auto-commit will be
            # done by the master controller process.
            consumer = SimpleConsumer(client,
                                      group,
                                      topic,
                                      auto_commit=False,
                                      auto_commit_every_n=None,
                                      auto_commit_every_t=None,
                                      **consumer_options)

            # Ensure that the consumer provides the partition information
            consumer.provide_partition_info()

            while True:
                # Wait until the controller signals us to start consuming
                events.start.wait()

                # If we are asked to quit, do so
                if events.exit.is_set():
                    break

                # Consume messages and add them to the queue. If the controller
                # indicates a specific number of messages, follow that advice
                count = 0

                message = consumer.get_message()
                if message:
                    while True:
                        try:
                            message_queue.put(
                                message, timeout=FULL_QUEUE_WAIT_TIME_SECONDS)
                            break
                        except queue.Full:
                            if events.exit.is_set():
                                break

                    count += 1

                    # We have reached the required size. The controller might
                    # have more than it needs, so wait for a while.
                    # Without this logic, we could run into a tight loop that
                    # consumes all available messages before the controller
                    # can reset the 'start' event.
                    if count == size.value:
                        events.pause.wait()

                else:
                    # In case we did not receive any message, give up the CPU for
                    # a while before we try again
                    time.sleep(NO_MESSAGES_WAIT_TIME_SECONDS)

            consumer.stop()

        except KafkaError as e:
            # Retry with exponential backoff
            log.error(
                "Problem communicating with Kafka (%s), retrying in %d seconds..."
                % (e, interval))
            time.sleep(interval)
            interval = interval * 2 if interval * 2 < MAX_BACKOFF_SECONDS else MAX_BACKOFF_SECONDS
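
For orientation, a sketch of the controller side that drives this worker. The Events container, queue size, and batch size are assumptions modeled on how kafka-python's legacy MultiProcessConsumer wires things up, not a verbatim copy:

import multiprocessing as mp
from collections import namedtuple

Events = namedtuple('Events', ['start', 'pause', 'exit'])

def start_workers(client, group, topic, num_workers=2):
    # Shared controls: the worker loops on exactly these attributes.
    events = Events(start=mp.Event(), pause=mp.Event(), exit=mp.Event())
    queue = mp.Queue(1024)                  # bounded, so workers can block
    size = mp.Value('i', 0)                 # messages requested per batch

    workers = [
        mp.Process(target=_mp_consume,
                   args=(client.copy(), group, topic, queue, size, events))
        for _ in range(num_workers)
    ]
    for w in workers:
        w.daemon = True
        w.start()

    size.value = 64      # ask each worker for up to 64 messages
    events.start.set()   # let the workers consume
    return events, queue, workers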