Example #1
 def assert_kafka(self, expected_file_name):
     #print("reading server "+config.KAFKA_SERVER+" on topic:"+config.KAFKA_TOPIC)
     kafka_client = KafkaClient(config.KAFKA_SERVER)
     # SimpleConsumer takes its timeout in seconds, hence 1: long enough for all messages to appear without hanging too long
     consumer = SimpleConsumer(kafka_client, b"my_group", config.KAFKA_TOPIC.encode("utf8"),
                                   iter_timeout=1)
     # seek(1, 0) means start from the beginning (the 0) but skip 1 message from that index (the first msg)
     # we bypass the first message since it is only used to auto-start the topic
     consumer.seek(1, 0)
     actual = ""
     for msg in consumer:
         #the linefeed at the end is not really needed but it makes for more readable error reports
         actual += msg.message.value.decode('utf8')+"\n"
     expected = pkg_resources.resource_string(__name__, expected_file_name).decode('utf8')
     t_assert.equal(actual, expected)
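A usage sketch for the helper above (hypothetical, since config, t_assert and the surrounding test class are project-specific): a test first produces messages through the system under test, then calls assert_kafka to compare everything published on the topic against a packaged fixture file.

# Hypothetical harness, only illustrating how assert_kafka is meant to be called;
# BaseKafkaTest and run_pipeline are assumed names, not part of the original project.
class PipelineOutputTest(BaseKafkaTest):
    def test_pipeline_output(self):
        # produce messages to config.KAFKA_TOPIC via the system under test
        self.run_pipeline("input_fixture.json")
        # compare the topic contents with a fixture shipped inside the test package
        self.assert_kafka("expected_output.txt")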
Example #2
    def run(self):
        client = KafkaClient("localhost:9092")

        consumer = SimpleConsumer(client, "test-group", "topic.test.min.v1",
            max_buffer_size=None,
        )

        self.valid = 0
        self.invalid = 0

        # expected payload length of each valid test message
        m_len = len("Hello master wayne" * 10)

        consumer.seek(0, 0)  # start reading from the beginning of the topic

        for message in consumer:
            try:
                if len(message.message.value) == m_len:
                    self.valid += 1
                else:
                    self.invalid += 1
            except Exception:
                print("Reset Offset")
                consumer.seek(0, 0)
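The consumer above only counts messages whose payload length matches "Hello master wayne" repeated ten times. For reference, a producer along these lines could generate that traffic; the topic name and payload come from the example, but the helper itself is an assumption rather than part of the original test.

# Hypothetical producer counterpart for the consumer test above.
from kafka import KafkaClient, SimpleProducer

def produce_test_messages(n=1000):
    client = KafkaClient("localhost:9092")
    producer = SimpleProducer(client)
    payload = "Hello master wayne" * 10  # same fixed-size payload the consumer checks for
    for _ in range(n):
        producer.send_messages("topic.test.min.v1", payload)
    client.close()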
Example #3
class KafkaBroker(object):
    USER_PRODUCER = 0
    FIXED_PRODUCER = 1
    SIMPLE_PRODUCER = 2
    NON_PRODUCER = 3
    SIMPLE_CONSUMER = 0
    NON_CONSUMER = 1
    SOCKET_TIMEOUT = 60  # seconds
    
    def __init__(self, kafkaHost=None, kafkaGroup=None, kafkaTopic=None, 
                 consumerType=NON_CONSUMER, consumerPartitions=[],
                 producerType=NON_PRODUCER, producerPartitions=[]):
        self.kafkaHost = kafkaHost
        self.kafkaGroup = kafkaGroup
        self.kafkaTopic = kafkaTopic
        self.consumerPartitions = consumerPartitions
        self.producerPartitions = producerPartitions
        self.connect(kafkaHost)
        try:
            if producerType == self.SIMPLE_PRODUCER:
                self.producer = SimpleProducer(self.kafkaClient, async=False, req_acks=KeyedProducer.ACK_NOT_REQUIRED)
            elif producerType == self.FIXED_PRODUCER:
                self.producer = FixedProducer(self.kafkaClient, producerPartitions[0], async=False, req_acks=KeyedProducer.ACK_NOT_REQUIRED)
            elif producerType == self.USER_PRODUCER:
                self.producer = UserProducer(self.kafkaClient, async=False, req_acks=KeyedProducer.ACK_NOT_REQUIRED)
            elif producerType == self.NON_PRODUCER:
                self.producer = None
            else:
                raise Exception("wrong producer type {}".format(producerType))
            
            if consumerType == self.SIMPLE_CONSUMER:
                if not consumerPartitions:
                    self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup, self.kafkaTopic)
                else:
                    self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup, 
                                                   self.kafkaTopic, partitions=self.consumerPartitions)
                logger.debug('consumer is listening on {}@{}'.format(self.kafkaTopic, self.consumerPartitions))
            elif consumerType == self.NON_CONSUMER:
                self.consumer = None
            else:
                raise Exception("wrong consumer type {}".format(consumerType))
                
        except Exception as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
            self.consumer = None
            self.producer = None
            self.kafkaClient = None
            
    def close(self):
        if self.consumer:
            self.consumer.commit()
            self.consumer.stop()
            self.consumer = None
        if self.producer:
            self.producer.stop()
            self.producer = None
        if self.kafkaClient:
            self.kafkaClient.close()
            self.kafkaClient = None
        logger.info('Kafka connection closed')
    
    def connect(self, kafkaHost, countdown=COUNT_DOWN):
        if countdown == 0:
            logger.error('could not connect to Kafka server within {} attempts'.format(COUNT_DOWN))
            return

        try:
            self.kafkaClient = KafkaClient(kafkaHost, timeout=self.SOCKET_TIMEOUT)
        except Exception:
            logger.warning('retrying Kafka connection, {} attempts left'.format(countdown))
            self.connect(kafkaHost, countdown - 1)
            return

        logger.info('Kafka client connected {}'.format(self.kafkaClient))
        
    def reconnect(self, countdown=COUNT_DOWN):
        if countdown == 0:
            logger.error('could not reconnect to Kafka server within {} attempts'.format(COUNT_DOWN))
            return

        try:
            self.kafkaClient.reinit()
        except Exception:
            self.reconnect(countdown - 1)
        
    def produce(self, op, name, **kwargs):
        # TODO: when name is None, the operation is propagated to all partitions 
        if not op or not name:
            logger.warning('op or name must not be empty')
            return
        try:
            dictMessage = dict(kwargs)
            dictMessage['op'] = op
            dictMessage['name'] = name
            encodedMessage = simplejson.dumps(dictMessage)
            self.producer.send(self.kafkaTopic, name, encodedMessage)
        except KafkaError as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
            self.reconnect()
        except Exception as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())

    def echo(self, message=''):
        self.produce('Echo', 'testing', message=message)
        
    def set_consumer_partition(self, consumerPartitions):
        if not consumerPartitions:
            logger.warning('consumer partitions can not be empty')
            return
            
        if self.consumer:
            self.consumer.commit()
            self.consumer.stop()
            self.consumer = None
        self.consumerPartitions = consumerPartitions
        try:
            self.consumer = SimpleConsumer(self.kafkaClient, self.kafkaGroup,
                                           self.kafkaTopic, partitions=self.consumerPartitions)
        except KafkaError as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
            self.reconnect()
        except Exception as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
    
    def is_consumer_ready(self):
        if not self.consumer:
            logger.warning('Consumer is not ready yet')
            return False
        return True
        
    def seek(self, skip):
        if self.is_consumer_ready():
            if skip == -1:
                # seek(0, 2): jump to the tail of the log
                self.consumer.seek(0, 2)
            else:
                # seek(skip, 1): advance `skip` messages from the current offset
                self.consumer.seek(skip, 1)
            
    def commit(self):
        if self.is_consumer_ready():
            self.consumer.commit()
            
    def consume_one(self):
        if not self.is_consumer_ready():
            return None
            
        try:
            message = self.consumer.get_message()
            if not message:
                return None
            logger.debug('received message {}'.format(message.message.value))
            return message.message.value
        except Exception as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
            self.reconnect()
        return None
        
    def consume(self, count=10):
        if not self.is_consumer_ready():
            return []
            
        try:
            messages = self.consumer.get_messages(count=count)
            return [message.message.value for message in messages]
        except Exception as e:
            logger.warning('Exception {}'.format(e))
            logger.debug(traceback.format_exc())
            self.reconnect()
        return []
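A minimal usage sketch for the class above; the host, group and topic values are made up, everything else relies only on the methods KafkaBroker itself defines.

# Hypothetical values; replace with a real broker address, group and topic.
broker = KafkaBroker(kafkaHost='localhost:9092',
                     kafkaGroup='my-group',
                     kafkaTopic='my-topic',
                     consumerType=KafkaBroker.SIMPLE_CONSUMER,
                     producerType=KafkaBroker.SIMPLE_PRODUCER)
broker.echo('hello')            # publishes {"op": "Echo", "name": "testing", "message": "hello"}
print(broker.consume(count=5))  # returns up to 5 raw message values
broker.commit()
broker.close()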
Example #4
    def consume(self, state):
        """
        Starts consuming from the configured Kafka topic given a possible
        existing ``pgshovel.interfaces.replication_pb2:State``.

        If the provided ``state`` does not contain a
        ``stream_state.consumer_state`` value, the ``KafkaStream`` attempts to
        start reading from the Kafka topic after first "priming" the stream.
        Priming involves consuming messages from the topic looking for a
        ``BeginOperation``. Any message that is not a ``BeginOperation`` is
        dropped until a ``BeginOperation`` is seen or the ``prime_threshold``
        is reached, in which case a
        ``pgshovel.streams.utilities:UnableToPrimeError`` is raised.
        (A simplified sketch of this priming loop follows the example.)

        In general, it makes sense to set the ``prime_threshold`` to a value
        high enough to exceed the maximum transaction size you expect to see in
        your data. Generally speaking, a ``prime_threshold`` can effectively be
        infinite (you could construct the stream with ``float('inf')``), but
        the lack of a ``BeginOperation`` in the stream would then cause the
        stream to hang, possibly forever, so the ``prime_threshold`` parameter
        is provided to raise an exception if this unexpected behavior occurs.
        """
        consumer = SimpleConsumer(KafkaClient(self.hosts), None, self.topic)

        # You can only update one offset at a time with kafka-python, plus
        # dealing with reconstituting global order from a partitioned stream is
        # hard and we don't really need to deal with it right now.
        assert len(consumer.offsets) == 1

        decoded = imap(
            lambda (offset, msg): (offset, self.codec.decode(msg.value)),
            consumer
        )

        if state.stream_state.HasField('consumer_state'):
            # Seeking to a direct offset was not in the PyPI release of
            # kafka-python when this was implemented:
            # https://github.com/mumrah/kafka-python/pull/412
            current = consumer.offsets[0]
            offset = state.stream_state.consumer_state.offset + 1
            delta = offset - current
            logger.debug('Moving to previous replication log offset: %s (current position: %s)...', offset, current)
            consumer.seek(delta, 1)
            assert consumer.offsets[0] == offset
        else:
            logger.info('No consumer state provided, will attempt to prime the stream to the first BeginOperation')
            # The call to ``prime_for_batch_start`` "primes" the stream by
            # dropping messages until it sees a message that is an instance of
            # one of the types in
            # ``pgshovel.replication.validation.TRANSACTION_START_EVENT_TYPES``
            decoded = prime_for_batch_start(
                max_messages=self.prime_threshold,
                stream=decoded
            )

        for offset, message in decoded:
            state = validate_state(state, offset, message)
            # XXX: This is necessary because of a bug in protocol buffer oneof.
            state = type(state).FromString(state.SerializeToString())
            yield state, offset, message
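The priming behaviour described in the docstring, dropping messages until a BeginOperation arrives and giving up after prime_threshold messages, is handled by prime_for_batch_start. A simplified stand-in that captures the same idea might look as follows; the predicate argument and the locally defined exception are assumptions for illustration, not pgshovel's actual code.

# Simplified sketch of the priming loop, based only on the docstring above.
class UnableToPrimeError(Exception):
    pass

def prime_sketch(stream, max_messages, is_begin_operation):
    """Drop (offset, message) pairs until one satisfies ``is_begin_operation``,
    then pass the rest of the stream through; give up after ``max_messages``."""
    for seen, (offset, message) in enumerate(stream, start=1):
        if is_begin_operation(message):
            yield offset, message
            break
        if seen >= max_messages:
            raise UnableToPrimeError(
                'no BeginOperation seen within {} messages'.format(max_messages))
    for item in stream:
        yield item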