Exemplo n.º 1
0
class KafkaConsumer(object):
    """Helper around a confluent-kafka ``Consumer`` bound to a single topic.

    Provides a per-partition offset report, one-at-a-time partition
    assignment, and explicit shutdown of the underlying client.
    """

    def __init__(self, group_id, topic):
        """Create the underlying consumer for ``group_id`` and store ``topic``."""
        settings = {
            'bootstrap.servers': KAFKA_SERVER_HOSTS,
            'group.id': group_id,
            'session.timeout.ms': 6000,
            'default.topic.config': {
                'auto.offset.reset': 'smallest'
            }
        }
        self.client = Consumer(settings)
        self.topic = topic

    def query_kafka(self, max_part):
        """Print offset figures for partitions ``0 .. max_part - 1``.

        Each printed line contains, in order: partition id, low watermark,
        committed offset, high watermark, and high watermark minus committed
        offset.
        """
        for index in range(max_part):
            target = TopicPartition(self.topic, index)
            committed = self.client.committed([target])
            watermarks = self.client.get_watermark_offsets(target)
            entry = committed[0]
            low, high = watermarks[0], watermarks[1]
            row = (entry.partition, low, entry.offset, high,
                   high - entry.offset)
            print("%d %d %d %d %d" % row)

    def reset_kafka(self, tps):
        """Assign each partition in ``tps`` on its own, print it, then poll once."""
        client = self.client
        for partition in tps:
            client.assign([partition])
            print(partition)
            client.poll()

    def close(self):
        """Close the underlying consumer."""
        self.client.close()
Exemplo n.º 2
0
def run_consumer(tp):
    """Consume one partition and forward each message to the output topic.

    Assigns the consumer to ``tp`` and polls until the consumer reports an
    error or a message with an empty payload arrives (treated as
    end-of-stream). Every other message is printed and passed to
    ``write_on_kafka_out``; an ``IndexError`` raised by that call is reported
    and the loop carries on. The consumer is always closed on exit.

    Args:
        tp: the ``TopicPartition`` to read from.
    """
    c = Consumer({
        'bootstrap.servers': 'localhost:9092',
        'group.id': 'CarloGroup',
        'default.topic.config': {
            'auto.offset.reset': 'smallest'
        }
    })
    try:
        c.assign([tp])
        while True:
            msg = c.poll(timeout=1000)
            if msg is None:
                # poll() timed out without delivering anything; keep waiting.
                continue
            # Look at the error first: an error event has no usable payload.
            if msg.error():
                # str() is required, the error object is not a string.
                print("ERRORE:" + str(msg.error()))
                break
            payload = msg.value()
            text = payload.decode("utf-8") if payload is not None else ""
            if not text:
                print("EOF " + msg.topic() + " " + str(msg.partition()))
                break
            print(text)
            try:
                write_on_kafka_out(msg)
            except IndexError:
                print("IndexError")
    finally:
        c.close()
Exemplo n.º 3
0
async def consume(topic_name):
    """Read partition 0 of ``topic_name`` from the beginning, indefinitely.

    Prints the assignment and position once, then loops: poll for up to one
    second, print the message, the error, or a "nothing received" notice, and
    sleep one second on the event loop. The consumer is closed when the
    coroutine is cancelled or fails.

    Args:
        topic_name: name of the topic to read.
    """
    c = Consumer({
        "bootstrap.servers": "PLAINTEXT://localhost:9092",
        "group.id": "0",
    })
    try:
        # Manual assignment with an explicit start offset, rather than a
        # group subscription.
        topic_partition = TopicPartition(topic_name, 0, OFFSET_BEGINNING)
        c.assign([topic_partition])

        assignment = c.assignment()
        print(f"assignment: {assignment}")

        position = c.position([topic_partition])
        print(f"position: {position}")

        while True:
            # NOTE(review): poll() is a synchronous call, so the event loop
            # cannot run other tasks while it waits (up to 1 second).
            message = c.poll(1.0)
            if message is None:
                print("no message received by consumer")
            elif message.error() is not None:
                print(f"error from consumer {message.error()}")
            else:
                print(f"consumed message {message.key()}: {message.value()}")
            await asyncio.sleep(1)
    finally:
        c.close()
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError
    """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    c = Consumer(settings)

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # Every operation below must be rejected once the consumer is closed.
    closed_calls = (
        lambda: c.subscribe(['test']),
        lambda: c.unsubscribe(),
        lambda: c.poll(),
        lambda: c.consume(),
        lambda: c.assign([TopicPartition('test', 0)]),
        lambda: c.unassign(),
        lambda: c.assignment(),
        lambda: c.commit(),
        lambda: c.committed([TopicPartition("test", 0)]),
        lambda: c.position([TopicPartition("test", 0)]),
        lambda: c.seek([TopicPartition("test", 0, 0)]),
        lambda: c.get_watermark_offsets(TopicPartition("test", 0)),
    )
    for call in closed_calls:
        with pytest.raises(RuntimeError) as ex:
            call()
        assert ex.match('Consumer closed')
Exemplo n.º 5
0
def consume_verify_weird_messages(topic, partition, bootstrap_servers):
    """Read three messages from offset 0 and compare them with ``weird_messages``.

    Message ``i`` must match ``weird_messages[i]`` and must fail the equality
    check against every other entry. A consumer error prints a diagnostic and
    exits the process with status 255.
    """
    consumer = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False,
        'auto.offset.reset': 'beginning',
    })
    consumer.assign([TopicPartition(topic, partition, 0)])

    expected_count = 3
    received = []
    while len(received) < expected_count:
        message = consumer.poll(1.0)
        if message is None:
            continue
        if message.error():
            print("Consumer error: {}".format(message.error()))
            exit(255)
        received.append(message)

    consumer.close()

    for index, message in enumerate(received):
        # Positive check against the matching reference entry ...
        check_msg_equality(index, weird_messages[index], message)
        # ... then negative checks against all the others.
        for other, candidate in enumerate(weird_messages):
            if other != index:
                check_msg_equality(index, candidate, message, expectFail=True)
Exemplo n.º 6
0
    def consume(self):
        """Feed messages from ``self.topic``/``self.partition`` to the model.

        Polls until ``self.stop`` is no longer ``False``. Each regular
        message's stripped payload is passed to ``self.model.train``; once
        ``self.model.count`` equals ``self.end`` the stop flag is set.
        End-of-partition events are reported on stderr; any other consumer
        error is raised as ``KafkaException``. The consumer is always closed.
        """
        consumer = Consumer({
            "bootstrap.servers": self.bootstrap_servers,
            'group.id': self.group_id,
            'auto.offset.reset': 'smallest'
        })
        try:
            target = TopicPartition(topic=self.topic, partition=self.partition)
            consumer.assign([target])

            while self.stop is False:
                msg = consumer.poll(timeout=1.0)
                if msg is None:
                    # Nothing delivered; back off briefly before polling again.
                    sleep(0.2)
                    continue

                error = msg.error()
                if not error:
                    self.model.train(msg.value().strip(), self.group_id)
                    if self.model.count == self.end:
                        self.stop = True
                    continue

                if error.code() == KafkaError._PARTITION_EOF:
                    # End of partition event
                    sys.stderr.write(
                        '%% %s [%d] reached end at offset %d\n' %
                        (msg.topic(), msg.partition(), msg.offset()))
                    continue

                raise KafkaException(error)
        finally:
            # Close down consumer to commit final offsets.
            consumer.close()
def test_any_method_after_close_throws_exception():
    """ Calling any consumer method after close should throw a RuntimeError
    """
    settings = {
        'group.id': 'test',
        'enable.auto.commit': True,
        'enable.auto.offset.store': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
    }
    c = Consumer(settings)

    c.subscribe(["test"])
    c.unsubscribe()
    c.close()

    # (method name, positional arguments) for every call that must be
    # rejected once the consumer is closed.
    closed_calls = [
        ('subscribe', (['test'],)),
        ('unsubscribe', ()),
        ('poll', ()),
        ('consume', ()),
        ('assign', ([TopicPartition('test', 0)],)),
        ('unassign', ()),
        ('assignment', ()),
        ('commit', ()),
        ('committed', ([TopicPartition("test", 0)],)),
        ('position', ([TopicPartition("test", 0)],)),
        ('seek', ([TopicPartition("test", 0, 0)],)),
        ('get_watermark_offsets', (TopicPartition("test", 0),)),
    ]
    for method_name, args in closed_calls:
        with pytest.raises(RuntimeError) as ex:
            getattr(c, method_name)(*args)
        assert 'Consumer closed' == str(ex.value)
Exemplo n.º 8
0
    def __init__(self, topic, group, que_len=180):
        """Set up the per-field buffers and preload the most recent messages.

        Args:
            topic: Kafka topic to read; partition 0 is used.
            group: consumer group id.
            que_len: maximum number of data points kept per field.
        """
        self.topic = topic

        self.conf = {
            'bootstrap.servers': 'localhost:9092',
            'group.id': group,
            'enable.auto.commit': True,
        }

        # Bounded buffers: the application needs at most que_len data units
        # per field, so each deque is capped at that length.
        self.data = {
            'time': deque(maxlen=que_len),
            'Latitude': deque(maxlen=que_len),
            'Longitude': deque(maxlen=que_len),
            'Altitude': deque(maxlen=que_len)
        }

        consumer = Consumer(self.conf)
        consumer.subscribe([self.topic])

        # Find the range of offsets currently available in partition 0.
        self.partition = TopicPartition(topic=self.topic, partition=0)
        low_offset, high_offset = consumer.get_watermark_offsets(
            self.partition)

        # Rewind by up to que_len messages, but never before the low
        # watermark: an offset below it is out of range for the broker.
        self.partition.offset = max(low_offset, high_offset - que_len)

        # Start consuming from the rewound offset.
        consumer.assign([self.partition])

        self.__update_que(consumer)
Exemplo n.º 9
0
def consume(config,
            topic,
            group_id="activity",
            auto_offset_reset="earliest",
            handle_key=0):
    """Consume ``topic`` until interrupted, logging every record.

    Args:
        config: base consumer configuration; it is copied, not modified.
        topic: topic to subscribe to.
        group_id: value for ``group.id``.
        auto_offset_reset: value for ``auto.offset.reset``.
        handle_key: when non-zero, the partition number that is additionally
            assigned explicitly.

    The loop ends on ``KeyboardInterrupt``; the total number of consumed
    records is logged and the consumer is closed in every case.
    """
    # Work on a copy so the caller's dict is not changed as a side effect.
    settings = dict(config)
    settings["group.id"] = group_id
    settings["auto.offset.reset"] = auto_offset_reset

    consumer = Consumer(settings)
    # NOTE(review): subscribe() and assign() are both used when handle_key is
    # non-zero; confirm the group rebalance does not replace the manual
    # assignment.
    consumer.subscribe([topic])
    if handle_key != 0:
        consumer.assign([TopicPartition(topic, handle_key)])
    total_count = 0

    try:
        while True:
            msg = consumer.poll(1)

            if msg is None:
                logging.warning("Waiting for message or event/error in poll()")
                continue
            elif msg.error():
                logging.error("error: {}".format(msg.error()))
            else:
                total_count += 1
                logging.info(
                    "Consumed record with key {} and value {}, and updated total count to {}"
                    .format(msg.key(), msg.value(), total_count))
    except KeyboardInterrupt:
        pass
    finally:
        logging.info("total consumed {} messages".format(total_count))
        consumer.close()
Exemplo n.º 10
0
def get_latest_applied(client_options, topic_name, read_timeout=1.0):
    """Return the payload of the last message in partition 0 of a topic.

    Args:
        client_options: base consumer configuration; it is copied, not
            modified. ``auto.offset.reset`` and ``enable.auto.commit`` are
            overridden in the copy.
        topic_name: topic to read.
        read_timeout: seconds to wait for the message.

    Returns:
        The payload decoded as UTF-8, or ``None`` when no message (or a
        message without payload) was received in time.
    """
    options = dict(client_options)
    options.update({
        'auto.offset.reset': 'latest',
        'enable.auto.commit': False,
    })
    c = Consumer(options)
    try:
        # The watermark query may yield None instead of a (low, high) pair,
        # so do not unpack it blindly.
        watermarks = c.get_watermark_offsets(TopicPartition(topic_name, 0))
        high = watermarks[1] if watermarks is not None else None

        if high is not None and high > 0:
            last_msg_offset = high - 1
        else:
            last_msg_offset = 0

        c.assign([TopicPartition(topic_name, 0, last_msg_offset)])

        msgs = c.consume(num_messages=1, timeout=read_timeout)
        if not msgs:
            return None
        payload = msgs[0].value()
        return payload.decode('utf-8') if payload is not None else None
    finally:
        c.close()
Exemplo n.º 11
0
class LogReader:
    """Sequential reader over a single topic partition, from the beginning.

    Call ``init`` to create the consumer and the message stream, then
    ``read_until`` to advance through the stream until a predicate matches.
    """

    def __init__(self, bootstrap):
        """Remember the bootstrap servers; no connection is made yet."""
        self.bootstrap = bootstrap
        self.consumer = None
        self.stream = None

    def init(self, group, topic, partition):
        """Create the consumer, assign ``topic``/``partition`` from the
        beginning, and (re)create the message stream."""
        self.consumer = Consumer({
            "bootstrap.servers": self.bootstrap,
            "group.id": group,
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "isolation.level": "read_committed"
        })
        self.consumer.assign(
            [TopicPartition(topic, partition, OFFSET_BEGINNING)])
        self.stream = self.stream_gen()

    def stream_gen(self):
        """Endless generator yielding every message the consumer returns.

        Each underlying ``consume`` call waits up to 10 seconds; the generator
        does not yield while no messages arrive.
        """
        while True:
            msgs = self.consumer.consume(timeout=10)
            for msg in msgs:
                yield msg

    def read_until(self, check, timeout_s):
        """Advance the stream until ``check(offset, key, value)`` is truthy.

        Args:
            check: callable receiving the message offset, the key decoded as
                UTF-8 (``None`` when the message has no key) and the value
                decoded as UTF-8.
            timeout_s: overall time budget in seconds.

        Raises:
            KafkaException: when the time budget is exhausted, or when the
                stream delivers an error event.

        The deadline is checked after every message, so a steady flow of
        non-matching messages cannot run past it. The stream itself does not
        yield while idle, so during a quiet period the check only happens
        when the next message arrives.
        """
        begin = time.time()
        while True:
            if time.time() - begin > timeout_s:
                raise KafkaException(KafkaError(KafkaError._TIMED_OUT))
            for msg in self.stream:
                if msg.error():
                    raise KafkaException(msg.error())
                offset = msg.offset()
                value = msg.value().decode('utf-8')
                raw_key = msg.key()
                key = raw_key.decode('utf-8') if raw_key is not None else None
                if check(offset, key, value):
                    return
                if time.time() - begin > timeout_s:
                    raise KafkaException(KafkaError(KafkaError._TIMED_OUT))
Exemplo n.º 12
0
def consume_topic(broker, topic, start_from_oldest=False, truncate=False):
    """Print messages from every partition of ``topic`` until interrupted.

    Args:
        broker: bootstrap server address.
        topic: topic to read; must exist in the broker metadata.
        start_from_oldest: selects "earliest" instead of "latest" for
            ``auto.offset.reset``.
        truncate: print only the first 100 bytes of each payload.

    Raises:
        Exception: when the topic is not present in the broker metadata.

    The consumer is closed when the function exits for any reason.
    """
    consumer = KafkaConsumer({
        "bootstrap.servers":
        broker,
        # A fresh group id on every call, so no committed offsets are reused.
        "group.id":
        f"get-topic-{time.time_ns()}",
        "auto.offset.reset":
        "earliest" if start_from_oldest else "latest",
    })
    try:
        metadata = consumer.list_topics(topic)
        if topic not in metadata.topics:
            raise Exception("Topic does not exist")

        topic_partitions = [
            TopicPartition(topic, p) for p in metadata.topics[topic].partitions
        ]

        consumer.assign(topic_partitions)

        while True:
            msg = consumer.poll(0.0)

            # Compare with None explicitly: a message's truthiness follows
            # its payload length, so an empty payload would look like
            # "no message".
            if msg is not None:
                if msg.error():
                    print(f"Consumer error: {msg.error()}")
                else:
                    value = msg.value()
                    if truncate and value is not None:
                        value = value[0:100]
                    print(f"Timestamp: {msg.timestamp()[1]}\n{value}")

            time.sleep(0.1)
    finally:
        consumer.close()
Exemplo n.º 13
0
class KafkaQueryConsumer:
    """
    Thin facade over the Kafka client consumer, covering the calls that
    query the broker for metadata and poll for single messages.
    Keeping it this small lets unit tests substitute a fake.
    """
    def __init__(self, broker: str):
        # "enable.auto.commit" is off: there is no need to tell the broker
        # how far we got, because after a crash the process is simply
        # restarted and searches for the last run_start message again.
        #
        # "queued.min.messages" is 1: the partition is walked backwards one
        # message at a time, so fetching several messages in the forward
        # direction on every step back would be wasted work.
        settings = {
            "bootstrap.servers": broker,
            "group.id": "consumer_group_name",
            "auto.offset.reset": "latest",
            "enable.auto.commit": False,
            "queued.min.messages": 1
        }
        self._consumer = Consumer(**settings)

    def get_topic_partitions(self, topic: str, offset: int = -1):
        """
        Build one TopicPartition per partition listed in the topic
        metadata, each carrying the given offset
        """
        topic_metadata = self._consumer.list_topics(topic).topics[topic]
        return [
            TopicPartition(topic, info.id, offset=offset)
            for info in topic_metadata.partitions.values()
        ]

    def seek(self, partition: TopicPartition):
        """
        Move the consumer to the offset stored in the given partition
        """
        self._consumer.seek(partition)

    def poll(self, timeout=2.):
        """
        Fetch a single message from Kafka, waiting up to timeout
        """
        message = self._consumer.poll(timeout=timeout)
        return message

    def get_watermark_offsets(self,
                              partition: TopicPartition) -> Tuple[int, int]:
        """
        Query the low and high watermark offsets of the given
        partition, bypassing the client's cached values
        """
        watermarks = self._consumer.get_watermark_offsets(partition,
                                                          cached=False)
        return watermarks

    def assign(self, partitions: List[TopicPartition]):
        """
        Set the consumer's partition assignment
        """
        self._consumer.assign(partitions)

    def offsets_for_times(self, partitions: List[TopicPartition]):
        """
        Forward an offsets-for-times lookup to the consumer
        """
        return self._consumer.offsets_for_times(partitions)
def test_basic_api():
    """ Basic API tests, these wont really do anything since there is no
        broker configured. """

    # Constructing without a configuration must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    partitions = [TopicPartition("test", p) for p in range(0, 100, 3)]
    kc.assign(partitions)

    kc.unassign()

    # "async" is a reserved word since Python 3.7, so the keyword argument
    # is spelled "asynchronous".
    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == -1001]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    kc.close()
Exemplo n.º 15
0
def consume_everything(topic):
    """Return the messages currently stored in partition 0 of ``topic``.

    The number of messages requested is the span between the low and high
    watermark, and reading starts at the low watermark.

    Returns:
        A list of messages; empty when the partition holds none.

    NOTE(review): no timeout is passed to ``consume``, so the call waits until
    the requested number of messages has arrived; confirm this is acceptable
    for topics whose offsets have gaps.
    """
    consumer = Consumer({
        "bootstrap.servers": "localhost:9092",
        # Fresh random group per call so no committed offsets interfere.
        "group.id": str(uuid.uuid4())
    })
    try:
        low, high = consumer.get_watermark_offsets(TopicPartition(topic, 0))
        count = high - low
        if count <= 0:
            return []
        # Start at the first offset that still exists, not at a fixed 0.
        consumer.assign([TopicPartition(topic, 0, low)])
        return consumer.consume(count)
    finally:
        consumer.close()
Exemplo n.º 16
0
def poll_everything(topic):
    """Return the messages currently stored in partition 0 of ``topic``.

    The number of messages requested is the span between the low and high
    watermark, and reading starts at the low watermark.

    Returns:
        A list of messages; empty when the partition holds none.

    NOTE(review): no timeout is passed to ``consume``, so the call waits until
    the requested number of messages has arrived; confirm this is acceptable
    for topics whose offsets have gaps.
    """
    consumer = Consumer({
        'bootstrap.servers': 'localhost:9092',
        # Fresh random group per call so no committed offsets interfere.
        'group.id': str(uuid.uuid4())
    })
    try:
        low, high = consumer.get_watermark_offsets(TopicPartition(topic, 0))
        count = high - low
        if count <= 0:
            return []
        # Start at the first offset that still exists, not at a fixed 0.
        consumer.assign([TopicPartition(topic, 0, low)])
        return consumer.consume(count)
    finally:
        consumer.close()
Exemplo n.º 17
0
    def initialize_from_kafka(self, kafka_topic: str,
                              kafka_config: Dict[str, Any]) -> None:
        """
        Replay partition 0 of ``kafka_topic`` into this object.

        Every message with a non-empty payload is decoded as UTF-8 and passed
        to ``self.update``. Reading stops at the first poll that returns
        nothing within 2 seconds. Does nothing when ``kafka_topic`` is empty.

        Raises:
            Exception: the watermark query returned nothing (timeout or
                missing topic).
            KafkaException: the consumer delivered an error event.
            AssertionError: fewer messages were read than the watermarks
                say the partition holds.

        TODO: this method does not fail if client can't connect to host.
        """
        if not kafka_topic:
            return

        print("Fetching state from kafka topic: {}".format(kafka_topic),
              file=sys.stderr)

        conf = kafka_config.copy()
        conf.update({
            "group.id": "dummy_init_group",  # should never be committed
            "enable.auto.commit": False,
            "auto.offset.reset": "earliest",
            "session.timeout.ms": 10000,
        })
        consumer = Consumer(conf)
        try:
            # this watermark fetch is mostly to ensure we are connected to
            # broker and fail fast if not, but we also confirm that we read
            # to end below.
            hwm = consumer.get_watermark_offsets(
                TopicPartition(kafka_topic, 0), timeout=5.0, cached=False)
            if not hwm:
                raise Exception(
                    "Kafka consumer timeout, or topic {} doesn't exist".format(
                        kafka_topic))

            consumer.assign([TopicPartition(kafka_topic, 0, 0)])
            c = 0
            while True:
                msg = consumer.poll(timeout=2.0)
                # Compare with None explicitly: a message's truthiness
                # follows its payload length, so an empty payload must not
                # end the replay.
                if msg is None:
                    break
                if msg.error():
                    raise KafkaException(msg.error())
                payload = msg.value()
                if payload:
                    self.update(payload.decode("utf-8"))
                c += 1
        finally:
            consumer.close()

        # verify that we read the whole low..high watermark span; raised
        # explicitly so the check is not dropped under "python -O".
        expected = hwm[1] - hwm[0]
        if c < expected:
            raise AssertionError(
                "read {} messages from {}, expected at least {}".format(
                    c, kafka_topic, expected))
        print("... got {} state update messages, done".format(c),
              file=sys.stderr)
    def messages(self):
        """Yield raw message payloads from partition 0 of ``self.topic``.

        The start offset is chosen by the first flag that is set among
        ``from_end``, ``from_stored``, ``from_beginning`` and
        ``from_invalid``. SASL/SCRAM credentials are added to the
        configuration when both ``k_user`` and ``k_password`` are set.

        Yields:
            The payload of every message that carries no error.

        Raises:
            ValueError: none of the start-offset flags is set.

        End-of-partition events are skipped. Any other consumer error, like
        any other exception in the read loop, is printed and ends the
        generator. The consumer is closed whenever the generator finishes,
        fails or is closed by the caller.
        """
        config = {
            'bootstrap.servers': self.bootstrap_servers,
            "group.id": self.consumer_group,
            'enable.auto.commit': True,
            "fetch.wait.max.ms": 3000,
            "max.poll.interval.ms": 60000,
            'session.timeout.ms': 60000,
            "on_commit": self._on_send_response,
            "default.topic.config": {
                "auto.offset.reset": "latest"
            }
        }
        if self.k_user and self.k_password:
            config['security.protocol'] = 'SASL_PLAINTEXT'
            config['sasl.mechanism'] = 'SCRAM-SHA-256'
            config['sasl.username'] = self.k_user
            config['sasl.password'] = self.k_password

        # Resolve the start offset before creating the consumer, so a
        # misconfiguration fails with a clear message and nothing to clean up.
        if self.from_end:
            offset = OFFSET_END
        elif self.from_stored:
            offset = OFFSET_STORED
        elif self.from_beginning:
            offset = OFFSET_BEGINNING
        elif self.from_invalid:
            offset = OFFSET_INVALID
        else:
            raise ValueError(
                "no start offset selected: set one of from_end, from_stored, "
                "from_beginning or from_invalid")

        consumer = Consumer(config)
        try:
            consumer.assign([TopicPartition(self.topic, 0, offset)])
            try:
                while True:
                    ret = consumer.consume(num_messages=100, timeout=0.1)
                    if not ret:
                        continue
                    for msg in ret:
                        error = msg.error()
                        if error is None:
                            # protobuf binary
                            yield msg.value()
                        elif error.code() != KafkaError._PARTITION_EOF:
                            raise Exception(error)
            except Exception as e:
                print(e)
            except KeyboardInterrupt:
                pass
        finally:
            consumer.close()
Exemplo n.º 19
0
def main():
    """Show, and optionally dump, the frames of a jpeg image topic.

    Command line: ``broker topic [-f] [-d] [-o OFFSET]``. The topic name must
    end with ``.Image.jpg``. Frames are read from partition 0 starting at the
    given offset, displayed in a window named after the topic and, with
    ``--dump``, also written to ``<topic>_<timestamp>.jpg``. Pressing 'q' in
    the window ends the program. The consumer is closed on exit.
    """
    # parse and check command line args
    parser = argparse.ArgumentParser(
        epilog=
        """Description:
           Plays and optionaly dumps video from a jpeg topic (a topic that ends with Image.jpg)."""
        , formatter_class=RawTextHelpFormatter
    )
    parser.add_argument("broker", help="The name of the kafka broker.", type=str)
    parser.add_argument("topic", help="The name of topic (*.Image.jpg).", type=str)
    parser.add_argument('-f', "--full_screen", action='store_true')
    parser.add_argument('-d', "--dump", help="if set images are stored in jpg files", action='store_true')
    parser.add_argument('-o', "--offset", type=int, default=-1)
    args = parser.parse_args()
    if not args.topic.endswith(".Image.jpg"):
        # Report through the parser so the user gets the usage text and a
        # proper exit status instead of a traceback.
        parser.error('The topic must be a jpeg image topic (should end with .Image.jpg)')

    # handle full screen
    window_name = args.topic
    if args.full_screen:
        cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)
        cv2.setWindowProperty(window_name, cv2.WND_PROP_FULLSCREEN, cv2.WINDOW_FULLSCREEN)

    # create consumer positioned at the requested offset
    c = Consumer({'bootstrap.servers': args.broker, 'group.id': 'display', 'auto.offset.reset': 'latest'})
    try:
        c.assign([TopicPartition(topic=args.topic, partition=0, offset=args.offset)])

        # read frames and show (or dump) them
        while True:
            msg = c.poll(1.0)

            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            timestamp = msg.timestamp()[1]
            img = decode_image_message(msg)
            if isinstance(img, np.ndarray):
                if args.dump:
                    cv2.imwrite(args.topic + "_" + str(timestamp) + ".jpg", img)
                cv2.imshow(window_name, img)
            k = cv2.waitKey(33)
            if k == 113:  # The 'q' key to stop
                break
            elif k == -1:  # normally -1 returned,so don't print it
                continue
            else:
                print("Press 'q' key for EXIT!")
    finally:
        c.close()
Exemplo n.º 20
0
    def get_graph_data(self):
        """Refresh the buffers from Kafka and return them as plain lists.

        Returns:
            A dict with the same keys as ``self.data``, each value converted
            to a ``list``.
        """
        fresh_consumer = Consumer(self.conf)
        fresh_consumer.subscribe([self.topic])

        # Query the low/high offsets first; per the original author the
        # following steps do not work without this call.
        fresh_consumer.get_watermark_offsets(self.partition)

        # Apply the locally stored partition/offset to this consumer.
        fresh_consumer.assign([self.partition])

        self.__update_que(fresh_consumer)

        # Convert the stored sequences into plain lists for the caller.
        return {name: list(values) for name, values in self.data.items()}
Exemplo n.º 21
0
def get_last_available_status_message(cons: Consumer, status_topic: str):
    """
    Fetch the most recent message on partition 0 of the status topic.

    The high watermark of the consumer's first assigned partition is queried
    (uncached, 2 second timeout); the consumer is then re-assigned to the
    offset just below it and polled for a valid message.

    :param cons: Consumer that already has at least one partition assigned.
    :param status_topic: Name of the status topic.
    :return: The last status message.
    """
    first_assigned = cons.assignment()[0]
    _low, high = cons.get_watermark_offsets(first_assigned,
                                            cached=False,
                                            timeout=2.0)
    target = TopicPartition(status_topic, partition=0, offset=high - 1)
    cons.assign([target])
    message, _ = poll_for_valid_message(cons, expected_file_identifier=None)
    return message
Exemplo n.º 22
0
    def get_consumer(self, topic):
        ''' Build a consumer for partition 0 of the topic, positioned at the
        offset that corresponds to the start timestamp (self.start). '''

        settings = {
            'bootstrap.servers': self.config['DEFAULT']['KafkaServer'],
            'group.id': 'mygroup',
            'client.id': 'client-1',
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'max.poll.interval.ms': 60000000,
            'default.topic.config': {'auto.offset.reset': 'smallest'}
        }
        consumer = Consumer(settings)

        # The offset slot carries the start time (dt2ts result times 1000);
        # offsets_for_times resolves it to a real offset.
        start_stamp = dt2ts(self.start)*1000
        resolved = consumer.offsets_for_times(
            [TopicPartition(topic, 0, start_stamp)])
        consumer.assign(resolved)
        return consumer
Exemplo n.º 23
0
def consume(param):
    """Dump every alert of one topic partition into ``<candid>.avro`` files.

    Args:
        param: sequence whose first five items are
            ``(kafka_server, topic, partition, output_dir, group)``.

    Reads until the end of the partition is reported. Each payload is parsed
    with fastavro to obtain its ``candid``, then the raw bytes are written to
    ``output_dir/<candid>.avro``. A failure while writing one file is logged
    and does not stop the run. The consumer is always closed.
    """
    kafka_server, topic, partition, output_dir, group = param[:5]
    partition = int(partition)

    c = Consumer({
        'bootstrap.servers': kafka_server,
        'group.id': group,
        'auto.offset.reset': 'earliest'
    })
    try:
        c.assign([TopicPartition(topic, partition)])

        while True:
            msg = c.poll(1.0)

            if msg is None:
                continue
            if msg.error():
                if msg.error().name() == '_PARTITION_EOF':
                    break
                continue

            bytes_alert = msg.value()

            # Only the first record is needed, to read the candidate id.
            reader = fastavro.reader(io.BytesIO(bytes_alert))
            data = next(reader)
            candid_str = str(data['candid'])

            try:
                output_path = os.path.join(output_dir, "%s.avro" % (candid_str))
                with open(output_path, "wb") as outfile:
                    outfile.write(bytes_alert)
            except Exception:
                # Best effort per file; KeyboardInterrupt/SystemExit are no
                # longer swallowed here.
                logger.error("error %s" % (candid_str))
                traceback.print_exc()
    finally:
        c.close()
    print("Closing Consumer (%s)" % (str(partition)))
Exemplo n.º 24
0
def consume_verify_messages(topic, partition, start_num, count, bootstrap_servers):
    """Read ``count`` messages from offset 0 and verify each in sequence.

    The k-th message (counting from zero) is passed to ``verify_messages``
    together with ``start_num + k`` and the partition. A consumer error
    prints a diagnostic and exits the process with status 255.
    """
    consumer = Consumer({
        'bootstrap.servers': bootstrap_servers,
        'group.id': 'group2',
        'enable.auto.commit': False,
        'auto.offset.reset': 'beginning',
    })
    consumer.assign([TopicPartition(topic, partition, 0)])

    verified = 0
    while verified < count:
        message = consumer.poll(1.0)
        if message is None:
            continue
        if message.error():
            print("Consumer error: {}".format(message.error()))
            exit(255)
        verify_messages(start_num + verified, partition, message)
        verified += 1

    consumer.close()
Exemplo n.º 25
0
def most_recent_message(topic, kafka_config):
    """
    Tries to fetch the most recent message from a given topic.

    This only makes sense for single partition topics (it works with only the
    first partition), though could be extended with "last N" behavior.

    Returns the payload of the last message, or None when the high watermark
    is 0 (new topic).

    Raises Exception when the watermark query returns nothing or no message
    arrives within 2 seconds, and KafkaException when the consumer delivers
    an error. The consumer is closed on every path.
    """

    print("Fetching most Kafka message from {}".format(topic))

    conf = kafka_config.copy()
    conf.update({
        'group.id': 'worker-init-last-msg',  # should never commit
        'delivery.report.only.error': True,
        'enable.auto.commit': False,
        'default.topic.config': {
            'request.required.acks': -1,
            'auto.offset.reset': 'latest',
        },
    })

    consumer = Consumer(conf)
    try:
        hwm = consumer.get_watermark_offsets(TopicPartition(topic, 0),
                                             timeout=5.0,
                                             cached=False)
        if not hwm:
            raise Exception(
                "Kafka consumer timeout, or topic {} doesn't exist".format(topic))
        print("High watermarks: {}".format(hwm))

        if hwm[1] == 0:
            print("topic is new; not 'most recent message'")
            return None

        consumer.assign([TopicPartition(topic, 0, hwm[1] - 1)])
        msg = consumer.poll(2.0)
    finally:
        consumer.close()

    # Compare with None explicitly: a message's truthiness follows its
    # payload length, so an empty payload is still a fetched message.
    if msg is None:
        raise Exception("Failed to fetch most recent kafka message")
    if msg.error():
        raise KafkaException(msg.error())
    return msg.value()
Exemplo n.º 26
0
def test_on_commit():
    """ Verify that on_commit is only called once per commit() (issue #71) """
    class CommitState(object):
        """ Expected topic/partition plus a flag cleared by the first callback """
        def __init__(self, topic, partition):
            self.topic, self.partition = topic, partition
            self.once = True

    def commit_cb(cs, err, ps):
        print('on_commit: err %s, partitions %s' % (err, ps))
        assert cs.once is True
        assert err == KafkaError._NO_OFFSET
        assert len(ps) == 1
        committed = ps[0]
        assert committed.topic == cs.topic
        assert committed.partition == cs.partition
        cs.once = False

    cs = CommitState('test', 2)

    def on_commit(err, ps):
        # Bind the shared state into the two-argument callback.
        return commit_cb(cs, err, ps)

    settings = {
        'group.id': 'x',
        'enable.auto.commit': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
        'on_commit': on_commit,
    }
    c = Consumer(settings)

    c.assign([TopicPartition(cs.topic, cs.partition)])

    for _ in range(2):
        c.poll(0.1)

        if not cs.once:
            continue

        # Try commit once
        try:
            c.commit(asynchronous=False)
        except KafkaException as e:
            print('commit failed with %s (expected)' % e)
            assert e.args[0].code() == KafkaError._NO_OFFSET

    c.close()
Exemplo n.º 27
0
    def get_consumer(self, topics):
        ''' Create a consumer for partition 0 of every topic in topics,
        positioned at the offset that corresponds to the start timestamp
        (self.start).

        Creation is retried after any exception. Returns the consumer, or
        None once the retry budget is used up. A consumer created during a
        failed attempt is closed before the next try. '''
        willtry = 0
        while True:
            if willtry > 1000:
                logging.error("failed to create consumer: no try left")
                return None

            consumer = None
            try:
                consumer = Consumer({
                    'bootstrap.servers':
                    self.config["DEFAULT"]["KafkaServer"],
                    'group.id':
                    'mygroup',
                    'client.id':
                    'client-1',
                    'enable.auto.commit':
                    True,
                    'session.timeout.ms':
                    6000,
                    'max.poll.interval.ms':
                    6000000,
                    'default.topic.config': {
                        'auto.offset.reset': 'smallest'
                    },
                })
                # The offset slot carries the start time (dt2ts result times
                # 1000); offsets_for_times resolves it to real offsets.
                topicPartitions = [
                    TopicPartition(topic, 0,
                                   dt2ts(self.start) * 1000)
                    for topic in topics
                ]
                offsetsTimestamp = consumer.offsets_for_times(topicPartitions)
                consumer.assign(offsetsTimestamp)
                logging.debug("[get_consumer] successfully create customer")

                return consumer

            except Exception as e:
                logging.error(
                    f"failed to create consumer: {e}, try {willtry}/1000")
                if consumer is not None:
                    # Do not leak the half-configured consumer on retry.
                    try:
                        consumer.close()
                    except Exception as close_error:
                        logging.debug(
                            f"[get_consumer] closing failed consumer: {close_error}")

            willtry += 1
Exemplo n.º 28
0
    def get_consumer(self) :
        ''' Create a consumer for the scheduler topic, positioned at the
        offset that corresponds to the start timestamp (self.start).

        Returns the consumer, or None when anything fails; the failure is
        logged and a partially set-up consumer is closed. '''
        # Bound before the try block so the error handler can always use them.
        topic = None
        consumer = None
        try :
            topic = f"{self.topic_header}_{self.config['BGPScheduler']['SchedulerTopic']}"
            consumer = Consumer({
                'bootstrap.servers': self.config['DEFAULT']['KafkaServer'],
                'group.id': 'mygroup',
                'client.id': 'client-1',
                'enable.auto.commit': True,
                'session.timeout.ms': 6000,
                'default.topic.config': {'auto.offset.reset': 'smallest'},
            })

            # The offset slot carries the start time (dt2ts result times
            # 1000); offsets_for_times resolves it to a real offset.
            topicPartitions = [ TopicPartition( topic, self.partition, dt2ts(self.start)*1000 ) ]
            offsetsTimestamp = consumer.offsets_for_times(topicPartitions)
            consumer.assign(offsetsTimestamp)

            return consumer
        except Exception as e :
            logging.error(f"[{topic}] {e}")
            if consumer is not None :
                # Do not leak the half-configured consumer.
                try :
                    consumer.close()
                except Exception as close_error :
                    logging.error(f"[{topic}] {close_error}")
            return
Exemplo n.º 29
0
def consume():
    """Print every message of ``topic_name``, reading each partition from offset 0.

    Uses the module-level ``conf`` (consumer configuration) and
    ``topic_name``. Loops forever; the consumer is closed when the loop is
    left through an exception.

    Raises:
        KafkaException: when a polled message carries an error.
    """
    c = Consumer(conf)
    try:
        num_partitions = len(c.list_topics().topics[topic_name].partitions)
        # range(num_partitions) covers every partition index; an upper bound
        # of ``num_partitions - 1`` would silently skip the last partition.
        topic_partitions = [
            TopicPartition(topic_name, partition_index, 0)
            for partition_index in range(num_partitions)
        ]

        # Manual assignment only: combining it with subscribe() lets a later
        # group rebalance replace these explicit start offsets.
        c.assign(topic_partitions)

        while True:
            msg = c.poll(5)
            if msg is None:
                # Poll timed out without a message; keep waiting.
                continue
            if msg.error():
                raise KafkaException(msg.error())
            print(msg.value())
    finally:
        c.close()
Exemplo n.º 30
0
class KafkaConsumer(object):
    """Wrapper around ``Consumer`` that reads one topic via manual assignment."""

    def __init__(self, **kwargs):
        """Build the underlying consumer.

        Keyword Args:
            services: Value for ``bootstrap.servers``.
            groupid: Value for ``group.id``.
            topic: Topic whose partitions are later passed to ``assign()``.

        Raises:
            KeyError: when one of the keyword arguments above is missing.
        """
        self.c = Consumer({
            'bootstrap.servers': kwargs['services'],
            'group.id': kwargs['groupid'],
            'socket.timeout.ms': '15000',
            'session.timeout.ms': 10000,
            'default.topic.config': {'auto.offset.reset': 'smallest'},
            'enable.auto.commit': "false"
        })
        # No subscribe() here: partitions are chosen explicitly via assign().
        self.topic = kwargs['topic']

    def assign(self, part_list):
        """Assign the consumer to the given partition numbers of ``self.topic``.

        Args:
            part_list: Iterable of partition indexes.
        """
        self.c.assign([TopicPartition(self.topic, i) for i in part_list])

    def __del__(self):
        """Close the underlying consumer, if one was ever created."""
        # ``self.c`` is missing when __init__ raised before assigning it;
        # the finalizer still runs in that case and must not fail.
        consumer = getattr(self, 'c', None)
        if consumer is not None:
            consumer.close()
Exemplo n.º 31
0
def dump_topics(topics, maxage):
    """Collect JSON messages newer than ``maxage`` from partition 0 of each topic.

    For every topic the start offset is looked up by timestamp, then messages
    are polled until a poll yields nothing. Messages without a ``timestamp``
    field are skipped. Keyed messages are de-duplicated (last one per key
    wins) and get their decoded key stored under ``"key"``; unkeyed messages
    are all kept.

    Args:
        topics: Iterable of topic names.
        maxage: Timezone-aware ``datetime`` cut-off (it is subtracted from a
            UTC-aware epoch to obtain the lookup timestamp in milliseconds).

    Returns:
        list: De-duplicated keyed messages followed by the unkeyed ones.
    """
    c = Consumer({
        'bootstrap.servers': 'localhost',
        'group.id': 'webrequest2',
        'default.topic.config': {
            'auto.offset.reset': 'earliest'
        }
    })
    # NOTE(review): c2 is only used for the timestamp -> offset lookup and is
    # never closed here - confirm whether this KafkaConsumer needs a close().
    c2 = KafkaConsumer()

    messages = {}
    noDedup = []
    try:
        for t in topics:
            tp = TopicPartition(t, 0)
            epoch_age = (maxage - datetime(
                1970, 1, 1).replace(tzinfo=pytz.utc)).total_seconds() * 1000
            offsets = c2.offsets_for_times({tp: epoch_age})
            offset = [v for v in offsets.values() if v][0][0]
            logger.info("Skipping to {offset}".format(offset=offset))
            c.assign([TopicPartition(t, 0, offset)])
            while True:
                msg = c.poll(timeout=100)
                if not msg:
                    # Nothing (more) to read for this topic.
                    break
                data = json.loads(msg.value().decode("utf8"))
                if "timestamp" not in data:
                    continue
                if not maxage or maxage < parse(data["timestamp"], fuzzy=True):
                    key = msg.key()
                    if key:
                        data["key"] = key.decode("utf8")
                        messages[key] = data
                    else:
                        noDedup.append(data)
    finally:
        # Release the consumer even when a lookup or a decode fails.
        c.close()
    # dict views cannot be concatenated with a list on Python 3.
    return list(messages.values()) + noDedup
def test_on_commit():
    """Check that one commit() invokes the on_commit callback only once (issue #71)."""

    class CommitState(object):
        """Expected topic/partition plus a flag cleared by the first callback."""

        def __init__(self, topic, partition):
            self.topic, self.partition = topic, partition
            self.once = True

    state = CommitState('test', 2)

    def on_commit(err, committed):
        print('on_commit: err %s, partitions %s' % (err, committed))
        # A second invocation would find the flag already cleared and fail.
        assert state.once is True
        assert err == KafkaError._NO_OFFSET
        assert len(committed) == 1
        only = committed[0]
        assert only.topic == state.topic
        assert only.partition == state.partition
        state.once = False

    settings = {
        'group.id': 'x',
        'enable.auto.commit': False,
        'socket.timeout.ms': 50,
        'session.timeout.ms': 100,
        'on_commit': on_commit,
    }
    consumer = Consumer(settings)
    consumer.assign([TopicPartition(state.topic, state.partition)])

    for _ in range(2):
        consumer.poll(0.1)

        if not state.once:
            # The callback already fired; do not commit again.
            continue

        try:
            consumer.commit(asynchronous=False)
        except KafkaException as exc:
            print('commit failed with %s (expected)' % exc)
            assert exc.args[0].code() == KafkaError._NO_OFFSET

    consumer.close()
Exemplo n.º 33
0
def morning_notice():
    """Print the newest message of partitions 0 and 1 of topic ``test``.

    Each partition is assigned at the offset just below its high watermark
    and polled once (3 second timeout). Consumer positions are printed before
    and after every poll for inspection. The consumer is always closed.
    """
    # One topic is created per stock, containing 5 partitions (the list below
    # nevertheless covers partitions 0-9): partition 0 holds the snapshot
    # fetched from futu, partition 1 the real-time quotes, partition 2 the
    # real-time K-line, partition 3 the real-time intraday (time-sharing)
    # data, partition 4 the real-time tick-by-tick data, partition 5 the
    # real-time order book, partition 6 the real-time broker queue, and
    # partitions 7-9 are currently unused.
    consumer = Consumer({
        'bootstrap.servers': 'kafka01',
        'group.id': 'test',
        'enable.auto.commit': False,
        'default.topic.config': {
            'auto.offset.reset': 'largest'
        }
    })

    try:
        # Queried inside the ``try`` so a broker failure still closes the
        # consumer.
        (rise_ratio_list_smallest,
         rise_ratio_list_largest) = consumer.get_watermark_offsets(
             TopicPartition('test', 0))
        (volume_list_smallest,
         volume_list_largest) = consumer.get_watermark_offsets(
             TopicPartition('test', 1))

        # The start offset is carried by assign() itself. A seek() right
        # after assign() targets a partition that is not being fetched yet
        # and can fail with "Erroneous state", so none is issued.
        consumer.assign(
            [TopicPartition('test', 0, rise_ratio_list_largest - 1)])
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        latest_rise_ratio = consumer.poll(3.0)
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))

        print(latest_rise_ratio)
        consumer.assign([TopicPartition('test', 1, volume_list_largest - 1)])
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        volume_message = consumer.poll(3.0)
        # poll() returns None on timeout; report that instead of crashing.
        latest_volume = (volume_message.value()
                         if volume_message is not None else None)
        print(consumer.position([TopicPartition('test', 0)]))
        print(consumer.position([TopicPartition('test', 1)]))
        print(latest_volume)
    finally:
        consumer.close()
def test_basic_api():
    """Exercise the basic Consumer API without a configured broker.

    Nothing is actually consumed; the calls are expected to either succeed
    trivially or fail with the timeout/transport/state errors asserted below.
    """

    # Constructing a Consumer without a configuration dict must be rejected.
    try:
        kc = Consumer()
    except TypeError as e:
        assert str(e) == "expected configuration dict"

    def dummy_commit_cb(err, partitions):
        pass

    kc = Consumer({'group.id': 'test', 'socket.timeout.ms': '100',
                   'session.timeout.ms': 1000,  # Avoid close() blocking too long
                   'on_commit': dummy_commit_cb})

    kc.subscribe(["test"])
    kc.unsubscribe()

    def dummy_assign_revoke(consumer, partitions):
        pass

    kc.subscribe(["test"], on_assign=dummy_assign_revoke, on_revoke=dummy_assign_revoke)
    kc.unsubscribe()

    msg = kc.poll(timeout=0.001)
    if msg is None:
        print('OK: poll() timeout')
    elif msg.error():
        print('OK: consumer error: %s' % msg.error().str())
    else:
        print('OK: consumed message')

    if msg is not None:
        assert msg.timestamp() == (TIMESTAMP_NOT_AVAILABLE, -1)

    msglist = kc.consume(num_messages=10, timeout=0.001)
    assert len(msglist) == 0, "expected 0 messages, not %d" % len(msglist)

    # num_messages outside the accepted range is rejected up front.
    with pytest.raises(ValueError) as ex:
        kc.consume(-100)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    with pytest.raises(ValueError) as ex:
        kc.consume(1000001)
    assert 'num_messages must be between 0 and 1000000 (1M)' == str(ex.value)

    partitions = list(map(lambda part: TopicPartition("test", part), range(0, 100, 3)))
    kc.assign(partitions)

    # Seeking right after assign() is expected to fail with a state error.
    with pytest.raises(KafkaException) as ex:
        kc.seek(TopicPartition("test", 0, 123))
    assert 'Erroneous state' in str(ex.value)

    # Verify assignment
    assignment = kc.assignment()
    assert partitions == assignment

    # Pause partitions
    kc.pause(partitions)

    # Resume partitions
    kc.resume(partitions)

    # Get cached watermarks, should all be invalid.
    lo, hi = kc.get_watermark_offsets(partitions[0], cached=True)
    assert lo == -1001 and hi == -1001
    assert lo == OFFSET_INVALID and hi == OFFSET_INVALID

    # Query broker for watermarks, should raise an exception.
    try:
        lo, hi = kc.get_watermark_offsets(partitions[0], timeout=0.5, cached=False)
    except KafkaException as e:
        # ``e.args`` is a tuple: index it (calling it would raise TypeError
        # and hide the real error whenever this assertion fails).
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._WAIT_COORD, KafkaError.LEADER_NOT_AVAILABLE),\
            str(e.args[0])

    kc.unassign()

    kc.commit(asynchronous=True)

    try:
        kc.commit(asynchronous=False)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._NO_OFFSET)

    # Get current position, should all be invalid.
    kc.position(partitions)
    assert len([p for p in partitions if p.offset == OFFSET_INVALID]) == len(partitions)

    try:
        kc.committed(partitions, timeout=0.001)
    except KafkaException as e:
        assert e.args[0].code() == KafkaError._TIMED_OUT

    try:
        kc.list_topics(timeout=0.2)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    try:
        kc.list_topics(topic="hi", timeout=0.1)
    except KafkaException as e:
        assert e.args[0].code() in (KafkaError._TIMED_OUT, KafkaError._TRANSPORT)

    kc.close()
Exemplo n.º 35
0
class SynchronizedConsumer(object):
    """
    This class implements the framework for a consumer that is intended to only
    consume messages that have already been consumed and committed by members
    of another consumer group.

    This works similarly to the Kafka built-in ``__consumer_offsets`` topic.
    The consumer group that is being "followed" (the one that must make
    progress for our consumer here to make progress, identified by the
    ``synchronize_commit_group`` constructor parameter/instance attribute) must
    report its offsets to a topic (identified by the ``commit_log_topic``
    constructor parameter/instance attribute). This consumer subscribes to both
    commit log topic, as well as the topic(s) that we are actually interested
    in consuming messages from. The messages received from the commit log topic
    control whether or not consumption from partitions belonging to the main
    topic is paused, resumed, or allowed to continue in its current state
    without changes.

    The furthest point in any partition that this consumer should ever consume
    to is the maximum offset that has been recorded to the commit log topic for
    that partition. If the offsets recorded to that topic move
    non-monotonically (due to an intentional offset rollback, for instance)
    this consumer *may* consume up to the highest watermark point. (The
    implementation here tries to pause consuming from the partition as soon as
    possible, but this makes no explicit guarantees about that behavior.)
    """
    # Maps the accepted ``initial_offset_reset`` constructor values to the
    # function used to pick a starting offset when none has been committed.
    initial_offset_reset_strategies = {
        'earliest': get_earliest_offset,
        'latest': get_latest_offset,
    }

    def __init__(self, bootstrap_servers, consumer_group, commit_log_topic,
                 synchronize_commit_group, initial_offset_reset='latest', on_commit=None):
        """
        Start the commit log consumer and create the main consumer.

        :param bootstrap_servers: value used for ``bootstrap.servers``
        :param consumer_group: value used for ``group.id`` of the main consumer
        :param commit_log_topic: topic the followed group reports its offsets to
        :param synchronize_commit_group: name of the group being followed
        :param initial_offset_reset: key of ``initial_offset_reset_strategies``
            (``'earliest'`` or ``'latest'``); any other value raises ``KeyError``
        :param on_commit: optional callable invoked as
            ``on_commit(error, partitions)`` from the commit callback
        """
        self.bootstrap_servers = bootstrap_servers
        self.consumer_group = consumer_group
        self.commit_log_topic = commit_log_topic
        self.synchronize_commit_group = synchronize_commit_group
        self.initial_offset_reset = self.initial_offset_reset_strategies[initial_offset_reset]

        # The state manager must exist before the commit log consumer is
        # started, because it is handed to that consumer.
        self.__partition_state_manager = SynchronizedPartitionStateManager(
            self.__on_partition_state_change)
        self.__commit_log_consumer, self.__commit_log_consumer_stop_request = self.__start_commit_log_consumer()

        # (topic, partition) -> next offset to consume, as last seen locally.
        self.__positions = {}

        def commit_callback(error, partitions):
            """Forward commit results to the user-supplied ``on_commit``, if any."""
            if on_commit is not None:
                return on_commit(error, partitions)

        consumer_configuration = {
            'bootstrap.servers': self.bootstrap_servers,
            'group.id': self.consumer_group,
            'enable.auto.commit': 'false',
            'enable.auto.offset.store': 'true',
            'enable.partition.eof': 'false',
            'default.topic.config': {
                'auto.offset.reset': 'error',
            },
            'on_commit': commit_callback,
        }

        self.__consumer = Consumer(consumer_configuration)

    def __start_commit_log_consumer(self, timeout=None):
        """
        Starts running the commit log consumer.

        Hands ``run_commit_log_consumer`` to ``execute`` and then waits (up to
        ``timeout``) on the start event passed to it.

        :return: ``(result, stop_request_event)`` where ``result`` is whatever
            ``execute`` returned and ``stop_request_event`` is the
            ``threading.Event`` passed to the commit log consumer as its stop
            request.
        """
        stop_request_event = threading.Event()
        start_event = threading.Event()
        # NOTE(review): ``execute`` presumably runs the callable in a
        # background thread and returns a future-like object (it is later
        # used via ``.running()`` and ``.result()``) - confirm.
        result = execute(
            functools.partial(
                run_commit_log_consumer,
                bootstrap_servers=self.bootstrap_servers,
                # A unique group name is generated for every instance.
                consumer_group='{}:sync:{}'.format(self.consumer_group, uuid.uuid1().hex),
                commit_log_topic=self.commit_log_topic,
                synchronize_commit_group=self.synchronize_commit_group,
                partition_state_manager=self.__partition_state_manager,
                start_event=start_event,
                stop_request_event=stop_request_event,
            ),
        )
        start_event.wait(timeout)
        return result, stop_request_event

    def __check_commit_log_consumer_running(self):
        """
        Raise if the commit log consumer is no longer running.

        :raises Exception: when ``running()`` reports false and fetching the
            result did not itself raise something other than ``TimeoutError``.
        """
        if not self.__commit_log_consumer.running():
            # NOTE(review): ``result(timeout=0)`` presumably re-raises the
            # error that terminated the commit log consumer, if any - confirm
            # against the object returned by ``execute``.
            try:
                result = self.__commit_log_consumer.result(timeout=0)  # noqa
            except TimeoutError:
                pass  # not helpful

            raise Exception('Commit log consumer unexpectedly exit!')

    def __on_partition_state_change(
            self, topic, partition, previous_state_and_offsets, current_state_and_offsets):
        """
        Callback that is invoked when a partition state changes.

        Pauses the partition on the main consumer when the new state is
        ``UNKNOWN``, ``SYNCHRONIZED`` or ``REMOTE_BEHIND`` and resumes it when
        the new state is ``LOCAL_BEHIND``. Does nothing while no local offset
        is known for the partition.

        :raises NotImplementedError: for any other state.
        """
        logger.debug('State change for %r: %r to %r', (topic, partition),
                     previous_state_and_offsets, current_state_and_offsets)

        current_state, current_offsets = current_state_and_offsets
        if current_offsets.local is None:
            # It only makes sense to manipulate the consumer if we've got an
            # assignment. (This block should only be entered at startup if the
            # remote offsets are retrieved from the commit log before the local
            # consumer has received its assignment.)
            return

        # TODO: This will be called from the commit log consumer thread, so need
        # to verify that calling the ``consumer.{pause,resume}`` methods is
        # thread safe!
        if current_state in (SynchronizedPartitionState.UNKNOWN, SynchronizedPartitionState.SYNCHRONIZED,
                             SynchronizedPartitionState.REMOTE_BEHIND):
            self.__consumer.pause([TopicPartition(topic, partition, current_offsets.local)])
        elif current_state is SynchronizedPartitionState.LOCAL_BEHIND:
            self.__consumer.resume([TopicPartition(topic, partition, current_offsets.local)])
        else:
            raise NotImplementedError('Unexpected partition state: %s' % (current_state,))

    def subscribe(self, topics, on_assign=None, on_revoke=None):
        """
        Subscribe to a topic.

        :param topics: topics passed through to the main consumer's
            ``subscribe``
        :param on_assign: optional callable invoked as
            ``on_assign(self, partitions)`` after partitions were assigned
        :param on_revoke: optional callable invoked as
            ``on_revoke(self, assignment)`` after partitions were revoked
        :raises Exception: when the commit log consumer is no longer running
        """
        self.__check_commit_log_consumer_running()

        def assignment_callback(consumer, assignment):
            """
            Assign the partitions with explicit starting offsets.

            The offset for each partition is, in order of preference: the
            locally remembered position, the committed offset (when greater
            than -1), or the result of the configured initial offset reset
            strategy.
            """
            # Since ``auto.offset.reset`` is set to ``error`` to force human
            # interaction on an offset reset, we have to explicitly specify the
            # starting offset if no offset has been committed for this topic during
            # the ``__consumer_offsets`` topic retention period.
            assignment = {
                (i.topic, i.partition): self.__positions.get((i.topic, i.partition)) for i in assignment
            }

            # Only partitions without a remembered position are looked up.
            for i in self.__consumer.committed([TopicPartition(topic, partition) for (
                    topic, partition), offset in assignment.items() if offset is None]):
                k = (i.topic, i.partition)
                if i.offset > -1:
                    assignment[k] = i.offset
                else:
                    assignment[k] = self.initial_offset_reset(consumer, i.topic, i.partition)

            self.__consumer.assign([TopicPartition(topic, partition, offset)
                                    for (topic, partition), offset in assignment.items()])

            for (topic, partition), offset in assignment.items():
                # Setting the local offsets will either cause the partition to be
                # paused (if the remote offset is unknown or the local offset is
                # not trailing the remote offset) or resumed.
                self.__partition_state_manager.set_local_offset(topic, partition, offset)
                self.__positions[(topic, partition)] = offset

            if on_assign is not None:
                on_assign(self, [TopicPartition(topic, partition)
                                 for topic, partition in assignment.keys()])

        def revocation_callback(consumer, assignment):
            """Forget the remembered positions of the revoked partitions."""
            for item in assignment:
                # TODO: This should probably also be removed from the state manager.
                self.__positions.pop((item.topic, item.partition))

            if on_revoke is not None:
                on_revoke(self, assignment)

        self.__consumer.subscribe(
            topics,
            on_assign=assignment_callback,
            on_revoke=revocation_callback)

    def poll(self, timeout):
        """
        Poll the main consumer once.

        :param timeout: passed through to the main consumer's ``poll``
        :return: ``None`` when nothing was received; the message unchanged
            when it carries an error; otherwise the message, after it has been
            validated against the partition state and the local offset has
            been advanced to ``message.offset() + 1``.
        :raises Exception: when the commit log consumer is no longer running
        """
        self.__check_commit_log_consumer_running()

        message = self.__consumer.poll(timeout)
        if message is None:
            return

        # Error messages are handed back without touching any offsets.
        if message.error() is not None:
            return message

        self.__partition_state_manager.validate_local_message(
            message.topic(), message.partition(), message.offset())
        self.__partition_state_manager.set_local_offset(
            message.topic(), message.partition(), message.offset() + 1)
        self.__positions[(message.topic(), message.partition())] = message.offset() + 1

        return message

    def commit(self, *args, **kwargs):
        """
        Commit on the main consumer, forwarding all arguments unchanged.

        :raises Exception: when the commit log consumer is no longer running
        """
        self.__check_commit_log_consumer_running()

        return self.__consumer.commit(*args, **kwargs)

    def close(self):
        """
        Request the commit log consumer to stop and close the main consumer.

        The commit log consumer's ``result()`` is called afterwards even if
        closing the main consumer raised.

        :raises Exception: when the commit log consumer is no longer running
            (in that case nothing is closed).
        """
        self.__check_commit_log_consumer_running()

        self.__commit_log_consumer_stop_request.set()
        try:
            self.__consumer.close()
        finally:
            self.__commit_log_consumer.result()