Example 1
    def consumer_factory_(topic: str) -> Consumer:
        # Build a consumer with auto-commit disabled so the test stays in control of offsets.
        consumer = Consumer({
            "group.id": "asdf",
            "enable.auto.commit": False,
            "enable.partition.eof": False,
            **unittest_config.create_confluent_config(),
        })
        # Discover every partition of the topic and start reading each one from offset 0.
        partitions = consumer.list_topics(topic=topic).topics[topic].partitions

        consumer.assign([
            TopicPartition(topic=topic, partition=p, offset=0)
            for p in partitions
        ])
        consumers.append(consumer)  # collected so the surrounding test code can close them later
        return consumer
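
A factory like this is typically called from the test body and then drained with a poll loop. The sketch below is illustrative only; the topic name is hypothetical and error handling is reduced to the minimum.

# Illustrative sketch (not part of the original fixture): drain everything currently
# in a hypothetical topic, starting from offset 0 on every partition.
consumer = consumer_factory_("my-test-topic")
records = []
while True:
    msg = consumer.poll(1.0)
    if msg is None:        # nothing arrived within the timeout window
        break
    if msg.error():        # skip error events (transport errors etc.)
        continue
    records.append((msg.key(), msg.value()))
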
Example 2
# Imports assumed by this fragment; KAFK (the broker list) is defined elsewhere in the module.
import logging
import time

from confluent_kafka import Consumer


def kafka_consume_expected(topic,
                           group='0',
                           timeout=1.0,
                           mfilter=lambda x: True,
                           validator=lambda x: None,
                           after_subscribe=lambda: None):
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # start from the beginning when the group has no committed offset
    })
    msgs = []
    topics = consumer.list_topics(topic)  # may auto-create the topic if the broker allows it
    logging.debug("Topic state: %s", topics.topics)
    if topics.topics[topic].error is not None:
        logging.warning("Error subscribing to topic: %s", topics.topics)
        return msgs
    consumer.subscribe([topic])
    time.sleep(5)  # for kafka to rebalance consumer groups

    after_subscribe()

    logging.debug("Waiting for messages...")
    while True:
        msg = consumer.poll(timeout)

        if msg is None:
            break

        logging.info("Seen message: %r %r", msg.key(), msg.value())

        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue

        if mfilter(msg):
            validator(msg)
            msgs.append(msg)

    consumer.commit()
    consumer.close()

    return msgs
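
A hedged usage sketch follows; the topic name, group id, and validation logic are hypothetical, and it assumes the json module is available in the calling test.

# Hypothetical call: read everything on "orders", keep only keyed messages,
# and fail early if any payload is not valid JSON.
import json

msgs = kafka_consume_expected(
    "orders",
    group="integration-tests",
    mfilter=lambda m: m.key() is not None,
    validator=lambda m: json.loads(m.value()),
)
assert msgs, "expected at least one matching message on the topic"
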
Example 3
    class Kafka(object):
        def __init__(self, target_key) -> None:
            super().__init__()
            self.address = _address_for_key(target_key)
            kafka_config = {
                'bootstrap.servers': self.address,
                'group.id': "up9-test-group",
                'enable.auto.commit': 'false'  # important for passive observing
            }
            if "ssl://" in self.address.lower():
                kafka_config['security.protocol'] = 'SSL'

            self.consumer = Consumer(kafka_config)
            self.producer = Producer(kafka_config)
            self.watching_topics = []

            self.consumer.list_topics(timeout=5)  # to check for connectivity

        def watch_topics(self, topics: list):
            def my_on_assign(consumer, partitions):
                logging.debug("On assign: %r", partitions)
                consumer.assign(partitions)
                for partition in partitions:
                    # Jump to the high watermark so only messages produced after
                    # watching starts are observed.
                    low, high = consumer.get_watermark_offsets(partition)
                    partition.offset = high
                    logging.debug("Setting offset: %r", partition)
                    consumer.seek(partition)

            self.watching_topics.extend(topics)
            self.consumer.subscribe(topics, on_assign=my_on_assign)
            self.consumer.poll(0.01)  # to trigger partition assignments

        def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
            logging.debug(
                "Checking messages that appeared on kafka topics: %r",
                self.watching_topics)
            res = []

            start = time.time()
            while True:
                msg = self.consumer.poll(interval)
                if msg is None or time.time() - start > interval:
                    break  # done reading

                if msg.error():
                    raise KafkaException("kafka consumer error: {}".format(
                        msg.error()))

                logging.debug(
                    "Potential message: %r",
                    (msg.partition(), msg.key(), msg.headers(), msg.value()))
                if predicate(msg):
                    res.append(msg)

            # TODO: consumer.close()
            return res

        def assert_seen_message(self, resp, delay=0, predicate=lambda x: True):
            @recorder.assertion_decorator
            def assert_seen_kafka_message(resp, topics, delay):
                messages = self.get_watched_messages(delay, predicate)
                messages = [(m.topic(), m.key(), m.value(), m.headers())
                            for m in messages]
                if not messages:
                    raise AssertionError("No messages on Kafka topic %r" %
                                         topics)
                else:
                    logging.info("Validated the messages have appeared: %s",
                                 messages)

                return messages

            return assert_seen_kafka_message(resp, self.watching_topics, delay)

        def put(self, topic, data=None, json=None, headers=None):
            # TODO: parse key out of URL
            if topic.startswith('/'):
                topic = topic[1:]

            if data is None and json is not None:
                data = json_lib.dumps(json)

            with apiritif.transaction('kafka://[' + self.address + ']/' +
                                      topic):
                logging.info("Sending message to Kafka topic %r: %r", topic,
                             data)
                self.producer.produce(
                    topic, data, headers=[] if headers is None else headers)
                self.producer.poll(0)
                self.producer.flush()

                wrapped_req = self._make_request(
                    'PUT',
                    'kafka://' + self.address.split(',')[0] + '/' + topic,
                    data)
                wrapped_response = self._make_response(wrapped_req)
                recorder.record_http_request('PUT', self.address, wrapped_req,
                                             wrapped_response,
                                             _context.session)

            return wrapped_response

        def _make_request(self, method, url, request):
            req = requests.Request(method, url=url, data=request)
            prepared = req.prepare()
            _context.grpc_mapping[id(request)] = prepared
            return prepared

        def _make_response(self, wrapped_req):
            resp = requests.Response()
            resp.status_code = 202
            resp.request = wrapped_req
            resp._request = wrapped_req
            resp.msg = 'Accepted'
            resp.raw = io.BytesIO()
            return resp
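
A usage sketch under assumptions: the target key and topic are placeholders, and the surrounding apiritif/up9 plumbing (_address_for_key, recorder, _context) is available as in the original module.

# Hypothetical usage: watch a topic, produce a message, then assert it was seen.
kafka = Kafka("orders-cluster")                        # target key is a placeholder
kafka.watch_topics(["orders"])                         # start observing from the current head
kafka.put("/orders", json={"id": 1, "status": "new"})
kafka.assert_seen_message(None, delay=5,
                          predicate=lambda m: b'"id": 1' in m.value())
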
Example 4
class TimeOrderedGeneratorWithTimeout(GeneratorInterface):
    """
    A general generator which can read multiple topics and merge their messages in time order.
    A message must be emitted at (arrival_system_time + latency_ms).
    In batch mode (until reaching the first EOP on each stream) the generator will not discard any messages.
    """

    def __init__(
            self
            , broker
            , groupid
            , topics_infos: List[TopicInfo]
            , latency_ms
            , commit_interval_sec=None
            , group_by_time=False
            , begin_timestamp=None
            , begin_flag=None
            , end_timestamp=None
            , end_flag=None
            , heartbeat_interval_ms=-1
            , begin_offset=None
    ):
        """
        :param broker: Broker to connect to.
        :param groupid: Group id of the consumer.
        :param topics_infos: [TopicInfo()] - list of TopicInfo objects.
        :param latency_ms: (integer >=0) Latency to wait before serving a message.
                            After this messages with lower or equal timestamps will be discarded.
        :param commit_interval_sec: How many seconds to wait between commits.-1 does not commit with the given group id.
        :param group_by_time: Group messages with the same timestamp. This will yield a list of messages.
        :param begin_timestamp: Timestamp of the kafka messages where the generator will start.
        :param begin_flag: BEGINNING, CONTINUE, LIVE - CONTINUE will continue from the last committed offset.
                            If there was no committed offset will start from the end of the stream.
        :param end_timestamp: Timestamp where to end the reading.
        :param end_flag: NEVER, END_OF_PARTITION
        :param heartbeat_interval_ms: -1 does not produce heartbeat. After every interval will produce a HeartBeat typed
                                        message with the timestamp.
        :param begin_offset: Starting offset position if begin_flag is set to OFFSET
        """
        if begin_timestamp is not None and begin_flag is not None:
            raise Exception('You cannot set the begin timestamp and a flag at the same time.')
        if end_timestamp is not None and end_flag is not None:
            raise Exception('You cannot set the end timestamp and a flag at the same time.')
        if begin_timestamp is not None and end_timestamp is not None and begin_timestamp >= end_timestamp:
            raise Exception('The begin timestamp is larger than the end timestamp.')
        if begin_flag is not None and end_flag is not None and \
                begin_flag == BeginFlag.LIVE and end_flag == EndFlag.END_OF_PARTITION:
            raise Exception('You cannot start live and process until the end of the streams.')
        if end_flag is not None and not (end_flag == EndFlag.END_OF_PARTITION or end_flag == EndFlag.NEVER):
            raise Exception('Unknown end flag: {}. Please use the provided enum for the end flag.'.format(end_flag))
        if begin_flag == BeginFlag.OFFSET and begin_offset is None:
            raise Exception('A starting offset must be configured if begin_flag is set to OFFSET.')
        if begin_offset is not None:
            if begin_flag != BeginFlag.OFFSET:
                raise Exception('A specific starting offset is set but begin_flag is not set to OFFSET.')
            elif not isinstance(begin_offset, int):
                raise Exception('The starting offset must be an integer, not {}.'.format(type(begin_offset)))
        self.end_ts = end_timestamp
        self.end_flag = end_flag
        self.begin_offset = begin_offset
        self.commit_interval_sec = commit_interval_sec
        self.latency_ms = latency_ms
        self.group_by_time = group_by_time
        self.max_poll_interval_ms = 5 * 60 * 1000
        self.consumer = Consumer(
            {'bootstrap.servers': broker,
             'group.id': groupid,
             'enable.auto.commit': False,
             'auto.offset.reset': 'earliest' if begin_flag == BeginFlag.CONTINUE_OR_BEGINNING else 'latest',
             'fetch.wait.max.ms': 20,
             'max.poll.interval.ms': self.max_poll_interval_ms,
             'enable.partition.eof': True})
        self.last_poll = None
        self.running = True

        # Warning:
        # If you check individual topics, Kafka may auto-create them if auto.create.topics.enable is set to True.
        try:
            self.consumer.list_topics(timeout=1)
        except KafkaException as e:
            if e.args[0].name() == "_TRANSPORT":
                logging.error(
                    'Broker "{0}" is not available. Please check if it is running and accessible. \n{1}'.format(broker, e)
                )
                self.running = False
            else:
                raise e

        self.tps = []
        self.queues = {}
        self.messages_to_be_committed = {}
        self.begin_timestamp = begin_timestamp
        for ti in topics_infos:
            topic_name = ti.topic
            self.messages_to_be_committed[topic_name] = {'last_msg': None, 'committed': True}
            if begin_timestamp is not None:
                self.tps.extend(self.consumer.offsets_for_times(
                    [TopicPartition(topic_name, partition=ti.partition, offset=begin_timestamp)]))
            elif begin_flag is not None:
                if begin_flag == BeginFlag.BEGINNING:
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_BEGINNING))
                elif begin_flag in (BeginFlag.CONTINUE, BeginFlag.CONTINUE_OR_BEGINNING):
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_STORED))
                elif begin_flag == BeginFlag.LIVE:
                    self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
                elif begin_flag == BeginFlag.OFFSET:
                    self.tps.append(TopicPartition(
                        topic_name, 
                        partition=ti.partition,
                        offset=OFFSET_BEGINNING if begin_offset <= 0 else begin_offset)
                    )
                else:
                    raise Exception('Unknown begin flag. Please use the enum to provide proper begin flag.')
            else:
                self.tps.append(TopicPartition(topic_name, partition=ti.partition, offset=OFFSET_END))
            end_offset = None
            if end_flag is not None and end_flag == EndFlag.END_OF_PARTITION:
                end_offset = self.consumer.get_watermark_offsets(TopicPartition(topic_name, 0))[1] - 1
            if end_offset is None or end_offset >= 0:
                self.queues[topic_name] = Topic(
                    topic_name
                    , self.consumer
                    , end_offset=end_offset
                    , partition=ti.partition
                    , drop=ti.drop
                    , is_live=(begin_timestamp is None and begin_flag is None) or begin_flag == BeginFlag.LIVE
                )
        self.consumer.assign(self.tps)
        self.last_commit = time.time()
        self.heartbeat_interval_ms = heartbeat_interval_ms
        self.next_hb = None

    def stopGenerator(self):
        self.running = False

    def _serve_messages(self, message_to_serve):
        if self.commit_interval_sec is not None and self.group_by_time:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False

        # serve messages
        if self.group_by_time:
            yield message_to_serve
        else:
            for msg in message_to_serve:
                self.messages_to_be_committed[msg.topic()]['last_msg'] = msg
                self.messages_to_be_committed[msg.topic()]['committed'] = False
                yield msg
                if not self.running:
                    break

        # commit messages when they were delivered
        current_time = time.time()
        if self.commit_interval_sec is not None and (
                current_time - self.last_commit) > self.commit_interval_sec:
            for k in self.messages_to_be_committed.keys():
                if not self.messages_to_be_committed[k]['committed']:
                    self.consumer.commit(self.messages_to_be_committed[k]['last_msg'])
                    self.messages_to_be_committed[k]['committed'] = True
            self.last_commit = current_time

    def _serve_heartbeat(self, current_timestamp_ms):
        if self.next_hb is None:
            if self.begin_timestamp is not None:
                self.next_hb = self.begin_timestamp
            else:
                self.next_hb = current_timestamp_ms
        while self.next_hb <= current_timestamp_ms:
            yield HeartBeat(self.next_hb)
            self.next_hb += self.heartbeat_interval_ms

    def _can_serve(self):
        min_ets = min([q.queue[0].message.timestamp()[1] for q in self.queues.values() if len(q.queue) > 0], default=-1)
        if min_ets == -1:
            return None
        deadline = getSystemTimestamp() - self.latency_ms
        if all([q.can_be_emitted(min_ets) for q in self.queues.values()]) and \
                any([q.queue[0].ts < deadline for q in self.queues.values()
                     if len(q.queue) > 0 and q.queue[0].message.timestamp()[1] == min_ets]):
            return min_ets
        else:
            return None

    def getMessages(self):
        while self.running:
            if all([v.stopped for v in self.queues.values()]):
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.queue)
                message_to_serve = [m.message for m in message_to_serve]
                message_to_serve.sort(key=lambda x: x.timestamp()[1])
                while len(message_to_serve) > 0:
                    ts = message_to_serve[0].timestamp()[1]
                    serve_it = []
                    while len(message_to_serve) > 0 and message_to_serve[0].timestamp()[1] == ts:
                        serve_it.append(message_to_serve.pop(0))
                    if self.heartbeat_interval_ms != -1:
                        yield from self._serve_heartbeat(ts)
                    yield from self._serve_messages(serve_it)
                logging.info('Exiting from generator.')
                break
            self.last_poll = getSystemTimestamp()
            msg = self.consumer.poll(0.001)
            if msg is not None:
                if msg.error():
                    if msg.error().code() == KafkaError._PARTITION_EOF:
                        if msg.topic() in self.queues:
                            self.queues[msg.topic()].first_eop_reached = True
                            self.queues[msg.topic()].end_of_partition = True
                    else:
                        logging.error('Unhandled error: {}'.format(msg.error()))
                        break
                else:
                    self.queues[msg.topic()].end_of_partition = False
                    if self.end_ts is not None and msg.timestamp()[1] > self.end_ts:
                        self.queues[msg.topic()].stop_topic()
                    else:
                        self.queues[msg.topic()].add_message(msg)
            while self.running:
                event_ts_to_serve = self._can_serve()
                if event_ts_to_serve is None or \
                        self.max_poll_interval_ms - (getSystemTimestamp() - self.last_poll) < 30000:
                    if self.end_flag == EndFlag.NEVER and self.heartbeat_interval_ms != -1 \
                            and any([q.end_of_partition for q in self.queues.values()]):
                        if self.next_hb is None:
                            self.next_hb = min(getSystemTimestamp() - self.latency_ms,
                                               min([q.queue[0].message.timestamp()[1] for q in self.queues.values()
                                                    if len(q.queue) > 0], default=sys.maxsize))
                        if self.next_hb < min(getSystemTimestamp() - self.latency_ms,
                                              min([q.queue[0].message.timestamp()[1] for q in self.queues.values()
                                                   if len(q.queue) > 0], default=sys.maxsize)):
                            yield from self._serve_heartbeat(self.next_hb)
                    break
                if self.heartbeat_interval_ms != -1:
                    yield from self._serve_heartbeat(event_ts_to_serve)
                message_to_serve = []
                for q in self.queues.values():
                    message_to_serve.extend(q.get_messages(event_ts_to_serve))
                yield from self._serve_messages(message_to_serve)
                if self.end_ts is not None and self.end_ts <= event_ts_to_serve:
                    self.running = False
        self.consumer.close()
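
A minimal driving sketch, under assumptions: the broker address, group id, and topic are placeholders, and TopicInfo, BeginFlag, and EndFlag are the helper types from the surrounding module (the exact TopicInfo constructor arguments are assumed).

# Hedged usage sketch; all values in the constructor call are placeholders and the
# TopicInfo(topic=..., partition=...) construction is an assumption about that helper type.
generator = TimeOrderedGeneratorWithTimeout(
    broker="localhost:9092",
    groupid="replay-group",
    topics_infos=[TopicInfo(topic="detections", partition=0)],
    latency_ms=500,
    begin_flag=BeginFlag.BEGINNING,
    end_flag=EndFlag.END_OF_PARTITION,
)
for msg in generator.getMessages():
    # getMessages() yields confluent_kafka Message objects (or lists of them when group_by_time=True).
    print(msg.topic(), msg.timestamp()[1], len(msg.value()))
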
Example 5
]

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="create topics if they don't exist using the output of CS:INSTLIST")
    parser.add_argument("filename")
    parser.add_argument("--broker", help="the broker to create the topics on")
    args = parser.parse_args()

    broker = args.broker
    conf = {"bootstrap.servers": broker}
    admin_client = AdminClient(conf)

    conf["group.id"] = str(uuid.uuid4())
    cons = Consumer(conf)
    topics = cons.list_topics()
    topics_list = topics.topics

    with open(args.filename) as file:
        inst_list = json.load(file)  # use a separate name to avoid shadowing the json module
    for item in inst_list:
        inst_name = item["name"]
        for topic_suffix in TOPICS_PER_INST:
            topic_to_check = inst_name + topic_suffix
            if topic_to_check not in topics_list:
                print(f"creating {topic_to_check}")
                new_topic = NewTopic(topic_to_check, num_partitions=1)
                admin_client.create_topics([new_topic])
    admin_client.poll(10)
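
As a side note, AdminClient.create_topics() is asynchronous and returns one future per requested topic; a hedged variant of the creation step that waits for each request explicitly might look like this (it reuses the loop variables from the script above for illustration).

# Hedged sketch: wait on the per-topic futures returned by create_topics()
# instead of relying on the final poll(); topic_to_check comes from the loop above.
futures = admin_client.create_topics([NewTopic(topic_to_check, num_partitions=1)])
for name, future in futures.items():
    try:
        future.result()  # raises on broker-side failure (e.g. topic already exists)
        print(f"created {name}")
    except Exception as exc:
        print(f"failed to create {name}: {exc}")
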