Example No. 1
def run_consumer():
    logger = logging.getLogger('consumer')
    logger.setLevel(logging.DEBUG)
    handler = logging.StreamHandler()
    handler.setFormatter(
        logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
    logger.addHandler(handler)

    consumer = Consumer(conf)
    consumer.subscribe(topics=config.resolve_config("CONSUMER_TOPICS"))

    try:
        while True:
            msg = consumer.poll(timeout=1.0)
            if msg is None:
                continue
            if msg.error():
                raise KafkaException(msg.error())
            else:
                # Proper message
                # sys.stderr.write('%% %s [%d] at offset %d with key %s:\n' %
                #                 (msg.topic(), msg.partition(), msg.offset(),
                #                  str(msg.key())))
                print(msg.value())
    except KeyboardInterrupt:
        sys.stderr.write('%% Aborted by user\n')

    finally:
        # Close down consumer to commit final offsets.
        consumer.close()
Example No. 2
def _init_consumer(topics: List[str], config: Dict) -> Consumer:
    """config must contain:
        `bootstrap.servers`
        `group.id`
    but may contain every other Kafka setting as well
    """
    assert "bootstrap.servers" in config.keys()
    assert "group.id" in config.keys()
    consumer = Consumer(config)
    consumer.subscribe(topics)
    return consumer
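A minimal usage sketch for `_init_consumer`, assuming `Consumer` and `KafkaException` are imported from confluent_kafka; the broker address, group id and topic name below are placeholders, not values from the original snippet.

conf = {
    "bootstrap.servers": "localhost:9092",  # placeholder broker
    "group.id": "example-group",            # placeholder group id
    "auto.offset.reset": "earliest",
}
consumer = _init_consumer(["example-topic"], conf)
try:
    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            raise KafkaException(msg.error())
        print(msg.value())
finally:
    consumer.close()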
Example No. 3
def create_consumer(kafka_broker, topic):
    ic = Consumer({
        'bootstrap.servers': kafka_broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    ic.subscribe([topic])
    return ic
Example No. 4
class KafmanConsumer(metaclass=Singleton):
    """TODO"""
    def __init__(self):
        super().__init__()
        self.topic = None
        self.consumer = None
        self.started = False
        self.bus = EventBus.get(CONSUMER_BUS)

    def start(self, settings: dict) -> None:
        """TODO"""
        if self.consumer is None:
            self.consumer = Consumer(settings)
            self.started = True

    def stop(self) -> None:
        """TODO"""
        if self.consumer is not None:
            del self.consumer
            self.consumer = None
            self.started = False

    def consume(self, topics: List[str]) -> None:
        """TODO"""
        if self.started:
            tr = threading.Thread(target=self._consume, args=(topics, ))
            tr.daemon = True
            tr.start()

    def _consume(self, topics: List[str]) -> None:
        """TODO"""
        self.consumer.subscribe(topics)
        try:
            while self.started:
                message = self.consumer.poll(POLLING_INTERVAL)
                if message is None:
                    continue
                elif not message.error():
                    msg = message.value().decode(Charset.UTF_8.value)
                    self.bus.emit(MSG_CONS_EVT,
                                  message=msg,
                                  topic=message.topic())
                elif message.error().code() == PARTITION_EOF:
                    print(
                        f"End of partition reached {message.topic()}/{message.partition()}"
                    )
                else:
                    print(f"Error occurred: {message.error().str()}")
        except KeyboardInterrupt:
            print("Keyboard interrupted")
        finally:
            if self.consumer:
                self.consumer.close()
Example No. 5
class KafkaConsumer(BaseKafkaConsumer):
    def __init__(self, config, logger):
        self._config = config["consumer"]
        conf = self._config["conf"]
        conf.setdefault("group.id", str(uuid.uuid1()))
        self.autocommit_enabled = conf.get("enable.auto.commit", True)
        self._logger = logger
        internal_log_path = self._config.get("internal_log_path")
        if internal_log_path:
            debug_logger = logging.getLogger("debug_consumer")
            timestamp = time.strftime("_%d%m%Y_")
            debug_logger.addHandler(
                logging.FileHandler("{}/kafka_consumer_debug{}{}.log".format(
                    internal_log_path, timestamp, os.getpid())))
            conf["logger"] = debug_logger
        self._consumer = Consumer(**conf)

    def subscribe(self, topics=None):
        topics = topics or list(self._config["topics"].values())
        self._consumer.subscribe(topics)

    def poll(self):
        msg = self._consumer.poll(self._config["poll_timeout"])
        if msg is not None:
            err = msg.error()
            if err:
                if err.code() == KafkaError._PARTITION_EOF:
                    return None
                else:
                    self._logger.info(
                        "KafkaConsumer Error {} at pid {}: topic={} partition=[{}] offset={}\n"
                        .format(err.code(), os.getpid(), msg.topic(),
                                msg.partition(), msg.offset()))
                    raise KafkaException(err)

            if msg.value():
                return msg

    def commit_offset(self, msg):
        if msg is not None:
            if self.autocommit_enabled:
                self._consumer.store_offsets(msg)
            else:
                self._consumer.commit(msg, asynchronous=False)

    def close(self):
        self._consumer.close()
Example No. 6
def get_partitions_with_offsets(broker):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    input_consumer.subscribe(['read', 'update', 'transfer'])
    output_consumer.subscribe(['responses'])

    msgs = input_consumer.consume(timeout=5, num_messages=100)
    if len(msgs) == 0:
        print("returned empty")
        return {}

    partitions_with_offsets = {'input': [], 'output': []}

    input_partitions = input_consumer.assignment()
    for p in input_partitions:
        _, h = input_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['input'].append(p)

    output_consumer.consume(timeout=5, num_messages=100)
    output_partitions = output_consumer.assignment()
    for p in output_partitions:
        _, h = output_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['output'].append(p)

    return partitions_with_offsets
Example No. 7
def kafka_consume_expected(topic,
                           group='0',
                           timeout=1.0,
                           mfilter=lambda x: True,
                           validator=lambda x: None,
                           after_subscribe=lambda: None):
    consumer = Consumer({
        'bootstrap.servers': KAFK,
        'group.id': group,
        'auto.offset.reset': 'earliest'  # start from earliest when no committed offset exists
    })
    msgs = []
    topics = consumer.list_topics(topic)  # promises to create topic
    logging.debug("Topic state: %s", topics.topics)
    if topics.topics[topic].error is not None:
        logging.warning("Error subscribing to topic: %s", topics.topics)
        return msgs
    consumer.subscribe([topic])
    time.sleep(5)  # for kafka to rebalance consumer groups

    after_subscribe()

    logging.debug("Waiting for messages...")
    while True:
        msg = consumer.poll(timeout)

        if msg is None:
            break

        logging.info("Seen message: %r %r", msg.key(), msg.value())

        if msg.error():
            logging.warning("Consumer error: {}".format(msg.error()))
            continue

        if mfilter(msg):
            validator(msg)
            msgs.append(msg)

    consumer.commit()
    consumer.close()

    return msgs
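A hedged usage sketch for `kafka_consume_expected`; the topic name, group id, key filter and JSON check below are placeholders, not part of the original test suite.

import json

def _is_order_event(msg):
    # placeholder filter: keep only messages with this key
    return msg.key() == b"order-created"

def _validate(msg):
    # placeholder validator: payload must be valid JSON
    json.loads(msg.value())

received = kafka_consume_expected(
    "orders",                    # placeholder topic
    group="integration-tests",
    timeout=2.0,
    mfilter=_is_order_event,
    validator=_validate,
)
assert received, "expected at least one matching message"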
Example No. 8
def pay_order():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-pay-id',
        'auto.offset.reset': 'earliest'
    })

    consumer.subscribe(['pay_order'])

    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value())
        OrderPayStory().execute(data.get('order_id'))

    consumer.close()
Example No. 9
    def start(self):

        c = Consumer({
            'bootstrap.servers': KAFKA_BOOTSTRAP_SERVICE,
            'group.id': KAFKA_GROUP_ID,
            'auto.offset.reset': 'earliest'
        })

        c.subscribe([BTC_BLOCK_TOPIC])

        while True:
            msg = c.poll(1.0)

            if msg is None:
                continue
            if msg.error():
                print("Consumer error: {}".format(msg.error()))
                continue

            data = json.loads(msg.value().decode('utf-8'))
            cache.set("latest_block", data, timeout=None)
Example No. 10
def deamon():
    """ Termite Client """
    group = ""
    KAFKA_HOST = ""
    KAFKA_TOPIC=""
    print(KAFKA_HOST)
    c = Consumer({
        "bootstrap.servers": KAFKA_HOST,
        'group.id': group,
    })
    c.subscribe([KAFKA_TOPIC])

    running = True
    while running:
        msg = c.poll(1)
        if msg is None:
            continue
        if not msg.error():
            data = json.loads(msg.value())
            print("receive msg:", data)
            kwargs = {
                "work_id": data["Work_id"],
                "flow_id": data["Flow_id"],
                "cid": data["Cid"]
            }
            # Video beauty tag detection
            if data.get("Work", "") == "video_tag_detect":
                t_video_tag_detect(**kwargs)
            else:
                pass
        else:
            if msg.error().code() == KafkaError._PARTITION_EOF:
                print "Skip-Error Message-Topic: {} Partition: {} Offset: {}Error: {}".format(msg.topic(),
                                                                                              msg.partition(),
                                                                                              msg.offset(),
                                                                                              msg.error())
            else:
                print "Error Message: {}".format(msg.error())
            time.sleep(0.01)
    c.close()
Example No. 11
def order_channel():
    consumer = Consumer({
        'bootstrap.servers': os.environ.get('BROKER'),
        'group.id': 'consumer-order-id',
        'auto.offset.reset': 'earliest'
    })

    consumer.subscribe([
        'order_reserved', 'order_paid', 'order_pay_failed',
        'order_reserve_rejected'
    ])

    while True:
        msg = consumer.poll(1.0)
        if msg is None:
            continue
        if msg.error():
            logging.error("Consumer error: {}".format(msg.error()))
            continue
        data = json.loads(msg.value())
        topic = msg.topic()

        # TODO For demo
        if topic == 'order_reserved':
            OrderSaga().pay(data.get('order_id'))
            continue
        if topic == 'order_paid':
            OrderSaga().approve(data.get('order_id'))
            continue
        if topic == 'order_pay_failed':
            OrderSaga().reject_reserve(data.get('order_id'))
            continue
        if topic == 'order_reserve_rejected':
            OrderSaga().cancel(data.get('order_id'))
            continue

    consumer.close()
Example No. 12
def main(args):
    serial = args.serial
    num_messages = args.num_messages
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    if serial:
        print("Running in SERIAL mode")
        print(
            "The input producer will wait for the reply of the transactor before producing the next message."
        )
    else:
        print("Running in PARALLEL mode")
        print(
            "The input producer will produce all messages in parallel (at once) after the first message."
        )

    tr_args = [
        sys.executable,
        os.path.join(HERE, "eos-transactions.py"),
        "-b",
        brokers,
        "-g",
        group_id + "-tr",
        "-t",
        input_topic,
        "-p",
        str(input_partition),
        "-o",
        output_topic,
    ]

    output_consumer = Consumer({
        "bootstrap.servers": brokers,
        "group.id": group_id + "-pr",
        "auto.offset.reset": "earliest",
        "enable.auto.commit": True,
        "enable.partition.eof": False,
    })
    output_consumer.subscribe([output_topic])

    input_producer = Producer({
        'bootstrap.servers': brokers,
    })

    try:
        with tempfile.NamedTemporaryFile(mode='w+') as f:
            tr_proc = subprocess.Popen(tr_args,
                                       stderr=subprocess.STDOUT,
                                       stdout=f,
                                       cwd=HERE,
                                       close_fds=True)
            try:
                time.sleep(1)
                assert tr_proc.poll() is None
                tx = 0
                for i in range(num_messages):
                    input_producer.produce(input_topic,
                                           key=b"xy",
                                           value=str(tx).encode("ascii"))
                    tx += 1
                    assert input_producer.flush(10) == 0
                    while serial or tx <= 1:
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None
                        if tx == 1:
                            t_start = time.time()
                        break
                if not serial:
                    for _ in range(num_messages - 1):
                        msg = output_consumer.poll(1.0)
                        if msg is None:
                            continue
                        assert msg.error() is None

                print("Processing took {}".format(time.time() - t_start))
            finally:
                if tr_proc.poll() is None:
                    tr_proc.terminate()
                    tr_proc.wait()
            f.seek(0)
            eos_out = f.read()
    finally:
        output_consumer.close()  # commit offsets

    i = 0
    c = False
    send_offset_logs = defaultdict(list)
    send_offset_times = []
    for line in eos_out.split("\n"):
        if line.startswith(":DEMO:START "):
            c = True
        if c:
            send_offset_logs[i].append(line)
        if line.startswith(":DEMO:END "):
            send_offset_times.append(float(line.rpartition(" ")[-1]))
            c = False
            i += 1

    print("\nSend offset times:", send_offset_times)
    print("Send offset times average:",
          sum(send_offset_times) / len(send_offset_times))

    print("\nRelevant log snippet from the middle:")
    print("\n".join(send_offset_logs[int(i / 2)]))

    print("\nFull output of the transactor:")
    print(eos_out)
Example No. 13
"""
Module comment
"""

LOGGER = logging.getLogger(__name__)

if __name__ == '__main__':

    c = Consumer({
        'bootstrap.servers':
        'qg-cdh-server-04.vcom.local:9092,qg-cdh-server-05.vcom.local:9092,qg-cdh-server-06.vcom.local:9092',
        'group.id': 'ddc_test_group',
        'auto.offset.reset': 'earliest'
    })

    c.subscribe(['ddc_test_topic1'])
    print('consumer start')
    count = 0
    while True:
        msg = c.poll(1.0)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue
        count += 1
        print('Received message: {}'.format(count))
        # print('Received message: {}'.format(msg.value().decode('utf-8')))

        # c.close()
Example No. 14
class BreadCrumbDataConsumer:
    _logger = logging.getLogger('BreadCrumbDataConsumer')

    def __init__(self):
        kafka_configs = KafkaHelper.get_kafka_configs()
        kafka_configs['group.id'] = 'python_breadcrumb_data_consumer'
        kafka_configs['auto.offset.reset'] = 'earliest'
        self._consumer = Consumer(kafka_configs)

        self._bread_crumb_repo = BreadCrumbRepository()
        self._trips_stop_data = dict()

    def consume_breadcrumb_records(self):

        self._logger.info("Starting breadcrumb data consumer ...")
        self._consumer.subscribe([STOP_EVENT_TOPIC, BREADCRUMB_DATA_TOPIC])

        stop_events_records_count = 0
        consumed_breadcrumb_records_count = 0
        bread_crumb_records_saved_to_db_count = 0
        breadcrumbs = list()
        last_saved_to_db = datetime.now()
        try:
            while True:

                duration_from_last_saved_to_db = datetime.now() - last_saved_to_db
                if len(breadcrumbs) >= 50_000 or (
                        len(breadcrumbs) > 0 and
                        duration_from_last_saved_to_db.total_seconds() > (60 * 2)):
                    self._bread_crumb_repo.bulk_save_breadcrumbs(
                        breadcrumbs, self._trips_stop_data)
                    bread_crumb_records_saved_to_db_count += len(breadcrumbs)
                    breadcrumbs.clear()
                    last_saved_to_db = datetime.now()

                    self._logger.info(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}, records saved to db = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count,
                                bread_crumb_records_saved_to_db_count))

                msg = self._consumer.poll(1.0)
                if msg is None:
                    continue
                elif msg.error():
                    self._logger.error('error: {}'.format(msg.error()))
                else:
                    msg_topic = msg.topic()
                    message_data = msg.value().decode("utf-8")

                    if msg_topic == BREADCRUMB_DATA_TOPIC:
                        consumed_breadcrumb_records_count += 1
                        self.process_bread_crumb_record(
                            breadcrumbs, message_data)
                    elif msg_topic == STOP_EVENT_TOPIC:
                        stop_events_records_count += 1
                        self.process_stop_event_records(message_data)

                    self._logger.debug(
                        'Number of breadcrumb records consumed = {}, stop event records consumed = {}'
                        .format(consumed_breadcrumb_records_count,
                                stop_events_records_count))
        finally:
            self._consumer.close()
            self._bread_crumb_repo.bulk_save_breadcrumbs(
                breadcrumbs, self._trips_stop_data)

    def process_bread_crumb_record(self, breadcrumbs, message_data):
        try:
            breadcrumb = BreadCrumb.parse_raw(message_data)
            breadcrumbs.append(breadcrumb)
        except Exception as ex:
            self._logger.debug('Encountered an error parsing a bread crumb: %s', ex)

    def process_stop_event_records(self, message_data):
        try:
            trip_stop_dict = json.loads(message_data)
            trip_id = list(trip_stop_dict.keys())[0]

            if trip_id not in self._trips_stop_data.keys():
                trip_stop_events_df = pd.read_json(
                    list(trip_stop_dict.values())[0])
                first_row = trip_stop_events_df.iloc[0]

                self._trips_stop_data[trip_id] = {
                    'route_id': first_row['route_number'],
                    'service_key': first_row['service_key'],
                    'direction': first_row['direction']
                }

        except Exception as ex:
            self._logger.debug('Encountered an error parsing a stop events record: %s', ex)
Example No. 15
def exec_benchmark(duration_s, fps, kafka_loc, output_topic, silent):
    """Measures throughput at the output Kafka topic,
    by checking the growth in all partitions"""

    c = Consumer({
        'bootstrap.servers': kafka_loc,
        'group.id': 'benchmark-' + str(uuid.uuid4()),
        'auto.offset.reset': 'latest',
        'max.poll.interval.ms': 86400000,
        'isolation.level': 'read_committed'
    })

    # === Get topic partitions

    topic_partitions = None

    def store_topic_partition(consumer, partitions):
        nonlocal topic_partitions
        topic_partitions = partitions

    c.subscribe([output_topic], on_assign=store_topic_partition)
    while topic_partitions is None:
        c.consume(timeout=0.5)

    #Loop read partitions

    throughput_measured = []
    throughput_measured_per_partition = {}
    last_values = {}
    for p in topic_partitions:
        low, high = c.get_watermark_offsets(p)
        throughput_measured_per_partition[p.partition] = []
        last_values[p.partition] = high
        #if silent != "silent":
        #    print("Starting value for partition {}: {}".format(p.partition, high))

    MS_PER_UPDATE = 1000 / fps

    start_time = current_milli_time()
    last_time = start_time
    current_time = start_time
    last_write_time = current_time

    lag = 0.0

    while current_time < start_time + duration_s * 1000:
        current_time = current_milli_time()
        elapsed = current_time - last_time
        last_time = current_time
        lag += elapsed
        while lag >= MS_PER_UPDATE:
            #calc new val
            total_new = 0
            curr_time_for_print = current_milli_time()
            time_delta = ((curr_time_for_print - last_write_time) / 1000)
            if time_delta > 0:
                for p in topic_partitions:
                    low, high = c.get_watermark_offsets(p)
                    delta = high - last_values[p.partition]
                    total_new += delta
                    throughput_measured_per_partition[p.partition].append(
                        (delta / time_delta, curr_time_for_print))
                    last_values[p.partition] = high
                throughput_measured.append(
                    (total_new / time_delta, curr_time_for_print))
                last_write_time = curr_time_for_print

            lag -= MS_PER_UPDATE

    if silent != "silent":
        #Print column names
        #TIME THROUGHPUT PART-0 ... PART-N
        columns = "TIME\tTHROUGHPUT"
        for i in range(len(topic_partitions)):
            columns += "\tPART-{}".format(str(i))
        print(columns)
        for row in range(len(throughput_measured)):
            row_data = "{}\t{}".format(throughput_measured[row][1],
                                       int(throughput_measured[row][0]))
            for i in range(len(topic_partitions)):
                row_data += "\t{}".format(
                    int(throughput_measured_per_partition[i][row][0]))
            print(row_data)
    else:
        print(
            int(
                statistics.mean(
                    [x[0] for x in throughput_measured if x[0] > 0.0])))
Example No. 16
class AioConsumer:

    def __init__(self, config,
                 topics: list,
                 group_id: str,
                 handler,
                 max_retry=-1,
                 consumer_no=0,
                 timeout=1,
                 loop=None, exe=None):
        """
        consumer = AioConsumer(...)
        :param config: kafka consumer config
        :param topics:
        :param group_id:
        :param handler:
        :param max_retry: number of retries when consumption fails; -1 means no retry
        :param consumer_no: consumer number (index)
        :param timeout: poll timeout in seconds
        :param loop:
        :param exe:
        """
        self.loop = loop or asyncio.get_event_loop()
        assert config is not None, 'init kafka consumer error, config is None'
        _config = copy.deepcopy(config)
        _config['group.id'] = group_id
        _config['on_commit'] = self.commit_completed
        self.handler = handler
        self.consumer = Consumer(_config)
        self.consumer.subscribe(topics)
        self.redis_retry_key = f'{"_".join(topics)}_{self.handler.__name__}'
        self.name = f'{self.redis_retry_key}_{consumer_no}'
        self.max_retry = max_retry
        self.exe = exe
        self.timeout = timeout
        # 'INIT' -> 'RUNNING' -> 'STOP'
        self.status = 'INIT'

    @staticmethod
    def commit_completed(err, partitions):
        if err:
            logger.info(str(err))
        else:
            logger.info("Committed partition offsets: " + str(partitions))

    async def poll(self):
        return await self.loop.run_in_executor(self.exe, self.consumer.poll, self.timeout)

    async def _get_message_from_kafka(self):
        poll_message = await self.poll()
        if not poll_message:
            return None
        elif poll_message.error():
            raise KafkaException(poll_message.error())
        else:
            return poll_message.value()

    async def run(self):
        while self.status == 'RUNNING':
            str_message = await self._get_message_from_kafka()
            message = json.loads(str_message or '{}')
            if not message:
                await asyncio.sleep(1)
                continue
            try:
                if asyncio.iscoroutinefunction(self.handler):
                    await self.handler(message)
                else:
                    self.handler(message)
                await self.commit()
            except Exception as e:
                logger.warning(f'{str(self)} handler error: {e.args}. msg: {str_message}')

        await self.close()

    async def commit(self):
        def _commit():
            self.consumer.commit(asynchronous=False)
        await self.loop.run_in_executor(self.exe, _commit)

    async def close(self):
        await self.commit()
        await self.loop.run_in_executor(self.exe, self.consumer.close)
        logger.info(f'{self.name} closed')

    def stop(self):
        self.status = 'STOP'
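A hedged usage sketch for `AioConsumer`; the broker address, topic and group id are placeholders. The class only consumes while `status == 'RUNNING'`, so the caller is assumed to flip the status before awaiting `run()`.

async def handle(message: dict):
    # placeholder handler for the decoded JSON message
    print("got message:", message)

async def main():
    config = {"bootstrap.servers": "localhost:9092"}   # placeholder broker
    consumer = AioConsumer(config, ["example-topic"], "example-group", handle)
    consumer.status = 'RUNNING'                        # enable the run() loop
    run_task = asyncio.create_task(consumer.run())
    await asyncio.sleep(30)                            # consume for a while
    consumer.stop()                                    # run() then commits and closes
    await run_task

# asyncio.run(main())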
Example No. 17
    instrument[CHILDREN].append(source)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Amend data to runinfo messages")
    parser.add_argument("-b", "--broker")
    args = parser.parse_args()
    broker = args.broker
    conf = {"bootstrap.servers": broker, "group.id": str(uuid.uuid4())}
    admin_client = AdminClient(conf)
    cons = Consumer(conf)
    prod = Producer(conf)
    topics = [topic + "_runInfo" for topic in INST_NAMES]
    print(f"subscribing to {topics}")
    cons.subscribe(topics=topics)
    while True:
        try:
            # SIGINT can't be handled when polling, limit timeout to 1 second.
            msg = cons.poll(1.0)
            if msg is None:
                continue
            message_topic = msg.topic()
            instrument_name = message_topic.split("_runInfo")[0]
            des = deserialise_pl72(msg.value())

            structure = des.nexus_structure
            entry = _create_group("raw_data_1", "NXentry")
            detector_1 = _create_group("detector_1", "NXdetector")
            detector_1[CHILDREN].append(structure["entry"]["events"])
            instrument = _create_group("instrument", "NXinstrument")
Example No. 18
class MsgConsumer:
    def __init__(self,
                 topic,
                 broker_address,
                 group_id='group',
                 client_id='client',
                 auto_offset_reset='earliest',
                 num_messages=1,
                 verbose=False):
        """Consumer for handling EEG Streamer messages.

        Args:
            topic: Topic to subscribe to
            broker_address: Broker address
            group_id: group ID
            client_id: client ID
            auto_offset_reset: (default: 'earliest')
            num_messages: Maximum number of messages to consume each time (default: 1)
            verbose: verbose mode. (default: False)
        """
        self.data = deque()
        self.timestamps = deque()

        self.__num_msgs = num_messages
        """Maximum number of messages to consume each time (default: 1)"""

        self.__verbose = verbose

        self.__streamqueue = deque()

        self.__consumer = Consumer({
            'bootstrap.servers': broker_address,
            'auto.offset.reset': auto_offset_reset,
            'group.id': group_id,
            'client.id': client_id,
            'enable.auto.commit': True,
            'session.timeout.ms': 6000,
            'max.poll.interval.ms': 10000
        })
        """consumer that reads stream of EEG signal"""
        self.__consumer.subscribe([topic])

    def listen(self):
        """read stream from Kafka and append to streamqueue

        Returns:
            list of list: dataset (nchannel x nsample) or None
        """
        # If chunk size is too large, consume it multiple epochs
        chunk_size = self.__num_msgs
        msgs = []
        while chunk_size > 100:
            msgs.extend(self.__consumer.consume(num_messages=100, timeout=1))
            chunk_size -= 100
        msgs.extend(self.__consumer.consume(num_messages=chunk_size,
                                            timeout=1))

        print(f"INFO: Received {str(len(msgs))} messages"
              ) if self.__verbose else None

        if msgs is None or len(msgs) <= 0:
            return None

        self.__streamqueue.extendleft(msgs)  # Enqueue

        if len(self.__streamqueue) < self.__num_msgs:
            return None

        # Dequeue
        msgs__ = [self.__streamqueue.pop() for i in range(0, self.__num_msgs)]

        timestamps, data = [], []
        for msg in msgs__:
            time, values = msg_decode(msg.value())
            timestamps.append(time) if time is not None else None
            data.append(values) if time is not None else None
        #TODO:// assert there is not big time gap in the data

        if len(data) < self.__num_msgs:
            return None

        print(timestamps[0], data[0]) if self.__verbose else None

        data = tuple(zip(*data))
        self.data.append(data)
        self.timestamps.append(timestamps[0])

        print(f"INFO: Sucessfully Read a chunk") if self.__verbose else None

    def stop(self):
        self.__consumer.close()
        pass

    def drain(self):
        self.__num_msgs = 100000
        for i in range(0, 10):
            self.listen()
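A hedged usage sketch for `MsgConsumer`; the topic and broker address are placeholders, and `msg_decode` is assumed to be provided by the surrounding module, as in the class above.

consumer = MsgConsumer("eeg-stream", "localhost:9092",   # placeholder topic/broker
                       group_id="eeg-readers",
                       num_messages=16,                  # chunk size in messages
                       verbose=True)
try:
    for _ in range(100):
        consumer.listen()        # appends a channels x samples chunk once enough messages arrive
    while consumer.data:
        chunk = consumer.data.popleft()
        print("channels in chunk:", len(chunk))
finally:
    consumer.stop()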
        "bootstrap.servers": BOOTSTRAP_SERVERS,
        "group.id": GROUP,
        # by using earliest offset reset and autocommit we have "at-least-once" semantic (default)
        # "at-most-once" and "exactly-once" are also available
        # https://docs.confluent.io/platform/current/clients/consumer.html
        "auto.offset.reset": "earliest",
        "enable.auto.commit": "true",
    })

    @atexit.register
    def finisher():
        # close consumer on exit
        c.close()

    # can subscribe to multiple topics
    c.subscribe(TOPICS)

    while True:
        # poll for new message at most 1 second
        msg = c.poll(1.0)

        if msg is None:
            continue
        if msg.error():
            print("Consumer error: {}".format(msg.error()))
            continue

        topic = msg.topic()
        key = msg.key().decode()
        value = msg.value().decode()
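For contrast with the at-least-once setup above, this is a hedged sketch of the "at-most-once" variant mentioned in the comment: auto-commit is disabled and the offset is committed synchronously before the message is processed, so a crash during processing skips the message instead of redelivering it. `BOOTSTRAP_SERVERS`, `GROUP` and `TOPICS` are the same placeholders as in the snippet above, and `process()` stands in for the real handling code.

c = Consumer({
    "bootstrap.servers": BOOTSTRAP_SERVERS,
    "group.id": GROUP,
    "auto.offset.reset": "earliest",
    "enable.auto.commit": "false",              # commit manually instead
})
c.subscribe(TOPICS)

while True:
    msg = c.poll(1.0)
    if msg is None:
        continue
    if msg.error():
        print("Consumer error: {}".format(msg.error()))
        continue
    c.commit(message=msg, asynchronous=False)   # commit first ...
    process(msg.value())                        # ... then process: at-most-once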
Example No. 20
class AsyncWorker(object):
    """
    Fetches from Kafka topics and processes them.

    :param consumer_topic: Name of the Kafka topic for consume.
    :type consumer_topic: str
    :param service: Service function which is executed every time when job is processed.
    Service must get as argument str or dict type object.
    :type service: callable
    :param consumer_conf: config for Kafka consumer.
    :type consumer_conf: dict
    :param failed_topic: Kafka topic for produce unprocessed messages from consumer_topic.
    :type failed_topic: str
    :param producer_conf: config for Kafka producer for producing unprocessed messages.
    :type producer_conf: dict
    """
    def __init__(self, consumer_topic: str, service: Callable,
                 consumer_conf: dict, failed_topic: str, producer_conf: dict):

        self._consumer_topic = consumer_topic
        self._consumer = Consumer(consumer_conf)
        self._service = service
        self._failed_topic = failed_topic  # use naming like <project name>_<version>_<consumer_topic><retry/failed>
        self._producer = AsyncProducer(producer_conf)

    def __repr__(self):
        """Return the string representation of the worker.
        :return: String representation of the worker.
        :rtype: str
        """

        return 'Worker(Consumer={}, consume_topic={})'.format(
            self._consumer, self._consumer_topic)

    def __del__(self):  # pragma: no cover
        # noinspection PyBroadException
        try:
            self._consumer.close()
        except Exception:
            pass

    async def _exec_service(self, message_value):
        if iscoroutinefunction(self._service):
            res = await self._service(message_value)
        else:
            res = self._service(message_value)
        return res

    async def _process_message(self, msg: Message):
        """
        De-serialize message and execute service.
        :param msg: Kafka message.
        :type msg: confluent_kafka.Message
        """
        LOGGER.info(
            'Processing Message(topic={}, partition={}, offset={}) ...'.format(
                msg.topic(), msg.partition(), msg.offset()))
        service_repr = get_call_repr(self._service)
        LOGGER.info('Executing job {}'.format(service_repr))
        try:
            message_value = _decode_msg_value(msg.value())
            res = await self._exec_service(message_value)

        except KeyboardInterrupt:
            LOGGER.error('Job was interrupted: {}'.format(msg.offset()))

        except Exception as err:
            LOGGER.exception('Job {} raised an exception: {}'.format(
                msg.offset(), err))

            await self._producer.produce(topic=self._failed_topic,
                                         value=msg.value(),
                                         error=str(err))
        else:
            LOGGER.info('Job {} returned: {}'.format(msg.offset(), res))

    @property
    def consumer_topic(self):
        """Return the name of the Kafka topic.
        :return: Name of the Kafka topic.
        :rtype: str
        """
        return self._consumer_topic

    @property
    def consumer(self):
        """Return the Kafka consumer instance.
        :return: Kafka consumer instance.
        :rtype: confluent_kafka.Consumer
        """
        return self._consumer

    @property
    def service(self):
        """Return the service function.
        :return: Callback function, or None if not set.
        :rtype: callable | None
        """
        return self._service

    async def start(self,
                    max_messages: int = math.inf,
                    commit_offsets: bool = True) -> int:
        """Start processing Kafka messages and executing jobs.
        :param max_messages: Maximum number of Kafka messages to process before stopping. If not set, worker runs until
        interrupted.

        :type max_messages: int
        :param commit_offsets: If set to True, consumer offsets are committed every time a message is processed
        (default: True).
        :type commit_offsets: bool
        :return: Total number of messages processed.
        :rtype: int
        """
        LOGGER.info('Starting {} ...'.format(self))

        self._consumer.unsubscribe()
        self._consumer.subscribe([self.consumer_topic])
        LOGGER.info(" Try get messages from position: {}".format(
            self._consumer.position(self._consumer.assignment())))
        messages_processed = 0
        while messages_processed < max_messages:
            loop = asyncio.get_event_loop()
            # awaiting place for processing messages in other coroutines
            messages = await loop.run_in_executor(
                None, partial(self._consumer.consume, 10, 2.0))
            LOGGER.debug(" Try get messages from position: {}".format(
                self._consumer.position(self._consumer.assignment())))
            if not messages:
                LOGGER.debug("Messages not found")
                continue
            for msg in messages:
                if msg.error():
                    LOGGER.error("Consumer error: {}".format(msg.error()))
                    continue
                LOGGER.info("Get message with offset {}".format(msg.offset()))
                asyncio.create_task(self._process_message(msg))
            if commit_offsets:
                self._consumer.commit()

            messages_processed += 1
        self._consumer.close()
        return messages_processed
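A hedged usage sketch for `AsyncWorker`; broker addresses, topic names and the service callback are placeholders, and `AsyncProducer` is assumed to come from the same module as the class above.

async def handle_payload(payload):
    # placeholder service: payload is the decoded message value
    return "processed: {}".format(payload)

consumer_conf = {
    "bootstrap.servers": "localhost:9092",   # placeholder broker
    "group.id": "orders-worker",
    "auto.offset.reset": "earliest",
}
producer_conf = {"bootstrap.servers": "localhost:9092"}

worker = AsyncWorker(
    consumer_topic="orders",                      # placeholder topic
    service=handle_payload,
    consumer_conf=consumer_conf,
    failed_topic="myproject_v1_orders_failed",    # <project>_<version>_<topic>_failed
    producer_conf=producer_conf,
)
# asyncio.run(worker.start(max_messages=1000))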
Example No. 21
    class Kafka(object):
        def __init__(self, target_key) -> None:
            super().__init__()
            self.address = _address_for_key(target_key)
            kafka_config = {
                'bootstrap.servers': self.address,
                'group.id': "up9-test-group",
                'enable.auto.commit':
                'false'  # important for passive observing
            }
            if "ssl://" in self.address.lower():
                kafka_config['security.protocol'] = 'SSL'

            self.consumer = Consumer(kafka_config)
            self.producer = Producer(kafka_config)
            self.watching_topics = []

            self.consumer.list_topics(timeout=5)  # to check for connectivity

        def watch_topics(self, topics: list):
            def my_on_assign(consumer, partitions):
                logging.debug("On assign: %r", partitions)
                consumer.assign(partitions)
                for partition in partitions:
                    low, high = consumer.get_watermark_offsets(partition)
                    partition.offset = high
                    logging.debug("Setting offset: %r", partition)
                    consumer.seek(partition)

            self.watching_topics.extend(topics)
            self.consumer.subscribe(topics, on_assign=my_on_assign)
            self.consumer.poll(0.01)  # to trigger partition assignments

        def get_watched_messages(self, interval=0.0, predicate=lambda x: True):
            logging.debug(
                "Checking messages that appeared on kafka topics: %r",
                self.watching_topics)
            res = []

            start = time.time()
            while True:
                msg = self.consumer.poll(interval)
                if msg is None or time.time() - start > interval:
                    break  # done reading

                if msg.error():
                    raise KafkaException("kafka consumer error: {}".format(
                        msg.error()))

                logging.debug(
                    "Potential message: %r",
                    (msg.partition(), msg.key(), msg.headers(), msg.value()))
                if predicate(msg):
                    res.append(msg)

            # TODO: consumer.close()
            return res

        def assert_seen_message(self, resp, delay=0, predicate=lambda x: True):
            @recorder.assertion_decorator
            def assert_seen_kafka_message(resp, topics, delay):
                messages = self.get_watched_messages(delay, predicate)
                messages = [(m.topic(), m.key(), m.value(), m.headers())
                            for m in messages]
                if not messages:
                    raise AssertionError("No messages on Kafka topic %r" %
                                         topics)
                else:
                    logging.info("Validated the messages have appeared: %s",
                                 messages)

                return messages

            return assert_seen_kafka_message(resp, self.watching_topics, delay)

        def put(self, topic, data=None, json=None, headers=None):
            # TODO: parse key out of URL
            if topic.startswith('/'):
                topic = topic[1:]

            if data is None and json is not None:
                data = json_lib.dumps(json)

            with apiritif.transaction('kafka://[' + self.address + ']/' +
                                      topic):
                logging.info("Sending message to Kafka topic %r: %r", topic,
                             data)
                self.producer.produce(
                    topic, data, headers=[] if headers is None else headers)
                self.producer.poll(0)
                self.producer.flush()

                wrapped_req = self._make_request(
                    'PUT',
                    'kafka://' + self.address.split(',')[0] + '/' + topic,
                    data)
                wrapped_response = self._make_response(wrapped_req)
                recorder.record_http_request('PUT', self.address, wrapped_req,
                                             wrapped_response,
                                             _context.session)

            return wrapped_response

        def _make_request(self, method, url, request):
            req = requests.Request(method, url=url, data=request)
            prepared = req.prepare()
            _context.grpc_mapping[id(request)] = prepared
            return prepared

        def _make_response(self, wrapped_req):
            resp = requests.Response()
            resp.status_code = 202
            resp.request = wrapped_req
            resp._request = wrapped_req
            resp.msg = 'Accepted'
            resp.raw = io.BytesIO()
            return resp
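A hedged usage sketch for the `Kafka` helper above; the target key, topic and payload are placeholders. Because `watch_topics()` seeks each partition to its current high-water mark, `get_watched_messages()` only returns messages produced after watching started.

kafka = Kafka("my-kafka-target")      # placeholder key, resolved via _address_for_key()
kafka.watch_topics(["orders"])        # placeholder topic
resp = kafka.put("/orders", json={"order_id": 1})
kafka.assert_seen_message(
    resp,
    delay=5,
    predicate=lambda m: b'"order_id": 1' in m.value())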
Example No. 22
    def consume(self, topic, topic_timeout):
        kafka_config_consumer = ConfigFactory(kafka_client="consumer")
        config = kafka_config_consumer.config
        log.info("kafka config for consume %s", config)
        consumer = Consumer(config)

        events = []

        start_time = time.monotonic()
        timeout_start_time = start_time
        timeout_consumer = 10.0

        # actual consumer starts now
        # subscribe to 1 or more topics and define the callback function
        # callback is only received after consumer.consume() is called!
        consumer.subscribe([topic], on_assign=self.callback_on_assignment)
        log.info(
            f"Waiting for partition assignment ... (timeout at {timeout_consumer} seconds)"
        )
        try:
            while (time.monotonic() - timeout_start_time) < timeout_consumer:
                # start consumption
                messages = consumer.consume(timeout=0.1)
                # check for partition assignment
                if self.consume_lock == ConsumerState.PARTITIONS_UNASSIGNED:
                    # this should not happen but we are not 100% sure
                    if messages:
                        log.error("messages consumed but lock is unopened")
                        break
                    continue
                # after partition assignment set the timeout again
                # and reset the start time from which to determine timeout
                # violation
                elif self.consume_lock == ConsumerState.PARTITIONS_ASSIGNED:

                    timeout_start_time = time.monotonic()
                    timeout_consumer = topic_timeout

                    self.consume_lock = ConsumerState.TIMEOUT_SET
                    log.info("Lock has been opened, consuming ...")

                # append messages to the events list to be returned
                if messages:
                    for msg in messages:
                        log.info(f"message at offset: {msg.offset()}, \
                                partition: {msg.partition()}, \
                                topic: {msg.topic()}")
                        # TODO: allow assertions to be on message headers etc.
                        # events.append({
                        #     "key": msg.key,
                        #     "headers": msg.headers,
                        #     "value": msg.value()
                        # })
                        events.append(msg.value())
            # only executed when while condition becomes false
            else:
                # at the end check if the partition assignment was achieved
                if self.consume_lock != ConsumerState.TIMEOUT_SET:
                    log.error("No partition assignments received in time")

        except KafkaException as e:
            log.error(f"Kafka error: {e}")
            pass

        finally:
            consumer.close()

        end_time = time.monotonic()
        log.debug(f"this cycle took: {(end_time - start_time)} seconds")

        return events
Example No. 23
def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    partitions_with_offsets = {}
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    if 'input' in partitions_with_offsets and len(
            partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(
            partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())

                result = {}
                result['operation'] = msg.topic()
                result['input_time'] = msg.timestamp()[1]
                result_dict[wrapped.request_id] = result
            except DecodeError as e:
                print("Could not decode?")
                pass

    partitions_with_offsets['input'] = input_consumer.position(
        input_consumer.assignment())
    input_consumer.close()

    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(
        output_consumer.assignment())
    output_consumer.close()

    print("Total messages considered: " + str(total_messages))

    if total_messages == 0 or end_time - start_time == 0:
        return 0

    return total_messages / (end_time - start_time)