def batch_filtering(cityfilter='ALL', mentionfilter='ALL', tagfilter='ALL'):
    """Fetch the last WINDOW_LEN tweets from the Kafka topic and filter them.

    Reads the logged-in user's name from the ``username`` cookie, rewinds an
    AvroConsumer to ``high_offset - WINDOW_LEN`` (clamped to the low
    watermark) and consumes up to the high watermark, keeping only messages
    that match the requested filters. A filter value of ``'ALL'`` disables
    that filter.

    Args:
        cityfilter: location to match (compared against ``location.lower()``),
            or 'ALL'.
        mentionfilter: ``@mention`` to require (without the '@'), or 'ALL'.
        tagfilter: ``#hashtag`` to require (without the '#'), or 'ALL'.

    Returns:
        ``{"results": [display_message, ...]}`` de-duplicated and sorted by
        the message's embedded timestamp, or an error payload when no
        ``username`` cookie is present.
    """
    if 'username' not in request.cookies:
        # Guard clause instead of the original if/else pyramid.
        # Fixed typo in the user-facing message ("your are" -> "you are").
        return {"results": ['Oooops, you are not logged in...']}

    username = request.cookies['username']
    print(f"Ok, {username}, let's fetch the latest tweets!")
    c = AvroConsumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': username,
        'schema.registry.url': SCHEMA_REGISTRY_URL,
        #'isolation.level': 'read_committed'
    })
    c.assign([TopicPartition(TOPIC, 0, 0)])
    low_offset, high_offset = c.get_watermark_offsets(TopicPartition(TOPIC, 0))

    # Rewind to high_offset - WINDOW_LEN, but never before the low watermark.
    if high_offset - WINDOW_LEN > 0:
        new_offset = high_offset - WINDOW_LEN
    else:
        new_offset = low_offset
    c.seek(TopicPartition(TOPIC, 0, new_offset))

    msgs = []   # (display_message, message_ts) tuples to be returned
    msg = None  # BUGFIX: 'msg' was unbound if the very first poll() raised
    try:
        pos = c.position([TopicPartition(TOPIC, 0, new_offset)])
        while pos[0].offset < high_offset:
            try:
                msg = c.poll(0)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(
                    msg, e))
                break
            if msg is None:
                continue
            if msg.error():
                print("AvroConsumer error: {}".format(msg.error()))
                continue

            value = msg.value()
            author = value['author']
            content = value['content']
            timestamp = datetime.datetime.fromtimestamp(
                float(value['timestamp'])).strftime('%H:%M:%S, %d-%m-%Y')
            message_ts = float(value['timestamp'])
            location = value['location']
            tags = [h[1:] for h in content.split() if h.startswith('#')]
            mentions = [h[1:] for h in content.split() if h.startswith('@')]
            display_message = f"[{author}] {content} ({location} - {timestamp})"
            print(f"[{author}] {content} ({location} - {timestamp})")
            pos = c.position([TopicPartition(TOPIC, 0, new_offset)])

            # Collapsed the former 8-branch if/elif cascade: 'ALL' disables a
            # filter, otherwise the message must satisfy it.
            # NOTE(review): location is lower-cased but cityfilter is compared
            # as-is (original behavior) -- presumably callers pass it
            # lower-cased already; confirm against the form/route.
            if ((cityfilter == 'ALL' or location.lower() == cityfilter)
                    and (mentionfilter == 'ALL'
                         or mentionfilter.lower() in mentions)
                    and (tagfilter == 'ALL' or tagfilter.lower() in tags)):
                msgs.append((display_message, message_ts))
    finally:
        # BUGFIX: always release the consumer, even if the loop raises.
        c.close()

    # De-duplicate (the timeline must not show the same message twice),
    # sort chronologically, then keep only the display strings.
    msgs = sorted(set(msgs), key=lambda x: x[1])
    msgs = [m[0] for m in msgs]
    print(msgs)
    return {"results": msgs}
# Dump the metadata of the last received message, then seek back through
# partition 0 of 'ten-messages-average4' and average the previous ten values.
payload = msg.value()
msg_key = msg.key()
last_offset = int(msg.offset())
partition = msg.partition()
topic = msg.topic()
print('topic', topic)
print('partition', partition)
print('last_offset', last_offset, type(last_offset))
print('key', msg_key)
print('value', payload)
print('')
print('last ten messages:')
total = 0
# Walk the ten offsets ending at last_offset (inclusive).
for target_offset in range(last_offset - 9, last_offset + 1):
    c.seek(TopicPartition('ten-messages-average4', 0, target_offset))
    fetched = msg = c.poll(10)
    fetched_value = fetched.value()
    fetched_offset = fetched.offset()
    print(fetched_value, fetched_offset)
    total += fetched_value
print('total last ten messages: ', total)
print('average: ', total/10)
c.close()
def streaming_filtering():
    """Stream filtered tweets to the client as backtick-delimited JSON chunks.

    Reads city/mention/tag filters from the POSTed form ('ALL' disables a
    filter), positions an AvroConsumer at the topic's high watermark so only
    new messages are seen, and returns a streaming Response whose generator
    keeps yielding the display strings of matching messages received within
    the last STREAMING_WINDOW_SECONDS.

    Returns:
        A Flask ``Response`` wrapping the generator, or an error payload when
        no ``username`` cookie is present.
    """
    cityfilter = request.form['cityfilter']
    mentionfilter = request.form['mentionfilter']
    tagfilter = request.form['tagfilter']
    print(f'cityfilter: {cityfilter}')
    print(f'mentionfilter: {mentionfilter}')
    print(f'tagfilter: {tagfilter}')

    if 'username' not in request.cookies:
        # Guard clause instead of the original if/else pyramid.
        # Fixed typo in the user-facing message ("your are" -> "you are").
        return {"results": ['Oooops, you are not logged in...']}

    username = request.cookies['username']
    print(f"Ok, {username}, let's stream the latest tweets!")
    c = AvroConsumer({
        'bootstrap.servers': BOOTSTRAP_SERVERS,
        'group.id': username,
        'schema.registry.url': SCHEMA_REGISTRY_URL
    })
    c.assign([TopicPartition(TOPIC, 0, 0)])
    low_offset, high_offset = c.get_watermark_offsets(TopicPartition(TOPIC, 0))
    print(f"the latest offset is {high_offset}, the low is {low_offset}")
    print(f"consumer position: {c.position([TopicPartition(TOPIC, 0)])}")
    # Start from the top of the topic: stream only messages produced from now on.
    c.seek(TopicPartition(TOPIC, 0, high_offset))
    msgs = []
    pos = c.position([TopicPartition(TOPIC, 0, high_offset)])

    def _live(messages):
        # Keep only messages younger than the streaming window. This prune
        # was inlined verbatim in three places in the original body.
        now = time.time()
        return [m for m in messages
                if (float(now) - float(m[1])) < STREAMING_WINDOW_SECONDS]

    def gen(msgs):
        """Poll Kafka forever, yielding the current window as ` `-wrapped JSON."""
        msg = None  # BUGFIX: 'msg' was unbound if the very first poll() raised
        while True:
            try:
                msg = c.poll(1)
            except SerializerError as e:
                print("Message deserialization failed for {}: {}".format(
                    msg, e))
                break
            if msg is None:
                # Nothing new: re-emit the (pruned) current window as a
                # keep-alive so the client still gets fresh frames.
                msgs = _live(msgs)
                yield f' `{json.dumps([m[0] for m in msgs])}` '
                continue
            if msg.error():
                msgs = _live(msgs)
                yield f' `{json.dumps([m[0] for m in msgs])}` '
                print("AvroConsumer error: {}".format(msg.error()))
                continue

            value = msg.value()
            author = value['author']
            content = value['content']
            timestamp = datetime.datetime.fromtimestamp(
                float(value['timestamp'])).strftime('%H:%M:%S, %d-%m-%Y')
            location = value['location']
            tags = [h[1:] for h in content.split() if h.startswith('#')]
            mentions = [h[1:] for h in content.split() if h.startswith('@')]
            display_message = f"[{author}] {content} ({location} - {timestamp})"
            # Backticks delimit the streamed payload client-side, so they must
            # not appear inside the message text itself.
            display_message = display_message.replace("`", "'")
            message_ts = float(value['timestamp'])
            print(f"{display_message}")
            print(
                f"consumer position: {c.position([TopicPartition(TOPIC, 0, high_offset)])}"
            )
            pos = c.position([TopicPartition(TOPIC, 0, high_offset)])

            # Collapsed the former 8-branch if/elif cascade: 'ALL' disables a
            # filter, otherwise the message must satisfy it.
            # NOTE(review): location is lower-cased but cityfilter is compared
            # as-is (original behavior) -- presumably the form sends it
            # lower-cased; confirm against the template.
            if ((cityfilter == 'ALL' or location.lower() == cityfilter)
                    and (mentionfilter == 'ALL'
                         or mentionfilter.lower() in mentions)
                    and (tagfilter == 'ALL' or tagfilter.lower() in tags)):
                msgs.append((display_message, message_ts))

            # Prune expired messages, sort chronologically, emit the window.
            msgs = sorted(_live(msgs), key=lambda x: x[1])
            yield f' `{json.dumps([m[0] for m in msgs])}` '

    return Response(stream_with_context(gen(msgs)))