Example #1
def retrieve_partition_offset():
    consumer = KafkaConsumer(bootstrap_servers=servers,
                             group_id='kafka-group-id')
    tp = TopicPartition('kafka-topic', 0)
    consumer.assign([tp])
    print("starting offset is ", consumer.position(tp))
def read_messages():
    if ARG.SERVER:
        server_list = [ARG.SERVER + ':9092']
    else:
        server_list = [
            'kafka.int.janelia.org:9092', 'kafka2.int.janelia.org:9092',
            'kafka3.int.janelia.org:9092'
        ]
    if not ARG.GROUP:
        ARG.GROUP = None
    consumer = KafkaConsumer(bootstrap_servers=server_list,
                             auto_offset_reset=ARG.OFFSET,
                             consumer_timeout_ms=int(5000))
    topics = consumer.topics()
    for topic in tqdm(sorted(topics)):
        COUNT['topics'] += 1
        parts = consumer.partitions_for_topic(topic)
        if parts:
            partitions = [TopicPartition(topic, p) for p in parts]
            eoff = consumer.end_offsets(partitions)
            maxoff = 0
            partnum = -1
            for key in eoff:
                if eoff[key] > maxoff:
                    maxoff = eoff[key]
                    partnum = key.partition
            if not maxoff:
                EMPTY.write("%s\n" % (topic))
                COUNT['empty'] += 1
                continue
            part = TopicPartition(topic, partnum)  # use the partition that actually holds the newest message
            consumer.assign([part])
            consumer.seek(part, maxoff - 1)
            for msg in consumer:
                if msg.timestamp == -1:
                    ERROR.write("%s: %s\n" % (topic, msg))
                    COUNT['timestamp'] += 1
                    break
                today = datetime.today()
                delta = (today -
                         datetime.fromtimestamp(msg.timestamp / 1000)).days
                timestr = strftime("%Y-%m-%d %H:%M:%S %Z",
                                   localtime(msg.timestamp / 1000))
                if delta >= 365:
                    OUTPUT.write("%s\t%s\t%s\n" % (topic, timestr, delta))
                    COUNT['old'] += 1
                else:
                    COUNT['current'] += 1
                break
    print("Topics:                   %d" % (COUNT['topics']))
    print("Topics >= 1 year old:     %d" % (COUNT['old']))
    print("Topics < 1 year old:      %d" % (COUNT['current']))
    print("Empty topics:             %d" % (COUNT['empty']))
    print("Topics missing timestamp: %d" % (COUNT['timestamp']))
    EMPTY.close()
    if not COUNT['empty']:
        remove(EMPTY_FILE)
    ERROR.close()
    if not COUNT['timestamp']:
        remove(ERROR_FILE)
    OUTPUT.close()
    if not COUNT['old']:
        remove(OUTPUT_FILE)
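# read_messages() and retrieve_partition_offset() above rely on module-level
# objects that are not shown in this snippet; a minimal, hypothetical setup
# (argument names, file names and defaults are assumptions, not the original script):
import argparse
from collections import defaultdict
from datetime import datetime
from os import remove
from time import strftime, localtime

from kafka import KafkaConsumer, TopicPartition
from tqdm import tqdm

PARSER = argparse.ArgumentParser()
PARSER.add_argument('--server', dest='SERVER', default='', help='Single Kafka broker to use')
PARSER.add_argument('--group', dest='GROUP', default='', help='Consumer group id')
PARSER.add_argument('--offset', dest='OFFSET', default='earliest', help='auto_offset_reset value')
ARG = PARSER.parse_args()

COUNT = defaultdict(int)                      # per-category topic counters
EMPTY_FILE, ERROR_FILE, OUTPUT_FILE = 'empty_topics.txt', 'timestamp_errors.txt', 'old_topics.txt'
EMPTY = open(EMPTY_FILE, 'w')
ERROR = open(ERROR_FILE, 'w')
OUTPUT = open(OUTPUT_FILE, 'w')
servers = ['kafka.int.janelia.org:9092']      # used by retrieve_partition_offset()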
import time
import json
from kafka import KafkaConsumer, TopicPartition
from hdfs import InsecureClient

consumer = KafkaConsumer('kzmg_all_payment',
                         bootstrap_servers=['172.23.11.150:9092'])
# print(consumer.partitions_for_topic("kzmg_all_payment"))  # Get partition info for the topic
print(consumer.topics())  # Get the list of topics
# print(consumer.subscription())  # Get the topics this consumer is subscribed to
# print(consumer.assignment())  # Get this consumer's assigned topic/partition info
# print(consumer.beginning_offsets(consumer.assignment()))  # Get the earliest offsets for the assigned partitions
# print(consumer.end_offsets(consumer.assignment()))
consumer.seek(TopicPartition(topic=u'kzmg_all_payment', partition=0), 125000)
num = list(consumer.end_offsets(consumer.assignment()).values())[0]  # dict views are not indexable in Python 3
print(num)
i = 0
# t= '2018-05-22'
# timeArray =time.strptime(t,'%Y-%m-%d')
# timeStamp=int(time.mktime(timeArray))
# print(consumer.offsets_for_times({TopicPartition(topic='kzmg_all_payment', partition=0):timeStamp}))
client = InsecureClient('http://lg-11-152.ko.cn:50070', user='******')
print(dir(client))
filePath = '/user/kzcq/datatest/kzmg_payment.json'
tag_list = []

# for message in consumer:
#     print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
#                                           message.offset, message.key,
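# A minimal, hypothetical sketch of what the truncated loop above appears to be
# building towards (not the original code): collect a batch of messages starting
# at the offset sought above and write them to the HDFS file via InsecureClient.
# The batch size of 100 is an arbitrary placeholder.
for message in consumer:
    tag_list.append(message.value.decode('utf-8'))
    if len(tag_list) >= 100:
        break
with client.write(filePath, encoding='utf-8', overwrite=True) as writer:
    writer.write('\n'.join(tag_list) + '\n')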
Example #4
def hgssh():
    '''hgssh component of the vcsreplicator bootstrap procedure.'''
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='Path to config file')
    parser.add_argument('hg', help='Path to hg executable for use in bootstrap process')
    parser.add_argument('--workers', help='Number of concurrent workers to use for publishing messages', type=int,
                        default=multiprocessing.cpu_count())
    args = parser.parse_args()

    config = Config(filename=args.config)

    topic = config.c.get('replicationproducer', 'topic')

    # Create consumer to gather partition offsets
    consumer_config = {
        # set this so offsets are committed to Zookeeper
        'api_version': (0, 8, 1),
        'bootstrap_servers': config.c.get('replicationproducer', 'hosts'),
        'enable_auto_commit': False,  # We don't actually commit but this is just for good measure
    }
    consumer = KafkaConsumer(**consumer_config)

    partitions = consumer.partitions_for_topic(topic)

    # Gather the initial offsets
    topicpartitions = [
        TopicPartition(topic, partition_number)
        for partition_number in sorted(partitions)
    ]
    offsets_start = consumer.end_offsets(topicpartitions)
    logger.info('gathered initial Kafka offsets')

    # Mapping of `replicatesync` future to corresponding repo name
    replicatesync_futures = {}
    with futures.ThreadPoolExecutor(args.workers) as e:
        # Create a future which makes a `replicatesync` call
        # for each repo on hg.mo
        for repo in find_hg_repos(REPOS_DIR):
            # Create a future to call `replicatesync` for this repo
            replicatesync_args = [
                args.hg,
                '-R', repo,
                'replicatesync',
                '--bootstrap',
            ]
            replicatesync_futures.update({
                e.submit(subprocess.check_output, replicatesync_args): repo
            })

            logger.info('calling `replicatesync --bootstrap` on %s' % repo)

        # Execute the futures and raise an Exception on fail
        for future in futures.as_completed(replicatesync_futures):
            repo = replicatesync_futures[future]

            exc = future.exception()
            if exc:
                logger.error('error occurred calling `replicatesync --bootstrap` on %s: %s' % (repo, exc))
                raise Exception('error triggering replication of Mercurial repo %s: %s' %
                                (repo, exc))
            logger.info('called `replicatesync --bootstrap` on %s successfully' % repo)

    # Gather the final offsets
    offsets_end = consumer.end_offsets(topicpartitions)
    logger.info('gathered final Kafka offsets')

    # Create map of partition numbers to (start, end) offset tuples
    offsets_combined = {
        int(topicpartition.partition): (offsets_start[topicpartition], offsets_end[topicpartition])
        for topicpartition in topicpartitions
    }

    # Create JSON for processing in ansible and print to stdout
    # Convert repo paths into their wire representations
    output = {
        'offsets': offsets_combined,
        'repositories': sorted([
            config.get_replication_path_rewrite(repo)
            for repo in replicatesync_futures.values()
        ]),
    }

    print(json.dumps(output))
    logger.info('hgssh bootstrap process complete!')
from kafka import KafkaConsumer, TopicPartition
from config import *

string_deserializer = lambda x: x.decode('utf-8')

#################### Using seek() ###################

# Don't give topic name while creating the consumer, instead use assign method as below
consumer = KafkaConsumer(group_id='some_consumer_group',
                         bootstrap_servers=[BOOTSTRAP_SERVERS],
                         value_deserializer=string_deserializer,
                         auto_offset_reset='latest',
                         consumer_timeout_ms=100000)

partition0 = TopicPartition('string-topic', 0)
partition1 = TopicPartition('string-topic', 1)
partition2 = TopicPartition('string-topic', 2)
consumer.assign([partition0, partition1, partition2])

# Assume the consumer has consumed all messages from all partitions.
# If the current offset for partition0 is then 54, seeking to 52 as below replays
# the last 2 messages (a generalized rewind sketch follows the print loop below).
consumer.seek(partition0, 52)

for msg in consumer:
    print("Consumed[%s-%d] %d: key=%s value=%s" %
          (msg.topic, msg.partition, msg.offset, msg.key, msg.value))
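# A minimal sketch (not part of the original example), assuming the same
# 'string-topic' assignment as above: rewind each partition by N messages
# relative to its current position instead of hard-coding offset 52.
N = 2
for tp in (partition0, partition1, partition2):
    current = consumer.position(tp)          # next offset this consumer would read
    consumer.seek(tp, max(current - N, 0))   # never seek below offset 0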
Example #6
def property_loop_start():
    from device.models import Device
    from tag.models import Tag, TagTrack
    from tag.services import run_callbacks

    consumer = KafkaConsumer(bootstrap_servers=('124.70.129.107:9094',
                                                '124.70.193.90:9094',
                                                '124.70.217.193:9094'))
    topic = TopicPartition(topic='saveProps', partition=2)
    consumer.assign([topic])
    for msg in consumer:
        try:
            print('save prop req')
            target: ConsumerRecord = msg
            data = json.loads(target.value)
            raw_tags = data['services'][0]['properties']['tags']
            print(raw_tags)
            if raw_tags is None:
                tags = []
            else:
                tags = parse_tags_byte_stream(bytes.fromhex(raw_tags))
                if tags is None:
                    continue

            try:
                device = Device.objects.get(device_id=data['device_id'])
            except Device.DoesNotExist:
                print('Unknown device with ID: {}'.format(data['device_id']))
                continue

            event_time = parse(data['services'][0]['event_time'])

            detected_tags = []
            for tid, reader1_id, reader1_dis, reader2_id, reader2_dis, reader3_id, reader3_dis in tags:
                reader1 = get_reader(reader1_id, device)
                reader2 = get_reader(reader2_id, device)
                reader3 = get_reader(reader3_id, device)

                tag = Tag.objects.get_or_create(
                    tid=tid,
                    device=device,
                    defaults={'name': 'TAG_' + str(tid)})[0]
                TagTrack.objects.create(tag=tag,
                                        reader1=reader1,
                                        distance1=reader1_dis,
                                        reader2=reader2,
                                        distance2=reader2_dis,
                                        reader3=reader3,
                                        distance3=reader3_dis,
                                        created=event_time)
                tag.is_online = True
                tag.save()
                detected_tags.append(tag.id)

            # Presence check
            # Rule: only check presence for active tags on active devices (base stations) that were previously online
            invalid_tags = Tag.objects\
                .filter(
                    device=device,
                    device__is_active=True,
                    is_active=True,
                    is_online=True
                )\
                .exclude(id__in=detected_tags)
            invalid_tags_all = list(invalid_tags.all())
            invalid_tags.update(is_online=False)
            for tag in invalid_tags_all:
                print('Callback run: Tag {}'.format(tag))
                run_callbacks(tag, 'lost_signal')

        except (TypeError, KeyError, json.JSONDecodeError):
            print('Malformed data: {}'.format(msg))
Example #7
        self._create_kafka_consumer()

    def reset_consumer_server_topic_and_partition(self, server, topic,
                                                  partition):
        self.bootstrap_servers = server
        self.topic = topic
        self.partition = partition
        self._create_kafka_consumer()


if __name__ == "__main__":
    config = {
        'group_id': 'g_2',
        'client_id': 12,
        'topic': 'topic_yang',
        'partition': 0,
        'bootstrap_servers': '172.31.32.39:9092'
    }
    con = ConsumerOperate(config)
    con.set_consumer_timeout(20000)
    print(
        con.consumer.committed(TopicPartition(topic='test33333', partition=0)))

    for i in con.consumer:
        print("fetching")
        print(i.topic, i.offset)
        con.commit_offset()
        print(
            con.consumer.committed(
                TopicPartition(topic='topic_yang', partition=0)))
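# The ConsumerOperate class above is shown only partially; a minimal, hypothetical
# sketch of the wrapper the __main__ block appears to exercise (method names follow
# the fragment, the implementation details are assumptions, not the original code):
from kafka import KafkaConsumer, TopicPartition


class ConsumerOperate:
    def __init__(self, config):
        self.group_id = config['group_id']
        self.client_id = str(config['client_id'])
        self.topic = config['topic']
        self.partition = config['partition']
        self.bootstrap_servers = config['bootstrap_servers']
        self.consumer_timeout_ms = float('inf')   # kafka-python's default: block forever
        self._create_kafka_consumer()

    def _create_kafka_consumer(self):
        self.consumer = KafkaConsumer(bootstrap_servers=self.bootstrap_servers,
                                      group_id=self.group_id,
                                      client_id=self.client_id,
                                      enable_auto_commit=False,
                                      consumer_timeout_ms=self.consumer_timeout_ms)
        self.consumer.assign([TopicPartition(self.topic, self.partition)])

    def set_consumer_timeout(self, timeout_ms):
        # recreate the consumer so the new timeout takes effect
        self.consumer_timeout_ms = timeout_ms
        self._create_kafka_consumer()

    def commit_offset(self):
        self.consumer.commit()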
Example #8
def test_seek(consumer):
    partition = TopicPartition('test', 2)
    consumer.assign([partition])
    consumer.seek(partition, 3)
def create_topic_partition(topic: str):
    return TopicPartition(topic=topic, partition=0)
Example #10
def test_commit(consumer):
    partition = TopicPartition('test', 2)
    offset_metadata = OffsetAndMetadata(2, 'xx')
    response = consumer.commit({partition: offset_metadata})
    print(response)
Example #11
def test_position(consumer):
    partition = TopicPartition('test', 2)
    consumer.assign([partition])
    response = consumer.position(partition)
    print(response)
Example #12
def test_seek_to_end(consumer):
    partition = TopicPartition('test', 2)
    consumer.assign([partition])
    response = consumer.seek_to_end(partition)
    print(response)
Example #13
def test_seek_to_beginning(consumer):
    partition = TopicPartition('test', 2)
    consumer.assign([partition])
    consumer.seek_to_beginning(partition)
    response = consumer.position(partition)
    print(response)
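# The test_* helpers above appear to assume a pytest fixture named `consumer`;
# a minimal, hypothetical sketch of such a fixture (broker address and timeout
# are placeholders, not taken from the original examples):
import pytest
from kafka import KafkaConsumer


@pytest.fixture
def consumer():
    c = KafkaConsumer(bootstrap_servers='localhost:9092',
                      consumer_timeout_ms=5000)
    yield c
    c.close()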
Example #14
import time
from PIL import Image
import datetime
import os
import cv2
import numpy as np
from kafka import KafkaConsumer, TopicPartition
# settings
client = "192.168.100.100:9092"
topic = 'video'
path = "/home/mooc/videoexample/consumed/"
#consumer = KafkaConsumer(client)
consumer = KafkaConsumer(bootstrap_servers=client)
total_time = time.time()
for i in range(100):
    topic_mod = topic + str(i)
    partitions = TopicPartition(topic_mod, 0)
    consumer.assign([partitions])
    #consumer = KafkaConsumer(client)
    consumer.seek_to_beginning()
    lastoffset = consumer.end_offsets([partitions])[partitions]
    path_mod = path + topic_mod + "/"
    if not os.path.exists(path_mod):
        os.makedirs(path_mod)
    print("topic name: " + topic_mod)
    print(lastoffset)
    for msg in consumer:
        start_time = time.time()
        array = np.frombuffer(msg.value, dtype=np.dtype('uint8'))
        img = cv2.imdecode(array, 1)
        #cv2.imshow('recv',img)
        #cv2.imwrite(mk_path+str(key)+'.jpg', img)
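        # A minimal, hypothetical continuation (not the original code): write each
        # decoded frame under the per-topic directory and stop once the last offset
        # is reached, so the loop does not block waiting for new messages.
        cv2.imwrite(path_mod + str(msg.offset) + '.jpg', img)
        print("frame %d decoded in %.3fs" % (msg.offset, time.time() - start_time))
        if msg.offset >= lastoffset - 1:
            break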
Example #15
def consume(args):
    schema = args.schema
    tables = args.tables
    skip_error = args.skip_error
    assert schema in settings.SCHEMAS, f'schema {schema} must in settings.SCHEMAS'
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    partitions = []
    for table in tables.split(','):
        assert table in settings.TABLES, f'table {table} must in settings.TABLES'

        partition = settings.PARTITIONS.get(f'{schema}.{table}')
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)

    group_id = f'{schema}.{tables}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    consumer.assign(partitions)

    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}')

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        event_list.setdefault(table, []).append(event)
        len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table, tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
Example #16
from json import loads
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(bootstrap_servers=['localhost:9092'],
                         auto_offset_reset='latest',
                         enable_auto_commit=False,
                         group_id='my-group',
                         value_deserializer=lambda x: loads(x.decode('utf-8')))

#assign topic to read from
tp = TopicPartition('measurements_node_2182', 0)
consumer.assign([tp])

consumer.seek_to_end(tp)

for message in consumer:
    print(message.value)
Example #17
from kafka import KafkaConsumer, KafkaProducer, TopicPartition
from util.http_status_server import HttpHealthServer
from util.task_args import get_kafka_binder_brokers, get_input_channel, get_output_channel

import logging
import sys

logger = logging.getLogger('kafka')
logger.addHandler(logging.StreamHandler(sys.stdout))
logger.setLevel(logging.INFO)

logger.warning("Test warning mesage logger 12345 helllo")

consumer = KafkaConsumer(bootstrap_servers=[get_kafka_binder_brokers()],
                         api_version=(0, 9),
                         group_id=None,
                         auto_offset_reset='latest')
producer = KafkaProducer(bootstrap_servers=[get_kafka_binder_brokers()],
                         api_version=(0, 9))

tp = TopicPartition("two-forty.input", 0)
consumer.assign([tp])
consumer.seek_to_end()

HttpHealthServer.run_thread()

counter = 0

while True:
    for message in consumer:
        producer.send("new_message", message)
Example #18
#                                           message.offset, message.key,
#                                           message.value.decode('utf-8')))
#     i+=1
#     if i>100: break
"""消费者(手动设置偏移量)"""
# consumer = KafkaConsumer('phone-game-userinfo', bootstrap_servers=['172.23.11.150:9092'])
consumer = KafkaConsumer('phone-game-userlogin-kong',
                         bootstrap_servers=['172.23.11.150:9092'])
print(consumer.partitions_for_topic(
    "phone-game-userinfo"))  # Get partition info for the phone-game-userinfo topic
print(consumer.topics())  # Get the list of topics
print(consumer.subscription())  # Get the topics this consumer is subscribed to
print(consumer.assignment())  # Get this consumer's assigned topic/partition info
print(consumer.beginning_offsets(consumer.assignment()))  # Get the earliest offsets for the assigned partitions
# consumer.seek(TopicPartition(topic=u'phone-game-userinfo', partition=0), 100875)  # Reset the offset and consume from there
consumer.seek(TopicPartition(topic=u'phone-game-userlogin-kong', partition=0),
              1)
print(consumer.end_offsets(
    consumer.assignment()))  # Get the last offset for the given partitions
print(
    consumer.end_offsets(
        [TopicPartition(topic='phone-game-userlogin-kong',
                        partition=0)]))  # Equivalent to the previous call
t = '2018-05-10'
timeArray = time.strptime(t, '%Y-%m-%d')
timeStamp = int(time.mktime(timeArray)) * 1000  # offsets_for_times() expects milliseconds since the epoch
print(
    consumer.offsets_for_times({
        TopicPartition(topic='phone-game-userlogin-kong', partition=0):
        timeStamp
    }))
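# A minimal follow-up sketch (not part of the original snippet): use the result of
# offsets_for_times() to reposition the consumer on the first message whose
# timestamp is >= the requested time. The lookup returns None for a partition when
# no such message exists, and seek() requires the partition to be assigned.
tp0 = TopicPartition(topic='phone-game-userlogin-kong', partition=0)
ts_offsets = consumer.offsets_for_times({tp0: timeStamp})
if ts_offsets.get(tp0) is not None:
    consumer.seek(tp0, ts_offsets[tp0].offset)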
def main():
    config = Configuration()

    # suppress debugging messages of tensorflow
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # load the scalers of the training data for the normalisation
    scalers = load_scalers(config)

    consumers = []
    limiting_consumer = None

    selection = ''
    while selection not in ['cbs', 'snn']:
        print(
            'Please select architecture that should be used. Type "snn" or "cbs"'
        )
        selection = input()
    print()

    print('Creating consumers ...\n')

    # if using the fabric simulation start at the start of the topics
    # for live classification start at newest messages possible
    offset = 'earliest' if config.testing_using_fabric_sim else 'latest'

    try:
        # create consumers for all topics
        for topic in config.topic_list:
            c = KafkaConsumer(
                topic,
                bootstrap_servers=config.get_connection(),
                value_deserializer=lambda m: json.loads(m.decode('utf-8')),
                auto_offset_reset=offset)

            # based on the topic select one of the consumers for time interval determination
            if topic == config.limiting_topic:
                limiting_consumer = c

            consumers.append(c)
    except errors.NoBrokersAvailable:
        print(
            'Configured kafka server is not available. Please check the connection or change the configuration.'
        )
        sys.exit(0)

    # create and start a classifier thread that handles the classification of processed examples
    print('\nCreating classifier ...')
    print('\nUsed model file:')
    print(config.directory_model_to_use, '\n')

    print('The classifier will use k=' + str(config.k_of_knn) +
          ' for the k-NN algorithm')
    print(
        'The mean similarity output is calculated on the basis of the k most similar cases'
    )
    print('The time span is the time between the end timestamp of the')
    print('interval and the current time right before the output.')
    print(
        'The total time is the time needed for the completely processing the example,'
    )
    print('including the time in the queue.\n')
    classifier = Classifier(config, selection)
    classifier.start()

    print('Waiting for data to classify ...\n')
    try:

        # classify as until interrupted
        while 1:
            start_time = time.perf_counter()
            # read data for a single example from kafka, results contains lists of single messages
            results = read_single_example(consumers, limiting_consumer, config)

            # combine into a single dataframe
            df = list_to_dataframe(results, config)

            # transform dataframe into a array that can be used as neural network input
            example = df.to_numpy()

            # normalize the data of the example
            example = normalise_dataframe(example, scalers)

            # create a queue element containing
            element = (example, df.index[0], df.index[-1], start_time)

            # add element to the queue of examples to classify
            classifier.examples_to_classify.put(element)

            # reset all consumer offsets by two messages to reduce the time intervals that are left out
            for i in range(len(consumers)):
                partition = TopicPartition(config.topic_list[i], 0)
                last_offset = consumers[i].position(partition)
                new_offset = last_offset - 2 if last_offset - 2 >= 0 else 0
                consumers[i].seek(partition, new_offset)

    except KeyboardInterrupt:
        # interrupt the classifier thread
        print('Exiting ...\n')
        classifier.stop = True
def Consumer(thread_name, topic, partition):
    print(
        thread_name,
        "Starting\tDispose",
    )
    global is_dispose
    broker_list = '172.16.90.63:6667, 172.16.90.58:6667, 172.16.90.59:6667'
    '''
    fetch_min_bytes (int) - minimum amount of data the server should return for a fetch request; otherwise it waits
    fetch_max_wait_ms (int) - maximum time in milliseconds the server will block before answering a fetch request
                              when there is not enough data to satisfy fetch_min_bytes
    fetch_max_bytes (int) - maximum amount of data the server should return for a fetch request. This is not an
                            absolute maximum: if the first message in the first non-empty partition fetched is larger
                            than this value, it is still returned so the consumer can make progress. Note: the consumer
                            fetches from multiple brokers in parallel, so memory usage depends on the number of brokers
                            hosting partitions of the topic. Supported for Kafka >= 0.10.1.0. Default: 52428800 (50 MB).
    enable_auto_commit (bool) - if True, the consumer's offsets are committed periodically in the background. Default: True.
    max_poll_records (int) - maximum number of records returned in a single call to poll(). Default: 500
    max_poll_interval_ms (int) - maximum delay between calls to poll() when using consumer group management. This puts an
                                 upper bound on how long the consumer can be idle before fetching more records. If poll()
                                 is not called before this timeout expires, the consumer is considered failed and the group
                                 rebalances to reassign its partitions to another member. Default: 300000
    '''
    consumer = KafkaConsumer(
        bootstrap_servers=broker_list,
        group_id="xiaofei",
        client_id=thread_name,
        # auto_offset_reset="smallest",
        enable_auto_commit=False,
        fetch_min_bytes=1024 * 1024,
        # fetch_max_bytes=1024 * 1024 * 1024 * 10,
        fetch_max_wait_ms=60000,
        request_timeout_ms=305000,
        # consumer_timeout_ms=1,
        # max_poll_records=5000,
        # max_poll_interval_ms=60000  (parameter not available in this kafka-python version)
    )
    dic = get_kafka(topic, partition)

    tp = TopicPartition(topic, partition)
    # print(thread_name, tp, dic['offset'])
    consumer.assign([tp])
    # Seek the partition to the saved offset
    consumer.seek(tp, dic['offset'])
    print("程序首次运行\t线程:", thread_name, "分区:", partition, "偏移量:", dic['offset'],
          "\t开始消费...")
    num = 0
    # end_offset = consumer.end_offsets([tp])[tp]
    # print(end_offset)
    while True:
        args = OrderedDict()
        checkThread()
        msg = consumer.poll(timeout_ms=60000)
        end_offset = consumer.end_offsets([tp])[tp]
        print('Committed offset:', consumer.committed(tp), 'latest offset:', end_offset)
        # Simulate a dying thread, for testing
        # if thread_name=="Thread-1" and num==2:
        #     sys.exit()
        if len(thread_msg) > 0 and is_dispose is True:
            is_dispose = False
            for msg_send in thread_msg:
                exp(msg_send)
                send_msg(msg_send)
            thread_msg.clear()
        if len(msg) > 0:
            print("线程:", thread_name, "分区:", partition, "最大偏移量:", end_offset,
                  "有无数据,", len(msg))
            lines = 0
            for data in msg.values():
                for line in data:
                    lines += 1
                    line = eval(line.value.decode('utf-8'))
                    value, log_name = get_line(col_dic, line)
                    sql = sql_dic[log_name]
                    if value is not None:
                        args.setdefault(sql, []).append(tuple(value))
            print(thread_name, "处理条数", lines)
            # 数据保存至数据库
            is_succeed = save_to_db(args, thread_name)
            if is_succeed:
                # Update the partition offset stored in the database
                is_succeed1 = update_offset(topic, partition, end_offset)
                # Manually commit the offset to Kafka
                consumer.commit(
                    offsets={tp: (OffsetAndMetadata(end_offset, None))})
                # print(thread_name,"to db suss",num+1)
                if is_succeed1 == 0:
                    sys.exit()
            else:
                sys.exit()
        else:
            pass
            # print(thread_name, 'no data')
        # time.sleep(60)
        num += 1
Example #21
def hgweb():
    '''hgweb component of the vcsreplicator bootstrap procedure. Takes a
    vcsreplicator config path on the CLI and takes a JSON data structure
    on stdin'''
    import argparse

    # Configure logging
    logger.setLevel(logging.INFO)
    handler = logging.StreamHandler(sys.stdout)
    handler.setLevel(logging.INFO)
    formatter = logging.Formatter('%(name)s %(message)s')
    formatter.converter = time.gmtime
    handler.setFormatter(formatter)
    logger.addHandler(handler)

    # Parse CLI args
    parser = argparse.ArgumentParser()
    parser.add_argument('config', help='Path of config file to load')
    parser.add_argument('input', help='JSON data input (output from the hgssh bootstrap procedure) file path')
    parser.add_argument('--workers', help='Number of concurrent workers to use for performing clones', type=int,
                        default=multiprocessing.cpu_count())
    args = parser.parse_args()

    logger.info('reading hgssh JSON document')
    with open(args.input, 'r') as f:
        hgssh_data = json.loads(f.read())
        logger.info('JSON document read')

    # Convert the JSON keys to integers
    hgssh_data['offsets'] = {
        int(k): v
        for k, v in hgssh_data['offsets'].items()
    }

    config = Config(filename=args.config)

    consumer_config = {
        # set this so offsets are committed to Zookeeper
        'api_version': (0, 8, 1),
        'bootstrap_servers': config.c.get('consumer', 'hosts'),
        'client_id': config.c.get('consumer', 'client_id'),
        'enable_auto_commit': False,
        'group_id': config.c.get('consumer', 'group'),
        'max_partition_fetch_bytes': MAX_BUFFER_SIZE,
        'value_deserializer': value_deserializer,
    }

    topic = config.c.get('consumer', 'topic')

    topicpartitions = [
        TopicPartition(topic, partition)
        for partition, (start_offset, end_offset)
        in sorted(hgssh_data['offsets'].items())
        # there is no need to do an assignment if the length of the
        # bootstrap message range is 0
        if start_offset != end_offset
    ]

    consumer = KafkaConsumer(**consumer_config)

    # We will remove repos from this set as we replicate them
    # Once this is an empty set we are done
    repositories_to_clone = set(hgssh_data['repositories'])

    extra_messages = collections.defaultdict(collections.deque)  # maps repo names to extra processing messages
    clone_futures_repo_mapping = {}  # maps cloning futures to repo names
    extra_messages_futures_repo_mapping = {}  # maps extra messages futures to repo names

    # Overwrite the default hglib path so handle_message_main and its derivatives
    # use the correct virtualenv
    hglib.HGPATH = config.c.get('programs', 'hg')

    # Maps partitions to the list of messages within the bootstrap range
    aggregate_messages_by_topicpartition = {
        tp.partition: []
        for tp in topicpartitions
    }

    # Gather all the Kafka messages within the bootstrap range for each partition
    for topicpartition in topicpartitions:
        start_offset, end_offset = hgssh_data['offsets'][topicpartition.partition]

        end_offset -= 1

        # Assign the consumer to the next partition and move to the start offset
        logger.info('assigning the consumer to partition %s' % topicpartition.partition)
        consumer.assign([topicpartition])

        logger.info('seeking the consumer to offset %s' % start_offset)
        consumer.seek(topicpartition, start_offset)
        consumer.commit(offsets={
            topicpartition: OffsetAndMetadata(start_offset, '')
        })

        logger.info('partition %s of topic %s moved to offset %s' %
                    (topicpartition.partition, topicpartition.topic, start_offset))

        # Get all the messages we need to process from kafka
        for message in consumer:
            # Check if the message we are processing is within the range of accepted messages
            # If we are in the range, add this message to the list of messages on this partition
            # If we are at the end of the range, break from the loop and move on to the next partition
            if message.offset <= end_offset:
                aggregate_messages_by_topicpartition[message.partition].append(message)
                logger.info('message on partition %s, offset %s has been collected' % (message.partition, message.offset))

            consumer.commit(offsets={
                TopicPartition(topic, message.partition): OffsetAndMetadata(message.offset + 1, ''),
            })

            if message.offset >= end_offset:
                logger.info('finished retrieving messages on partition %s' % message.partition)
                break

    logger.info('finished retrieving messages from Kafka')

    outputdata = collections.defaultdict(list)

    # Process the previously collected messages
    with futures.ThreadPoolExecutor(args.workers) as e:
        for partition, messages in sorted(aggregate_messages_by_topicpartition.items()):
            logger.info('processing messages for partition %s' % partition)
            for message in messages:
                payload = message.value

                # Ignore heartbeat messages
                if payload['name'] == 'heartbeat-1':
                    continue

                if payload['path'] in repositories_to_clone:
                    # If this is not a repo sync message, or the repo sync message
                    # is not tagged with the bootstrap flag, move on to the next
                    # message. The assumed upcoming hg-repo-sync-2 message will
                    # clone the data represented in this message anyways.
                    if payload['name'] != 'hg-repo-sync-2' or not payload['bootstrap']:
                        continue

                    logger.info('scheduled clone for %s' % payload['path'])

                    # Schedule the repo sync
                    clone_future = e.submit(clone_repo, config, payload['path'],
                                            payload['requirements'], payload['hgrc'],
                                            payload['heads'])

                    # Here we register the future against its repo name
                    clone_futures_repo_mapping[clone_future] = payload['path']

                    # Remove the repo from the set of repos
                    # which have not been scheduled to sync
                    repositories_to_clone.remove(payload['path'])
                else:
                    # If the repo is not in the list of repositories to clone,
                    # then we have already scheduled the repo sync and we will
                    # need to process this message once the sync completes.
                    extra_messages[payload['path']].append((config, payload))
                    logger.info('extra messages found for %s: %s total' %
                                (payload['path'], len(extra_messages[payload['path']]))
                    )

        if repositories_to_clone:
            logger.error('did not receive expected sync messages for %s' % repositories_to_clone)

            # Add errors to audit output
            for repo in repositories_to_clone:
                outputdata[repo].append('did not receive sync message')

        # Process clones
        remaining_clones = len(clone_futures_repo_mapping)
        for completed_future in futures.as_completed(clone_futures_repo_mapping):
            repo = clone_futures_repo_mapping[completed_future]

            exc = completed_future.exception()
            if exc:
                message = 'error triggering replication of Mercurial repo %s: %s' % (repo, str(exc))
                logger.error(message)

                # Add error to audit output
                outputdata[repo].append(message)
            else:
                logger.info('%s successfully cloned' % repo)

            remaining_clones -= 1

            logger.info('%s repositories remaining' % remaining_clones)

            # Schedule extra message processing if necessary
            if repo in extra_messages:
                logger.info('scheduling extra processing for %s' % repo)
                configs, payloads = zip(*extra_messages[repo])
                future = e.submit(map, handle_message_main, configs, payloads)
                extra_messages_futures_repo_mapping[future] = repo

        # Process extra messages
        total_message_batches = len(extra_messages_futures_repo_mapping)
        for completed_future in futures.as_completed(extra_messages_futures_repo_mapping):
            repo = extra_messages_futures_repo_mapping[completed_future]

            exc = completed_future.exception()
            if exc:
                message = 'error processing extra messages for %s: %s' % (repo, str(exc))
                logger.error(message)

                # Add error to audit output
                outputdata[repo].append(message)
            else:
                logger.info('extra processing for %s completed successfully' % repo)

            total_message_batches -= 1
            logger.info('%s batches remaining' % total_message_batches)

    logger.info('%s bootstrap process complete' % config.c.get('consumer', 'group'))

    # If anything broke, dump the errors and set exit code 1
    if outputdata:
        with open('/repo/hg/hgweb_bootstrap_out.json', 'w') as f:
            f.write(json.dumps(outputdata))
        return 1
Example #22
def getMsgData(topic, group, result, maxsize):
    try:
        saveResult = SaveDataResult()
        saveResult.guid = str(uuid.uuid4())
        saveResult.CreateDate = datetime.datetime.now().strftime(
            "%Y-%m-%d %H:%M:%S")

        msgInfos = []
        result.guid = saveResult.guid
        result.topic_messages = []

        consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                 enable_auto_commit=False,
                                 group_id=group)

        # Get all partitions by topic
        par = consumer.partitions_for_topic(topic)

        now_count = 0

        for p in par:
            tp = TopicPartition(topic, p)
            consumer.assign([tp])
            print(tp)
            info = MsgPartitionInfo()

            # Get committed offset
            print('start to get committed offset.....')
            try:
                committed = consumer.committed(tp) or 0
            except Exception as e_commit:
                print(str(e_commit))

            # Move consumer to end to get the last position
            consumer.seek_to_end(tp)
            last_offset = consumer.position(tp)

            # Move consumer to beginning to get the first position
            consumer.seek_to_beginning()
            now_offset = consumer.position(tp)
            from_offset = committed

            if from_offset is None:
                from_offset = now_offset

            if from_offset < now_offset:
                from_offset = now_offset

            info.partition_ID = tp.partition
            info.get_last_offset = last_offset
            msgInfos.append(info)

            print("[%s] partition(%s) -> now:%s,  last:%s,  committed:%s" %
                  (tp.topic, tp.partition, now_offset, last_offset, committed))

            # Get msg from position to offset
            while (from_offset < last_offset) and (now_count < maxsize):
                consumer.seek(tp, from_offset)
                polldata = consumer.poll(100)
                from_offset += 1
                now_count += 1
                print('now_count=' + str(now_count))
                result.topic_messages.append(polldata[tp][0].value)

        saveResult.MsgInfo = json.dumps(msgInfos,
                                        default=encode_MsgPartitionInfo,
                                        ensure_ascii=False)
        print(saveResult.MsgInfo)
        consumer.close()
        saveResult.message = "Success"
        saveResult.Code = 200

        producer = KafkaProducer(bootstrap_servers=tmpbootstrap_servers)
        producer.send(topic + "_log",
                      json.dumps(saveResult, default=encode_SaveDataResult))
        producer.flush()
Example #23
def main(input, output, consumer, sc, spark, es):
    print(input)
    # Delete the previous hour's data from Elasticsearch
    es.indices.delete(index='speed-layer-twitter', ignore=[400, 404])
    print("deleting the speed-layer index before starting")

    heureMax = datetime.now().hour + 1
    minutesMax = (datetime.now().hour * 60) + datetime.now().minute + 1
    valeur = {}
    list_hastag = []

    #consumer_timeout_ms=1000
    i = 0
    for message in consumer:
        i = i + 1
        print("%s:%d:%d: key=%s value=%s" %
              (message.topic, message.partition, message.offset, message.key,
               message.value))
        print("OFFSET : ", message.offset)
        # consumer.get.offset
        # previous offset
        # offset = offset - 1
        valeur = json.loads(message.value.decode())
        print(valeur)
        timestamp = valeur["timestamp"]
        datehashtag = valeur['datehashtag']
        hastags = valeur['hashtags']

        # Python list for sc.parallelize
        list_hastag.append(valeur)

        # Determine the maximum hour
        dateHas = datetime.strptime(datehashtag, '%Y/%m/%d %H:%M:%S')
        heureTopic = dateHas.hour
        minuteTopic = dateHas.minute
        totalMinutesTopic = dateHas.hour * 60 + dateHas.minute
        print("heure: %d, minute: %d" % (heureTopic, minuteTopic))
        print("minutetopic: %d, minutemax: %d" %
              (totalMinutesTopic, minutesMax))
        # kappa architecture

        # if past the max hour, exit with offset - 1 and commit the offset

        print(hastags, "à la date : ", datehashtag)
        if totalMinutesTopic >= minutesMax:
            # Mark this message as fully consumed
            # so it can be included in the next commit
            #consumer.task_done(message)

            # Commit the message that was just consumed
            #consumer.commit()
            #offsets = {message.key:OffsetAndMetadata(message.offset, '')}
            meta = consumer.partitions_for_topic("topic_hashtags")
            tp = TopicPartition(message.topic, message.partition)
            offsets = {tp: OffsetAndMetadata(message.offset, None)}
            consumer.commit(offsets=offsets)
            #consumer.commit(OffsetAndMetadata(message.offset, meta))
            # meta = consumer.partitions_for_topic("topic_hashtags")
            # options = {}
            # options[message.partition] = OffsetAndMetadata(message.offset - 1, meta)
            # consumer.commit(options)
            print("On stoppe la boucle")
            break

    # create the Avro records
    # save to HDFS
    schema = {
        "namespace":
        "ffo.hashtag",
        "type":
        "record",
        "name":
        "Node",
        "fields": [
            {
                "name": "datehashtag",
                "type": "string"
            },
            {
                "name": "timestamp",
                "type": "int"
            },
            {
                "name": "hashtags",
                "type": {
                    "type": "array",
                    "items": "string"
                },
                "default": {}
            },
        ]
    }

    hdfs_client = hdfs.InsecureClient("http://0.0.0:50070")
    with hdfs_client.write(output, overwrite=True) as avro_file:
        fastavro.writer(avro_file, schema, list_hastag)
        print("ok")

    # process the dataframe with explode / group-by operations
    # push into Elasticsearch
    distData = sc.parallelize(list_hastag)

    # Convert to a Spark dataframe
    df = distData.toDF()

    # Cache data to avoid re-computing everything
    df.persist()
    dt = df.select(explode(df.hashtags), df.datehashtag, df.timestamp)

    for f in dt.collect():
        print("==============enregistrement elasticsearch===================")
        print(f.col)
        print(f.datehashtag)
        print("==============enregistrement elasticsearch===================")
        elastic(f.timestamp, f.datehashtag, f.col, es)
Example #24
def run_consumer(consumer_config, kafka_config, neo4j_config):
    neo_driver = Neo4jBoltDriver(neo4j_config).connect()

    c = KafkaConsumer(**kafka_config)

    # First we need to retrieve the list of indexes
    records = neo_driver.session.read_transaction(
        lambda tx: tx.run("CALL db.indexes;"))
    labels = [l[10:-6] for l in records.value()]
    logger.info("[*] Initial indexes : {}".format(labels))

    # Let's start !
    conf = {**kafka_config}
    del conf["sasl_plain_password"]
    topics = consumer_config["kafka"]["topics"]
    logger.info("[*] Starting to consume topic {} : {}".format(topics, conf))

    tps = [TopicPartition(topic, 0) for topic in topics]
    c.assign(tps)
    logger.info("Consumer assigned to: {}".format([tp.topic for tp in tps]))

    last_heartbeat = 0

    while True:

        if int(time.time()
               ) - last_heartbeat >= consumer_config["heartbeat"]["delay"]:
            last_heartbeat = trigger_heartbeat(neo_driver)

        batch_messages = c.poll()

        if batch_messages.values():
            # The consumer currently handles only one partition, so there is no need to iterate
            (topic_partition, records), *_ = batch_messages.items()

            # Keep the first and last offset for debug
            first_offset = records[0].offset
            last_offset = records[-1].offset

            # List of formatted messages used in micro-batches
            messages = validate_and_reformat_messages(
                team=topic_partition.topic.split(".")[1],
                records=[r.value for r in records],
            )

            # Check if indexes are created
            new_labels = set(messages["nodes"].keys()) - set(labels)
            for lab in new_labels:
                cypher = "CREATE CONSTRAINT ON (l:`$LABEL$`) ASSERT l.name IS UNIQUE;".replace(
                    "$LABEL$", lab)
                neo_driver.exec_cypher(cypher, {})
                logger.info("[*] {} index created, indexes are now {}".format(
                    lab, labels))
                labels.append(lab)

            start_batch_time = time.time()
            logger.info(
                "[*] Consuming {} with offsets from #{} to #{}...".format(
                    topic_partition.topic, first_offset, last_offset))

            # Let's start the subsets for the nodes
            for label, nodes in messages["nodes"].items():
                cypher = NODE_TEMPLATE.replace("$LABEL$", label)

                start_query_time = time.time()

                _, summary = neo_driver.exec_cypher(cypher, {"nodes": nodes})

                logger.info(
                    "[nodes] {} done using {} messages in {}s : {}".format(
                        label,
                        len(nodes),
                        round(time.time() - start_query_time, 3),
                        summary.counters,
                    ))

            # Let's start the subsets for the relationships
            for source, targets in messages["rels"].items():
                for target, rels in targets.items():
                    cypher = REL_TEMPLATE.replace("$SOURCE$", source)
                    cypher = cypher.replace("$TARGET$", target)

                    start_query_time = time.time()

                    result, summary = neo_driver.exec_cypher(
                        cypher, {"rels": rels})

                    failures, stats = catch_relationship_validation_errors(
                        result)
                    if len(failures):
                        logger.warning(
                            "[validation] {} relationship(s) failed validation"
                            .format(len(failures)))
                    for f in failures:
                        logger.warning(
                            "[validation] relationship properties failed validation : {}"
                            .format(f))

                    logger.info(
                        "[rels] {} -> {} done using {} message(s) in {}s : {}".
                        format(
                            source,
                            target,
                            len(rels),
                            round(time.time() - start_query_time, 3),
                            stats,
                        ))

            logger.info("[*] Batch done in {} seconds".format(
                round(time.time() - start_batch_time, 3)))

            c.commit()
Example #25
    def on_partitions_assigned(self, assigned):
        print(assigned)
        consumer.seek(TopicPartition("Panda_Media", 0), 0)
        return
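# A minimal, hypothetical sketch of how such a rebalance callback is typically
# wired up with kafka-python (assumes ConsumerRebalanceListener is importable from
# the kafka package top level; broker address and group id are placeholders):
from kafka import KafkaConsumer, ConsumerRebalanceListener, TopicPartition


class SeekToStartListener(ConsumerRebalanceListener):
    def __init__(self, consumer):
        self.consumer = consumer

    def on_partitions_revoked(self, revoked):
        pass

    def on_partitions_assigned(self, assigned):
        # rewind every newly assigned partition to offset 0, as in the fragment above
        for tp in assigned:
            self.consumer.seek(tp, 0)


consumer = KafkaConsumer(bootstrap_servers='localhost:9092', group_id='panda-group')
consumer.subscribe(['Panda_Media'], listener=SeekToStartListener(consumer))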
Example #26
logger = get_logger('notzam')

KAFKA_BROKER_URL = os.environ.get('KAFKA_BROKER_URL')


def home(request):
    return render(request, 'ml_home.html')


def model_summary(request: HttpRequest):
    return render(request, 'ml_model_summary.html',
                  {'model_summary': get_model_summary()})


trained = consumer(KAFKA_BROKER_URL)
trained_partition = TopicPartition('trained', 0)
trained.assign([trained_partition])
trained.poll(1)

detected = consumer(KAFKA_BROKER_URL)
detected_partition = TopicPartition('detected', 0)
detected.assign([detected_partition])
detected.poll(1)

logger.info(KAFKA_BROKER_URL)


def training(request):
    if request.is_ajax():
        msg = trained.poll(50)
        msg = _ext_record_value(msg, trained_partition)
Example #27
from kafka import KafkaConsumer
from kafka import TopicPartition
import tornado
import json
import tornado.ioloop
import tornado.web
import tornado.websocket
import tornado.template
from config import KAFKA_BOOTSTRAP_SERVERS
from config import MESSAGE_BURST
import time
from random import randint

consumer = KafkaConsumer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)
consumer.assign([TopicPartition('hello-csv', 0)])


class MainHandler(tornado.web.RequestHandler):
    def get(self):
        loader = tornado.template.Loader(".")
        self.write(loader.load("./resources/templates/graph.html").generate())


class WSHandler(tornado.websocket.WebSocketHandler):
    def check_origin(self, origin):
        return True

    def open(self):
        self.write_message("{}")
        # for message in consumer:
        #   messageJson = json.loads(message.value)
Example #28
def receiveFromKafka(mode, topic_override=None):

    TOPIC = KAFKA_TOPIC_READ
    if (topic_override != None):
        TOPIC = topic_override

    logger.info("Will use topic = {}".format(TOPIC))
    consumer = HerokuKafkaConsumer(
        #KAKFA_TOPIC, # Optional: You don't need to pass any topic at all
        url= KAFKA_URL, # Url string provided by heroku
        ssl_cert= KAFKA_CLIENT_CERT, # Client cert string
        ssl_key= KAFKA_CLIENT_CERT_KEY, # Client cert key string
        ssl_ca= KAFKA_TRUSTED_CERT, # Client trusted cert string
        prefix= KAFKA_PREFIX, # Prefix provided by heroku,
        auto_offset_reset="smallest",
        max_poll_records=100,
        enable_auto_commit=True,
        auto_commit_interval_ms=100,
        group_id=KAFKA_GROUP_ID,
        api_version = (0,9)
    )

    """
    To subscribe to topic(s) after creating a consumer pass in a list of topics without the
    KAFKA_PREFIX.
    """
    partition=1
    
    tp = TopicPartition(KAFKA_PREFIX + TOPIC, partition)
    if (mode == "subscribe"):
        consumer.subscribe(topics=[TOPIC])
    elif (mode == "assign"):
        consumer.assign([tp])

    # display list of partition assignerd
    assignments = consumer.assignment()
    for assignment in assignments:
        logger.debug(assignment)
    
    partitions=consumer.partitions_for_topic(KAFKA_PREFIX + TOPIC)
    if (partitions):
        for partition in partitions:
            logger.debug("Partition="+str(partition))
    
    
    topics=consumer.topics()
    if (topics):
        for topic in topics:
            logger.debug("Topic:"+topic)
    #exit(1)
    logger.debug('waiting ..')
    """
    .assign requires a full topic name with prefix
    """
    

    """
    Listening to events is exactly the same as in kafka-python.
    Read the documentation linked below for more info!
    """
    i=0
    for message in consumer:
        try:
            logger.debug ("%i %s:%d:%d: key=%s value=%s" % (i, message.topic, message.partition,
                                              message.offset, message.key,
                                              message.value))

            dictValue = ujson.loads(message.value)
            logger.debug(dictValue)
            
            # check value in the field Action Type
            if ("payload" in dictValue):
                if (dictValue['payload']['Action_Type__c'] == 'PushNotification'):
                    logger.info("about to send a BROWSER NOTIFICATION using PUSHER")
                    message = dictValue['payload']['message__c']
                    userid = dictValue['payload']['userid__c']
                    notification.sendNotification(userid, message)  
            """
            if ('channel' in  dictValue): # means it's coming from a Platform EVENT via kafka
            if ('host_accept_guest__e'  in dictValue['channel'].lower()): 
                logger.info("about to send a SMS using BLOWER")
                message = "Dear {} {} , {} {} is aware of your arrival and will be here shortly".format(
                    dictValue['data']['payload']['Guest_Firstname__c'],
                    dictValue['data']['payload']['Guest_Lastname__c'],
                    dictValue['data']['payload']['Host_Firstname__c'],
                    dictValue['data']['payload']['Host_Lastname__c'],
                )
                blower.sendMessage(message, dictValue['data']['payload']['Guest_Phone_Number__c'])
            elif ('send_smss__e' in dictValue['channel'].lower()):
                logger.info("about to send a SMS using BLOWER")
                message = dictValue['data']['payload']['message__c']
                phone_Number = dictValue['data']['payload']['phone_Number__c'],
                blower.sendMessage(message, phone_Number)   
            #{'schema': 'h7kPS4B7NEsigjlW7748lg', 
            #   'payload': {
            #           'CreatedById': '0051t000002FB13AAG', 
            #            'message__c': 'Hello ! ', 
            #            'Action_Type__c': 'PushNotification', 
            #            'CreatedDate': '2020-06-16T15:52:45.535Z', 
            #            'userid__c': 'dac11bb3-148e-4b27-a6f2-caf0af09fb0a'}, 'event': {'replayId': 14570697}}}    
            elif ('push_notification__e' in dictValue['channel'].lower()):
                logger.info("about to send a BROWSER NOTIFICATION using PUSHER")
                message = dictValue['data']['payload']['message__c']
                userid = dictValue['data']['payload']['userid__c'],
                notification.sendNotification(userid, message)  
            """
            consumer.commit()
        except Exception as e :
            import traceback
            traceback.print_exc()
            consumer.commit()

        i += 1
import json
import logging
from kafka import KafkaConsumer, TopicPartition

logger = logging.getLogger(__name__)
# enable the debug logger if you want to see ALL of the lines
logging.basicConfig(level=logging.INFO)

# Creating Kafka Consumer
# Consumes Kafka messages

listBootstrapServer = ['127.0.0.1:9092']

consumer = KafkaConsumer(
    bootstrap_servers=listBootstrapServer,
    auto_offset_reset='earliest',
    value_deserializer=lambda m: json.loads(m.decode('ascii')),
    consumer_timeout_ms=1000000,
    group_id='My-first-app')

# Assign
topicPartition = TopicPartition('Learning_Kafka_1', 0)
consumer.assign([topicPartition])

# Seek
consumer.seek(topicPartition, 40)

for message in consumer:
    print("topic=%s partition=%d offset=%d key=%s value=%s" %
          (message.topic, message.partition, message.offset, str(
              message.key), message.value))
Example #30
        info['platform-release']=platform.release()
        info['platform-version']=platform.version()
        info['architecture']=platform.machine()
        info['hostname']=socket.gethostname()
        info['ip-address']=socket.gethostbyname(socket.gethostname())
        info['mac-address']=':'.join(re.findall('..', '%012x' % uuid.getnode()))
        info['processor']=platform.processor()
        info['ram']=str(round(psutil.virtual_memory().total / (1024.0 ** 3)))+" GB"
        return json.dumps(info)
    except Exception as e:
        logging.exception(e)


print("Starting Consumer 2;")
print(json.loads(getSystemInfo()))
consumer = KafkaConsumer(bootstrap_servers="localhost:9093",
                         client_id="number_consumer2",
                         auto_offset_reset='earliest',
                         enable_auto_commit=False,
                         consumer_timeout_ms=1000)

partition = TopicPartition('number', 0)
consumer.assign([partition])
consumer.seek_to_beginning(partition)

sum_numbers = 0
# consumer_timeout_ms=1000 above makes this loop end once no new message arrives for 1 second
for msg in consumer:
    sum_numbers += int.from_bytes(msg.value, 'big')

print("The sum of all numbers recorded is: {}".format(sum_numbers))