Example #1
from kafka import KafkaConsumer

def consumer():
    # consumer = SimpleConsumer(client, group=None,
    # topic=topic, partitions=[0, ],
    #                                   auto_commit=False)
    # node_id = list(consumer.client.conns.keys())[0]
    # print dir(consumer.client.conns[node_id])
    # for i in consumer.get_messages(100):
    # 	print i.offset
    # consumer.commit()
    # from pykafka import KafkaClient
    #
    # client = KafkaClient(hosts="127.0.0.1:9092")
    # print client.topics
    # topic1 = client.topics[topic]
    # consumer = topic1.get_simple_consumer(auto_commit_enable=True, )
    # for message in consumer:
    #     if message is not None:
    #         print message.offset, message.value

    connect_str = '127.0.0.1:9092'
    # The kwargs below follow the legacy kafka-python (pre-1.0) consumer API,
    # where auto_offset_reset accepts 'largest' or 'smallest'.
    consumer = KafkaConsumer(topic, group_id='my-group', bootstrap_servers=[connect_str],
                             auto_offset_reset='largest', auto_commit_enable=True,
                             auto_commit_interval_messages=1000)
    consumer.set_topic_partitions((topic, 2, 50032),)  # optionally specify an offset to start from
    # kafka.set_topic_partitions("topic1", ("topic2", 2), {"topic3": 0}) #partition只能被一个消费者消费,所以最好指定消费哪个partitions
    # kafka.set_topic_partitions({ ("topic1", 0): 12, ("topic2", 1): 45 })
    # print consumer.topics
    for message in consumer:
        print ("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                             message.offset, message.key,
                                             message.value))
    consumer.commit()
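
The example above targets the legacy kafka-python 0.9 consumer interface (auto_commit_enable, set_topic_partitions, 'largest'/'smallest' offsets). For reference, a minimal sketch of the same assign-at-an-offset-and-poll flow on kafka-python 1.0+ could look like the following; the topic name, partition number and starting offset are placeholders carried over from the example.

from kafka import KafkaConsumer, TopicPartition

# Sketch only: modern kafka-python (>=1.0) equivalent of the legacy example above.
consumer = KafkaConsumer(bootstrap_servers=['127.0.0.1:9092'],
                         group_id='my-group',
                         enable_auto_commit=False,
                         auto_offset_reset='latest')  # 'latest' replaces 'largest'
tp = TopicPartition('topic', 2)   # placeholder topic name and partition
consumer.assign([tp])             # manual partition assignment
consumer.seek(tp, 50032)          # start from an explicit offset
for message in consumer:
    print("%s:%d:%d: key=%s value=%s" % (message.topic, message.partition,
                                         message.offset, message.key, message.value))
    consumer.commit()             # commit explicitly since auto-commit is disabled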
Example #2
def consume(args):
    schema = args.schema
    table = args.table
    assert schema in settings.SCHEMAS, 'schema must be in settings.SCHEMAS'
    assert table in settings.TABLES, 'table must be in settings.TABLES'
    group_id = f'{schema}.{table}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    topic = settings.KAFKA_TOPIC
    partition = settings.PARTITIONS.get(group_id)
    consumer.assign([TopicPartition(topic, partition)])
    event_list = []
    logger.info(
        f'success consume topic:{topic},partition:{partition},schema:{schema},table:{table}'
    )
    pk = reader.get_primary_key(schema, table)
    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_list.append(event)
        len_event = len(event_list)
        if len_event == settings.INSERT_NUMS or (
                (int(time.time() * 10**6) - event_list[0]['event_unixtime']) / 10**6
                >= settings.INSERT_INTERVAL > 0):
            data_dict = {}
            tmp_data = []
            for items in event_list:
                action = items['action']
                action_core = items['action_core']
                data_dict.setdefault(table + schema + action + action_core,
                                     []).append(items)
            for k, v in data_dict.items():
                tmp_data.append(v)
            result = writer.insert_event(tmp_data, settings.SKIP_TYPE,
                                         settings.SKIP_DELETE_TB_NAME, schema,
                                         table, pk)
            if result:
                event_list = []
                consumer.commit()
                logger.info(f'commit success {len_event} events!')
            else:
                logger.error('insert event error!')
                exit()
Example #3
def commitTopic(topic, group, partition, commit_offset):
    try:
        print(
            '===================================================================================='
        )
        print('[commitTopic] : topic=' + topic + ', group=' + group +
              ', partition=' + str(partition) + ', commit_offset=' +
              str(commit_offset))
        consumer2 = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                                  enable_auto_commit=False,
                                  group_id=group)
        tp = TopicPartition(topic, partition)

        if int(commit_offset) > 0:

            consumer2.commit({tp: OffsetAndMetadata(commit_offset, None)})

    except Exception as ee:
        print('error when commit Topic')
        print(str(ee))
    finally:
        print('commitTopic end')
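
A companion sketch, assuming the same module-level tmpbootstrap_servers value as the example above: after commitTopic() runs, the stored group offset can be read back with KafkaConsumer.committed() to verify that the commit took effect.

from kafka import KafkaConsumer, TopicPartition

def committed_offset(topic, group, partition):
    # tmpbootstrap_servers is assumed to exist, as in commitTopic() above
    consumer = KafkaConsumer(bootstrap_servers=tmpbootstrap_servers,
                             enable_auto_commit=False,
                             group_id=group)
    tp = TopicPartition(topic, partition)
    offset = consumer.committed(tp)  # last committed offset for this group/partition, or None
    consumer.close()
    return offset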
Example #4
def consume(args):
    schema = args.schema
    tables = args.tables
    skip_error = args.skip_error
    assert schema in settings.SCHEMAS, f'schema {schema} must be in settings.SCHEMAS'
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    partitions = []
    for table in tables.split(','):
        assert table in settings.TABLES, f'table {table} must be in settings.TABLES'

        partition = settings.PARTITIONS.get(f'{schema}.{table}')
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)

    group_id = f'{schema}.{tables}'
    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=group_id,
        auto_offset_reset='earliest',
    )
    consumer.assign(partitions)

    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}')

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10 ** 6
        table = event['table']
        schema = event['schema']
        event_list.setdefault(table, []).append(event)
        len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table, tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
Example #5
def consume(args):
    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    topic = settings.KAFKA_TOPIC
    tables_pk = {}
    tables = settings.SCHEMAS.get(schema)
    partitions = []
    for table in tables:
        partition = settings.PARTITIONS.get(schema)
        tp = TopicPartition(topic, partition)
        partitions.append(tp)
        tables_pk[table] = reader.get_primary_key(schema, table)

    consumer = KafkaConsumer(
        bootstrap_servers=settings.KAFKA_SERVER,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    consumer.assign(partitions)

    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f'success consume topic:{topic},partitions:{partitions},schema:{schema},tables:{tables}'
    )

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f'kafka msg:{msg}')
        event = msg.value
        event_unixtime = event['event_unixtime'] / 10**6
        table = event['table']
        schema = event['schema']
        action = event['action']

        if action == 'query':
            do_query = True
            query = event['values']['query']
        else:
            do_query = False
            query = None
            event_list.setdefault(table, []).append(event)
            len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.INSERT_NUMS:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.INSERT_INTERVAL > 0:
                is_insert = True
        if is_insert or do_query:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item['action']
                    action_core = item['action_core']
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error('insert event error!')
                        if not skip_error:
                            exit()

                    if settings.UI_ENABLE:
                        insert_into_redis('consumer', schema, table, len(v1))

                except Exception as e:
                    logger.error(f'insert event error!,error:{e}')
                    if not skip_error:
                        exit()
            if do_query:
                try:
                    logger.info(f'execute query:{query}')
                    writer.execute(query)
                except Exception as e:
                    logger.error(f'execute query error!,error:{e}')
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f'commit success {events_num} events!')
            event_list = {}
            is_insert = False
            len_event = last_time = 0
Example #6
class IBUSStreamingDownsamplingConsumer:
    LOG_FORMAT ="{} UTC_TS\t"\
                "{}"

    def __init__(self, kafkaHost, kafkaPort, tcpHost, tcpPort, group_id, topic,
                 logTopic, interval):
        self.kafkaHost = kafkaHost
        self.kafkaPort = kafkaPort
        self.tcpHost = tcpHost
        self.tcpPort = tcpPort
        self.group_id = group_id
        self.topic = topic
        self.logTopic = logTopic
        self.interval = int(interval)
        self.consumer = KafkaConsumer(
            topic,
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)],
            group_id=group_id,
            enable_auto_commit=False)
        self.producer = KafkaProducer(
            bootstrap_servers=["{}:{}".format(kafkaHost, kafkaPort)])
        self.tcpWriter = None

    def getTopicPartitions(self):
        # Calling topics() ensures the local cache is updated with
        # information about partitions, offsets, etc.
        self.consumer.topics()
        pids = self.consumer.partitions_for_topic(self.topic)
        tps = [TopicPartition(self.topic, pid) for pid in pids]
        return tps

    def getTopicPartitionsCommittedPositions(self):
        tps = self.getTopicPartitions()
        ret = [(tp, self.consumer.committed(tp)) for tp in tps]
        return ret

    async def tcp_server_handler(self, reader, writer):
        addr = str(writer.get_extra_info("socket").getpeername())
        if self.tcpWriter is not None:
            self.log("refused " + addr)
            writer.write(b"Connection limit reached; connection refused.")
            writer.close()
            return
        self.log("accepted " + addr)
        self.tcpWriter = writer
        t1 = asyncio.create_task(self.poll_from_Kafka(writer))
        try:
            while True:
                data = await reader.read(1)  # read a single byte just to detect client disconnect
                if not data:
                    break
        except BrokenPipeError:
            """
      Catches connecton reset by peer when we are sending the batched data,
       which is also when we cannot check for reader. The broken connection
       on the writer side will ultimately lead to  BrokenPipeError on the
       reader side. Hence
      """
            pass
        finally:
            t1.cancel()
            self.log("closed " + addr)
            writer.close()
            self.tcpWriter = None

    async def poll_from_Kafka(self, writer):
        while True:
            prevPos = self.getTopicPartitionsCommittedPositions()
            polled = self.consumer.poll(timeout_ms=1000)
            records = [
                record.value for recordList in polled.values()
                for record in recordList
            ]
            try:
                for record in records:
                    writer.write(record)
                    await writer.drain()
            except ConnectionResetError:
                """
        The error is not thrown reliably. If a connection is broken, and
         one try to
            writer.write(record)
            await writer.drain()
         This error may not manifest. It is thrown more often when one try
         to repeatedly write to and drain a broken connection.
        """
                print("Last batch not fully sent, not commited.")
                for tp, pos in prevPos:
                    self.consumer.seek(tp, pos)
                break
            else:
                self.consumer.commit()
            await asyncio.sleep(self.interval)

    def log(self, msg):
        self.producer.send(
            self.logTopic,
            self.LOG_FORMAT.format(datetime.now().timestamp(), msg).encode()
        )

    def cleanup(self):
        self.log("shutdown")
        self.consumer.close()
        self.producer.flush()
        self.producer.close()

    def run(self):
        self.log("running")
        asyncio.run(self._async_run())

    async def _async_run(self):
        tcpServer = await asyncio.start_server(self.tcp_server_handler,
                                               self.tcpHost, self.tcpPort)
        await tcpServer.serve_forever()
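
A hypothetical way to wire the class up; every host, port, topic and group value below is a placeholder, and cleanup() is the shutdown hook the class itself defines.

if __name__ == "__main__":
    consumer = IBUSStreamingDownsamplingConsumer(
        kafkaHost="localhost", kafkaPort=9092,
        tcpHost="0.0.0.0", tcpPort=8765,
        group_id="downsampler", topic="ibus-raw",
        logTopic="ibus-log", interval=5)
    try:
        consumer.run()        # serves TCP clients and relays batched Kafka records
    except KeyboardInterrupt:
        consumer.cleanup()    # logs shutdown, closes consumer, flushes and closes producer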
Example #7
def consume(args):
    settings = Global.settings
    writer = Global.writer
    reader = Global.reader

    schema = args.schema
    skip_error = args.skip_error
    auto_offset_reset = args.auto_offset_reset
    offset = args.offset

    topic = settings.kafka_topic
    tables_pk = {}
    schema_table = settings.schema_table.get(schema)
    tables = schema_table.get("tables")

    for table in tables:
        tables_pk[table] = reader.get_primary_key(schema, table)

    consumer = KafkaConsumer(
        bootstrap_servers=settings.kafka_server,
        value_deserializer=lambda x: json.loads(x, object_hook=object_hook),
        key_deserializer=lambda x: x.decode() if x else None,
        enable_auto_commit=False,
        group_id=schema,
        auto_offset_reset=auto_offset_reset,
    )
    partition = schema_table.get("kafka_partition")
    topic_partition = TopicPartition(topic, partition)
    consumer.assign([topic_partition])
    if offset:
        consumer.seek(topic_partition, offset)
    event_list = {}
    is_insert = False
    last_time = 0
    len_event = 0
    logger.info(
        f"success consume topic:{topic},partitions:{partition},schema:{schema},tables:{tables}"
    )

    for msg in consumer:  # type:ConsumerRecord
        logger.debug(f"kafka msg:{msg}")
        event = msg.value
        event_unixtime = event["event_unixtime"] / 10**6
        table = event["table"]
        schema = event["schema"]
        action = event["action"]

        if action == "query":
            alter_table = True
            query = event["values"]["query"]
        else:
            alter_table = False
            query = None
            event_list.setdefault(table, []).append(event)
            len_event += 1

        if last_time == 0:
            last_time = event_unixtime

        if len_event == settings.insert_num:
            is_insert = True
        else:
            if event_unixtime - last_time >= settings.insert_interval > 0:
                is_insert = True
        if is_insert or alter_table:
            data_dict = {}
            events_num = 0
            for table, items in event_list.items():
                for item in items:
                    action = item["action"]
                    action_core = item["action_core"]
                    data_dict.setdefault(table, {}).setdefault(
                        table + schema + action + action_core, []).append(item)
            for table, v in data_dict.items():
                tmp_data = []
                for k1, v1 in v.items():
                    events_num += len(v1)
                    tmp_data.append(v1)
                try:
                    result = writer.insert_event(tmp_data, schema, table,
                                                 tables_pk.get(table))
                    if not result:
                        logger.error("insert event error!")
                        if not skip_error:
                            exit()
                except Exception as e:
                    logger.error(f"insert event error!,error:{e}")
                    if not skip_error:
                        exit()
            if alter_table:
                try:
                    logger.info(f"execute query:{query}")
                    writer.execute(query)
                except Exception as e:
                    logger.error(f"execute query error!,error:{e}")
                    if not skip_error:
                        exit()
            consumer.commit()
            logger.info(f"commit success {events_num} events!")
            event_list = {}
            is_insert = False
            len_event = last_time = 0
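
A hypothetical CLI wrapper for the consume() entry point above; the argument names mirror the attributes the function reads (schema, skip_error, auto_offset_reset, offset), while the flag spellings and defaults are illustrative.

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="consume change events from Kafka and batch-insert them")
    parser.add_argument("--schema", required=True)
    parser.add_argument("--skip-error", dest="skip_error", action="store_true")
    parser.add_argument("--auto-offset-reset", dest="auto_offset_reset", default="earliest")
    parser.add_argument("--offset", type=int, default=None)
    consume(parser.parse_args())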