Exemplo n.º 1
0
def main():
    """Consume JSON messages from Kafka and acknowledge each one that parses.

    Opens a consumer on ``CONSUMER_TOPIC``, asks HBase to create the target
    table, then iterates over the consumer. Each message value is decoded as
    JSON and printed. A message that cannot be decoded is reported and
    skipped without calling ``kafka.update_offset`` for it.

    The HBase write and the Kafka re-publish steps are currently disabled;
    they are kept below as commented-out code.

    The HBase connection is closed when the loop ends for any reason,
    including an exception raised while consuming.
    """
    # 1. Create the Kafka helper.
    kafka = Kafka()
    # 2. Open a consumer on the source topic.
    consumer = kafka.read_kafka(CONSUMER_TOPIC)
    # 3. Create the HBase helper.
    hbase = HBase()
    try:
        # 4. Create the HBase table.
        hbase.get_create_table(TARGET_TABLE_NAME, COLUMN_FAMILY)

        for message in consumer:
            try:
                data = json.loads(message.value)
                print(data)
            except Exception as e:
                # Deliberate best-effort boundary: one bad message must not
                # stop the consumer, so report it and move on.
                print(e)
                continue
            # obj = data[TARGET_TABLE_NAME][0]
            # row_key = obj[ROW_KEY]
            # 5. Write to HBase (disabled).
            # hbase.put_row(TARGET_TABLE_NAME, COLUMN_FAMILY, row_key, dict(obj))
            # 6. Write to Kafka (disabled).
            # message = json.dumps({TARGET_TABLE_NAME: obj})
            # kafka.write_kafka(PRODUCER_TOPIC, message)
            kafka.update_offset(message)
    finally:
        # Release the HBase connection even if consuming or acknowledging
        # raised; previously close() was skipped on any error.
        hbase.close()
Exemplo n.º 2
0
def main():
    """Copy rows for one source table from Kafka into HBase and re-publish them.

    Iterates over the consumer opened on ``CONSUMER_TOPIC``. For every message
    whose JSON payload has ``'table'`` equal to ``SOURCE_TABLE_NAME``, the
    first element of ``'data'`` is written to ``TARGET_TABLE_NAME`` in HBase
    under the row key found at ``ROW_KEY``, and then published to
    ``PRODUCER_TOPIC`` as ``{TARGET_TABLE_NAME: obj}``.

    Messages that are not valid JSON, or that lack the expected keys or have
    an empty ``'data'`` list, are reported and skipped rather than aborting
    the consumer loop.
    """
    # 1. Create the Kafka helper.
    kafka = Kafka()
    # 2. Open a consumer on the source topic.
    consumer = kafka.read_kafka(CONSUMER_TOPIC)
    # 3. Create the HBase helper.
    hbase = HBase()
    # 4. Create the HBase table.
    hbase.get_create_table(TARGET_TABLE_NAME, COLUMN_FAMILY)
    for message in consumer:
        try:
            data = json.loads(message.value)
            if data['table'] != SOURCE_TABLE_NAME:
                continue
            obj = data['data'][0]
            row_key = obj[ROW_KEY]
            row = dict(obj)
        except (ValueError, TypeError, KeyError, IndexError) as e:
            # Malformed payload (bad JSON, unexpected shape, missing key,
            # empty data list): report it and keep consuming.
            print(e)
            continue
        # 5. Write to HBase.
        hbase.put_row(TARGET_TABLE_NAME, COLUMN_FAMILY, row_key, row)
        # 6. Write to Kafka. A separate name is used so the loop variable
        # `message` is not overwritten by the outgoing JSON string.
        outgoing = json.dumps({TARGET_TABLE_NAME: obj})
        kafka.write_kafka(PRODUCER_TOPIC, outgoing)
def main():
    """Store model-call log messages in HBase and maintain running statistics.

    For each message read from ``CONSUMER_TOPIC``:

    * the JSON payload's ``modelSuccess`` field is replaced by ``isSuccess``
      (``'1'`` when truthy, ``'0'`` otherwise);
    * a statistics row is written to ``STATS_TARGET_TABLE_NAME`` under the key
      ``reversed model code + subject code + timestamp``. If rows with the
      same ``model code + subject code`` prefix already exist, the last one
      returned is used as the base and its ``callDate``, ``callTimes``,
      ``successTimes`` and ``sumTime`` are advanced; otherwise a fresh row is
      created that already accounts for the current call;
    * the full payload is written to ``SINGLE_TARGET_TABLE_NAME`` and
      ``kafka.update_offset`` is called for the message.

    Any exception raised while parsing the payload or updating the statistics
    table is printed and the message is skipped (no single-table write, no
    offset update).
    """
    # 1. Create the Kafka helper.
    kafka = Kafka(group_id='DP3')
    print('1')
    # 2. Open a consumer on the source topic.
    consumer = kafka.read_kafka(CONSUMER_TOPIC)

    # 3. Create the HBase helper.
    hbase = HBase()
    # 4. Create the HBase tables.
    hbase.get_create_table(SINGLE_TARGET_TABLE_NAME, COLUMN_FAMILY)
    hbase.get_create_table(STATS_TARGET_TABLE_NAME, COLUMN_FAMILY)

    for msg in consumer:
        try:
            data = json.loads(msg.value)
            # Rename modelSuccess to isSuccess, encoding True -> '1' and
            # False -> '0'.
            data["isSuccess"] = '1' if data.pop("modelSuccess") else '0'

            print('2')

            row_key_model_code = data[STATS_ROW_KEY_MODEL_CODE]
            row_key_timestamp = data[STATS_ROW_KEY_TIMESTAMP]
            row_key_subject_code = data[STATS_ROW_KEY_SUBJECT_CODE]
            log_time_cost = data["timeCost"]  # response time

            # The model code is reversed before being used in the row key.
            row_key_model_code = row_key_model_code[::-1]

            row_key_single = data[SINGLE_ROW_KEY]
            # Model code (5) + subject identifier (18) + timestamp (13)
            # = 36 characters.
            stats_prefix = row_key_model_code + row_key_subject_code
            row_key_stats = stats_prefix + row_key_timestamp

            result = hbase.get_rows(STATS_TARGET_TABLE_NAME,
                                    row_prefix=stats_prefix)
            print('3')
            if result:
                # A statistics record already exists for this model/subject.
                print('4')
                # NOTE(review): assumes the last returned row is the most
                # recent one — confirm against HBase.get_rows ordering.
                tmp_data = result[-1]
                # Advance callDate, callTimes, successTimes and sumTime.
                tmp_data["callDate"] = row_key_timestamp
                tmp_data["callTimes"] = str(int(tmp_data["callTimes"]) + 1)
                if data["isSuccess"] == '1':
                    tmp_data["successTimes"] = str(
                        int(tmp_data["successTimes"]) + 1)
                tmp_data["sumTime"] = str(
                    int(tmp_data["sumTime"]) + log_time_cost)
                print("tmp_data: ", tmp_data)
                hbase.put_row(STATS_TARGET_TABLE_NAME, COLUMN_FAMILY,
                              row_key_stats, tmp_data)
                print('5')
            else:
                # No record yet: initialise the statistics row. The counters
                # start from the current call (previously they started at
                # zero, so the first call was never counted).
                print('6')
                tmp_data = {
                    "callDate": row_key_timestamp,
                    "subjectCode": row_key_subject_code,
                    "subjectName": data["subjectName"],
                    "subjectType": data["subjectType"],
                    # NOTE(review): this stores the reversed model code —
                    # confirm that is intended.
                    "modelCode": row_key_model_code,
                    "modelName": data["modelName"],
                    "callTimes": '1',
                    "successTimes": data["isSuccess"],
                    "sumTime": str(log_time_cost),
                }
                hbase.put_row(STATS_TARGET_TABLE_NAME, COLUMN_FAMILY,
                              row_key_stats, tmp_data)
                print('7')

        except Exception as e:
            # Deliberate best-effort boundary: report the failure and move on
            # to the next message.
            print(e)
            continue
        # 5. Write to HBase.
        # STEP1: write the full payload into the hbase al:s003_evt_model_log
        # table.
        hbase.put_row(SINGLE_TARGET_TABLE_NAME, COLUMN_FAMILY, row_key_single,
                      data)
        kafka.update_offset(msg)
        print('8')