def merge_pre_hi_data_task(hive_db, hive_all_hi_table_name, hive_hi_table_name,
                           is_must_have_data, pt, now_hour, pre_hour_day,
                           pre_hour, **kwargs):
    """Merge the previous-hour hi data into the all-hi table.

    Renders ADD_HI_SQL with the table's Hive column list, executes it via
    the Hive CLI, then touches the _SUCCESS marker for the partition.
    """
    schema_updater = SqoopSchemaUpdate()
    column_names = schema_updater.get_hive_column_name(
        hive_db, hive_all_hi_table_name)

    merge_hql = ADD_HI_SQL.format(
        db_name=hive_db,
        hive_all_hi_table_name=hive_all_hi_table_name,
        hive_hi_table_name=hive_hi_table_name,
        pt=pt,
        now_hour=now_hour,
        pre_hour_day=pre_hour_day,
        pre_hour=pre_hour,
        columns=',\n'.join(column_names))

    logging.info('Executing: %s', merge_hql)

    # Run the merge statement through the Hive CLI.
    HiveCliHook().run_cli(merge_hql)

    # Generate the _SUCCESS marker.
    # First flag "false": the data directory has no country_code partition
    # ("true" would mean it does).
    # Second flag (is_must_have_data) "true": only create _SUCCESS when data
    # exists; "false": create it even when there is no data.
    TaskTouchzSuccess().countries_touchz_success(
        pt, hive_db, hive_all_hi_table_name,
        ALL_HI_OSS_PATH % hive_all_hi_table_name, "false", is_must_have_data,
        now_hour)
# Exemplo n.º 2  (scraped example-site marker — not code)
# 0
def merge_pre_hi_with_full_data_task(hive_db, hive_h_his_table_name,
                                     hive_hi_table_name, mysql_db_name,
                                     mysql_table_name, mysql_conn,
                                     sqoop_temp_db_name, sqoop_table_name, pt,
                                     now_hour, pre_day, pre_hour_day, pre_hour,
                                     is_must_have_data, **kwargs):
    """Merge the hourly hi data with the full history (h_his) snapshot.

    Renders MERGE_HI_WITH_FULL_SQL from the Hive and MySQL column lists,
    executes it via the Hive CLI, then touches the _SUCCESS marker for the
    partition.
    """
    schema_updater = SqoopSchemaUpdate()

    hive_column_names = schema_updater.get_hive_column_name(
        hive_db, hive_h_his_table_name)
    mysql_column_names = schema_updater.get_mysql_column_name(
        mysql_db_name, mysql_table_name, mysql_conn)

    # Midnight of pre_day in epoch milliseconds (local timezone, per
    # time.mktime).
    pre_day_ms = int(time.mktime(time.strptime(pre_day, "%Y-%m-%d"))) * 1000

    merge_hql = MERGE_HI_WITH_FULL_SQL.format(
        columns=',\n'.join(hive_column_names),
        pt=pt,
        now_hour=now_hour,
        db_name=hive_db,
        mysql_db_name=mysql_db_name,
        hive_h_his_table_name=hive_h_his_table_name,
        hive_hi_table_name=hive_hi_table_name,
        mysql_table_name=mysql_table_name,
        pre_day_ms=pre_day_ms,
        mysql_columns=',\n'.join(mysql_column_names),
        sqoop_temp_db_name=sqoop_temp_db_name,
        sqoop_table_name=sqoop_table_name)

    logging.info('Executing: %s', merge_hql)

    # Run the merge statement through the Hive CLI.
    HiveCliHook().run_cli(merge_hql)

    # Generate the _SUCCESS marker.
    # First flag "false": the data directory has no country_code partition
    # ("true" would mean it does).
    # Second flag (is_must_have_data) "true": only create _SUCCESS when data
    # exists; "false": create it even when there is no data.
    TaskTouchzSuccess().countries_touchz_success(
        pt, hive_db, hive_h_his_table_name,
        H_HIS_OSS_PATH % hive_h_his_table_name, "false", is_must_have_data,
        now_hour)
# Exemplo n.º 3  (scraped example-site marker — not code)
# 0
def create_hive_external_table(db, table, conn, **op_kwargs):
    """Create the Hive external ODS table mirroring a MySQL table.

    Syncs the Hive schema via SqoopSchemaUpdate, reads the MySQL column
    definitions from information_schema, maps them to Hive types, and runs
    the resulting CREATE TABLE DDL through the Hive CLI.

    :param db: source MySQL database name
    :param table: source MySQL table name
    :param conn: MySQL connection id, as accepted by get_db_conn

    NOTE(review): relies on module-level names hive_db, hive_table,
    hdfs_path, mysql_type_to_hive and ods_create_table_hql — confirm they
    are defined in this module.
    """
    sqoop_schema = SqoopSchemaUpdate()
    sqoop_schema.update_hive_schema(
        hive_db=hive_db,
        hive_table=hive_table.format(bs=table),
        mysql_db=db,
        mysql_table=table,
        mysql_conn=conn
    )

    # Read the source table's column metadata from MySQL; always release
    # the connection, even if the query fails.
    mysql_conn = get_db_conn(conn)
    try:
        mcursor = mysql_conn.cursor()
        sql = '''
            select 
                COLUMN_NAME, 
                DATA_TYPE, 
                COLUMN_COMMENT,
                COLUMN_TYPE 
            from information_schema.COLUMNS 
            where TABLE_SCHEMA='{db}' and 
                TABLE_NAME='{table}' 
            order by ORDINAL_POSITION
        '''.format(db=db, table=table)
        mcursor.execute(sql)
        res = mcursor.fetchall()
    finally:
        mysql_conn.close()

    columns = []
    for (name, data_type, comment, column_type) in res:
        if data_type.upper() == 'DECIMAL':
            # Keep the full DECIMAL(p,s) spec, stripping MySQL signedness
            # qualifiers that Hive does not understand.
            hive_type = column_type.replace('unsigned', '').replace('signed', '')
        else:
            # Fall back to 'string' for any MySQL type without a mapping.
            hive_type = mysql_type_to_hive.get(data_type.upper(), 'string')
        columns.append("`%s` %s comment '%s'" % (name, hive_type, comment))

    # Build and execute the CREATE TABLE DDL.
    hql = ods_create_table_hql.format(
        db_name=hive_db,
        table_name=hive_table.format(bs=table),
        columns=",\n".join(columns),
        hdfs_path=hdfs_path.format(bs=table)
    )
    logging.info('Executing: %s', hql)
    HiveCliHook().run_cli(hql)
# Exemplo n.º 4  (scraped example-site marker — not code)
# 0
def run_check_table(schema_table_db_name, schema_table_name,
                    target_table_db_name, target_table_name, conn_id,
                    hive_table_name, server_name, **kwargs):
    """Ensure the Hive ODS table exists and tracks the MySQL schema.

    If the Hive table is missing, build its CREATE TABLE DDL from the MySQL
    information_schema column list and run it; otherwise append any new
    columns via SqoopSchemaUpdate.

    :param schema_table_db_name: MySQL database to read the schema from
    :param schema_table_name: MySQL table to read the schema from
    :param target_table_db_name: database name used in the OSS path
    :param target_table_name: table name used in the OSS path
    :param conn_id: Airflow MySQL connection id
    :param hive_table_name: target table name inside HIVE_DB
    :param server_name: server prefix used in the OSS path
    :return: True when append_hive_schema reports a change, otherwise None
    """
    # MySQL types that all map to a plain Hive string column.
    string_types = ('timestamp', 'varchar', 'char', 'text', 'longtext',
                    'mediumtext', 'json', 'datetime')

    # Same OSS location is used by both the create and append branches.
    oss_location = OSS_PATH % ("{server_name}.{db_name}.{table_name}".format(
        server_name=server_name,
        db_name=target_table_db_name,
        table_name=target_table_name))

    # e.g. SHOW TABLES in oride_db LIKE 'data_aa'
    check_sql = 'SHOW TABLES in %s LIKE \'%s\'' % (HIVE_DB, hive_table_name)
    hive2_conn = HiveServer2Hook().get_conn()
    cursor = hive2_conn.cursor()
    cursor.execute(check_sql)
    if len(cursor.fetchall()) == 0:
        logging.info('Create Hive Table: %s.%s', HIVE_DB, hive_table_name)
        # Fetch the source table's column metadata; always release the
        # connection, even if the query fails.
        column_sql = '''
                SELECT
                    COLUMN_NAME,
                    DATA_TYPE,
                    NUMERIC_PRECISION,
                    NUMERIC_SCALE,
                    COLUMN_COMMENT
                FROM
                    information_schema.columns
                WHERE
                    table_schema='{db_name}' and table_name='{table_name}'
            '''.format(db_name=schema_table_db_name,
                       table_name=schema_table_name)
        mysql_hook = MySqlHook(conn_id)
        mysql_conn = mysql_hook.get_conn()
        try:
            mysql_cursor = mysql_conn.cursor()
            mysql_cursor.execute(column_sql)
            results = mysql_cursor.fetchall()
        finally:
            mysql_conn.close()

        rows = []
        for result in results:
            # 'dt' would collide with the Hive partition column; rename it.
            col_name = '_dt' if result[0] == 'dt' else result[0]
            if result[1] in string_types:
                data_type = 'string'
            elif result[1] == 'decimal':
                # Preserve precision and scale: decimal(p,s).
                data_type = '%s(%s,%s)' % (result[1], result[2], result[3])
            else:
                data_type = result[1]
            # Strip newlines from the comment so the DDL stays one statement.
            comment = str(result[4]).replace('\n', '').replace('\r', '')
            rows.append("`%s` %s comment '%s'" % (col_name, data_type, comment))

        # hive create table
        hive_hook = HiveCliHook()
        sql = ODS_CREATE_TABLE_SQL.format(
            db_name=HIVE_DB,
            table_name=hive_table_name,
            columns=",\n".join(rows),
            oss_path=oss_location)
        logging.info('Executing: %s', sql)
        hive_hook.run_cli(sql)

    else:
        # Table already exists: append any newly-added MySQL columns.
        sqoop_schema = SqoopSchemaUpdate()
        response = sqoop_schema.append_hive_schema(
            hive_db=HIVE_DB,
            hive_table=hive_table_name,
            mysql_db=schema_table_db_name,
            mysql_table=schema_table_name,
            mysql_conn=conn_id,
            oss_path=oss_location)
        if response:
            return True
    return