def run_hive(configData: ConfigData):
    """Load one dated HDFS file into a Hive table.

    The file path is built as ``<hdfs_path>/<f_date>/<file_name(f_date)>``.
    If ``MyHdfsFile.isfile`` reports the file as present, a
    ``LOAD DATA INPATH`` statement is executed for it; when
    ``configData.get_has_partition()`` is true the statement targets the
    ``p_date`` partition. Nothing is loaded when the file is absent.

    The Hive cursor and connection are closed even if a step fails, so an
    error during the load does not leak the connection.

    Args:
        configData: Supplies the HDFS URL, the Hive connection settings, the
            two date strings, the HDFS path, file name, table name and the
            partition flag read below.
    """
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    conn = connect(
        host=configData.hive_ip(),
        port=configData.hive_port(),
        auth_mechanism=configData.hive_auth(),
        user=configData.hive_user(),
    )
    try:
        cur = conn.cursor()
        try:
            f_date_str = configData.get_f_date()  # e.g. "20181101"
            p_date_str = configData.get_p_date()  # e.g. "2018-11-01"

            # Dated directory, e.g. "/data/posflow/allinpay_utf8_zc/20181101/",
            # holding files such as 20181101_loginfo_rsp_bl_new.csv,
            # 20181101_rsp_agt_bl_new.del or 20181101_rxinfo_rsp_bl.txt.
            dated_dir = pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str)
            file_name = str(dated_dir.joinpath(configData.get_file_name(f_date_str)))

            table_name = configData.get_table_name()  # e.g. 'test.t1_trxrecprd_v2_zc'

            print("Start\n")

            if MyHdfsFile.isfile(a_client, file_name):
                if configData.get_has_partition():
                    sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(
                        file_name, table_name, p_date_str
                    )
                else:
                    sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(file_name, table_name)
                print("OK  " + sql + "\n")
                cur.execute(sql)
        finally:
            # Release the cursor whether or not the load succeeded.
            cur.close()
    finally:
        conn.close()
# Example #2
0
def run_hive(configData: ConfigData):
    """Load the dated file of every branch directory into a Hive table.

    Lists the children of ``<hdfs_path>/<f_date>`` via
    ``MyHdfsFile.get_child``. For every child accepted by
    ``MyHdfsFile.check_branch`` it lists that child's entries, and for every
    entry accepted by ``MyHdfsFile.check_file`` it executes one
    ``LOAD DATA INPATH`` statement. When ``configData.get_has_partition()``
    is true the statement targets the ``p_date`` partition.

    The Hive cursor and connection are closed even if a step fails, so one
    failing load does not leak the connection.

    Args:
        configData: Supplies the HDFS URL, the Hive connection settings, the
            two date strings, the HDFS path, file name, table name and the
            partition flag read below.
    """
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    conn = connect(
        host=configData.hive_ip(),
        port=configData.hive_port(),
        auth_mechanism=configData.hive_auth(),
        user=configData.hive_user(),
    )
    try:
        cur = conn.cursor()
        try:
            f_date_str = configData.get_f_date()  # e.g. "20181101"
            p_date_str = configData.get_p_date()  # e.g. "2018-11-01"

            root_path = configData.get_hdfs_path()  # e.g. "/shouyinbao/bl_shouyinbao/UTF8/"
            file_name = configData.get_file_name(f_date_str)  # e.g. "t1_trxrecord_20181101_V2.csv"
            table_name = configData.get_table_name()  # e.g. 'test.t1_trxrecprd_v2_zc'

            print("Start\n")

            # The dated directory does not change per branch, so build it once.
            dated_dir = pathlib.PurePosixPath(root_path).joinpath(f_date_str)

            idn = 0  # running count of LOAD statements issued
            branches = MyHdfsFile.get_child(a_client, str(dated_dir))
            for aBranch in branches:
                if not MyHdfsFile.check_branch(a_client, aBranch):
                    continue
                files = MyHdfsFile.get_child(a_client, aBranch)
                f_a_branch = MyHdfsFile.get_name(aBranch)
                for aFile in files:
                    if not MyHdfsFile.check_file(a_client, aFile, file_name):
                        continue
                    # NOTE(review): the load path is rebuilt from the branch
                    # name and file_name instead of using aFile directly, e.g.
                    # '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(dated_dir.joinpath(f_a_branch, file_name))
                    if configData.get_has_partition():
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(
                            to_file2, table_name, p_date_str
                        )
                    else:
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(to_file2, table_name)
                    idn += 1
                    print(str(idn) + "  " + sql + "\n")
                    cur.execute(sql)
        finally:
            # Release the cursor whether or not every load succeeded.
            cur.close()
    finally:
        conn.close()
def run_remove_hive(configData: ConfigData):
    """Remove previously loaded data for the configured date from a Hive table.

    For a table without partitions, delegates to
    ``MyHdfsFile.delete_hive_ssh`` with a file pattern derived from the
    configured file name (``'.'`` replaced by ``'*.'``). For a partitioned
    table, executes ``ALTER TABLE ... DROP IF EXISTS PARTITION`` for the
    ``p_date`` partition.

    The Hive cursor and connection are closed even if the statement fails.

    Args:
        configData: Supplies the date strings, table name, file name,
            partition flag and the CDH / Hive connection settings read below.
    """
    f_date_str = configData.get_f_date()  # e.g. "20181101"
    p_date_str = configData.get_p_date()  # e.g. "2018-11-01"

    del_table = configData.get_table_name()  # e.g. "rds_posflow.loginfo_rsp_bl"
    # Diagnostic dump of the settings used below; the pieces are concatenated
    # without separators.
    print(configData.cdh_ip()+del_table+f_date_str+configData.get_file_name(f_date_str)+configData.hive_ip())

    if not configData.get_has_partition():
        # Turn "name.ext" into the pattern "name*.ext".
        del_file = configData.get_file_name(f_date_str).replace('.', '*.')
        # NOTE(review): presumably deletes the matching table files over SSH;
        # confirm against MyHdfsFile.delete_hive_ssh.
        MyHdfsFile.delete_hive_ssh(
            configData.cdh_ip(),
            table=del_table,
            p_name=del_file,
            username=configData.cdh_user(),
            password=configData.cdh_pass(),
        )
        return

    conn = connect(
        host=configData.hive_ip(),
        port=configData.hive_port(),
        auth_mechanism=configData.hive_auth(),
        user=configData.hive_user(),
    )
    try:
        cur = conn.cursor()
        try:
            # e.g. "ALTER TABLE rds_posflow.t1_trxrecprd_v2_tmp DROP IF EXISTS PARTITION( p_date='2019-02-08' )"
            sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date='{}' )".format(del_table, p_date_str)
            print(sql)
            cur.execute(sql)
        finally:
            # Release the cursor whether or not the statement succeeded.
            cur.close()
    finally:
        conn.close()
# Example #4
0
def run_remove_hive(configData: ConfigData):
    """Remove previously loaded data for the configured date from a Hive table.

    For a table without partitions, delegates to
    ``MyHdfsFile.delete_hive_ssh`` with a file pattern derived from the
    configured file name (``'.'`` replaced by ``'*.'``). For a partitioned
    table, executes ``ALTER TABLE ... DROP IF EXISTS PARTITION`` for the
    ``p_date`` partition.

    The Hive cursor and connection are closed even if the statement fails.

    Args:
        configData: Supplies the date strings, table name, file name,
            partition flag and the CDH / Hive connection settings read below.
    """
    f_date_str = configData.get_f_date()  # e.g. "20181101"
    p_date_str = configData.get_p_date()  # e.g. "2018-11-01"
    # Example target of the non-partitioned cleanup:
    # "/user/hive/warehouse/rds_posflow.db/t1_trxrecprd_v2/t1_trxrecord_20181204_V2*.csv"

    del_table = configData.get_table_name()  # e.g. "rds_posflow.t1_trxrecprd_v2"

    if not configData.get_has_partition():
        # Turn "name.ext" into the pattern "name*.ext".
        del_file = configData.get_file_name(f_date_str).replace('.', '*.')
        # NOTE(review): presumably deletes the matching table files over SSH;
        # confirm against MyHdfsFile.delete_hive_ssh.
        MyHdfsFile.delete_hive_ssh(
            configData.cdh_ip(),
            table=del_table,
            p_name=del_file,
            username=configData.cdh_user(),
            password=configData.cdh_pass(),
        )
        return

    conn = connect(
        host=configData.hive_ip(),
        port=configData.hive_port(),
        auth_mechanism=configData.hive_auth(),
        user=configData.hive_user(),
    )
    try:
        cur = conn.cursor()
        try:
            # e.g. "ALTER TABLE rds_posflow.t1_trxrecprd_v2_tmp DROP IF EXISTS PARTITION( p_date='2019-02-08' )"
            sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date='{}' )".format(del_table, p_date_str)
            print(sql)
            cur.execute(sql)
        finally:
            # Release the cursor whether or not the statement succeeded.
            cur.close()
    finally:
        conn.close()