import pathlib

from hdfs import InsecureClient   # WebHDFS client (the `hdfs` package)
from impala.dbapi import connect  # assumption: Hive is reached via impyla,
                                  # whose connect() accepts auth_mechanism

# ConfigData and MyHdfsFile are project-local helpers; their home modules are
# not shown in this section, so the import paths below are assumptions.
from config_data import ConfigData
from my_hdfs_file import MyHdfsFile


def run_hive(configData: ConfigData):
    """Load the single data file for the target date into Hive."""
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=configData.hive_ip(), port=configData.hive_port(),
                   auth_mechanism=configData.hive_auth(), user=configData.hive_user())
    cur = conn.cursor()

    f_date_str = configData.get_f_date()  # "20181101"
    p_date_str = configData.get_p_date()  # "2018-11-01"

    # hdfs_dir_bl, e.g. "/data/posflow/allinpay_utf8_zc/20181101/"
    root_path = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str))
    # Expected file names for the date:
    #   20181101_loginfo_rsp_bl_new.csv
    #   20181101_rsp_agt_bl_new.del
    #   20181101_rxinfo_rsp_bl.txt
    file_name = str(pathlib.PurePosixPath(root_path).joinpath(configData.get_file_name(f_date_str)))
    table_name = configData.get_table_name()  # e.g. 'test.t1_trxrecprd_v2_zc'

    print("Start\n")

    if MyHdfsFile.isfile(a_client, file_name):
        if not configData.get_has_partition():
            # OVERWRITE INTO TABLE would replace existing data instead of
            # appending, e.g. "... OVERWRITE INTO TABLE test.t1_trxrecprd_v2_bl2"
            sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(file_name, table_name)
        else:
            sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(
                file_name, table_name, p_date_str)
        print("OK " + sql + "\n")
        cur.execute(sql)  # , async=True)

    cur.close()
    conn.close()
def run_hive_per_branch(configData: ConfigData):
    """Walk every branch directory under <root>/<date>/ and load each
    branch's data file into Hive."""
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=configData.hive_ip(), port=configData.hive_port(),
                   auth_mechanism=configData.hive_auth(), user=configData.hive_user())
    cur = conn.cursor()

    f_date_str = configData.get_f_date()  # "20181101"
    p_date_str = configData.get_p_date()  # "2018-11-01"

    root_path = configData.get_hdfs_path()  # "/shouyinbao/bl_shouyinbao/UTF8/"
    file_name = configData.get_file_name(f_date_str)  # "t1_trxrecord_" + the_date + "_V2.csv"
    table_name = configData.get_table_name()  # e.g. 'test.t1_trxrecprd_v2_zc'

    print("Start\n")

    idn = 0
    branches = MyHdfsFile.get_child(a_client, str(pathlib.PurePosixPath(root_path).joinpath(f_date_str)))
    for aBranch in branches:
        if MyHdfsFile.check_branch(a_client, aBranch):
            files = MyHdfsFile.get_child(a_client, aBranch)
            f_a_branch = MyHdfsFile.get_name(aBranch)
            for aFile in files:
                if MyHdfsFile.check_file(a_client, aFile, file_name):
                    # e.g. '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(pathlib.PurePosixPath(root_path).joinpath(f_date_str, f_a_branch, file_name))
                    if not configData.get_has_partition():
                        # OVERWRITE INTO TABLE would replace existing data instead of appending
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(to_file2, table_name)
                    else:
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(
                            to_file2, table_name, p_date_str)
                    idn += 1
                    print(str(idn) + " " + sql + "\n")
                    cur.execute(sql)  # , async=True)

    cur.close()
    conn.close()
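# For reference, with the example values from the comments above
# (f_date_str = "20181101", p_date_str = "2018-11-01", branch "9999997900"),
# the partitioned case in run_hive_per_branch generates statements of this shape:
#
#   LOAD DATA INPATH '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
#     INTO TABLE test.t1_trxrecprd_v2_zc PARTITION ( p_date='2018-11-01' )
#
# Note that LOAD DATA INPATH moves (rather than copies) the source file into
# the table's warehouse directory, so the staging file is gone after a
# successful load.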
def run_remove_hive(configData: ConfigData):
    """Remove previously loaded data for the target date: for unpartitioned
    tables, delete the loaded files from the warehouse directory over SSH;
    for partitioned tables, drop the date partition."""
    f_date_str = configData.get_f_date()  # "20181101"
    p_date_str = configData.get_p_date()  # "2018-11-01"
    del_table = configData.get_table_name()  # e.g. "rds_posflow.loginfo_rsp_bl"

    print(configData.cdh_ip() + del_table + f_date_str +
          configData.get_file_name(f_date_str) + configData.hive_ip())

    if not configData.get_has_partition():
        # Turn "t1_trxrecord_20181204_V2.csv" into the glob
        # "t1_trxrecord_20181204_V2*.csv" so that suffixed copies created by
        # repeated loads (e.g. *_copy_1.csv) are matched as well; the files
        # live under "/user/hive/warehouse/rds_posflow.db/t1_trxrecprd_v2/"
        del_file = configData.get_file_name(f_date_str).replace('.', '*.')
        MyHdfsFile.delete_hive_ssh(configData.cdh_ip(), table=del_table, p_name=del_file,
                                   username=configData.cdh_user(), password=configData.cdh_pass())
    else:
        conn = connect(host=configData.hive_ip(), port=configData.hive_port(),
                       auth_mechanism=configData.hive_auth(), user=configData.hive_user())
        cur = conn.cursor()
        # e.g. "ALTER TABLE rds_posflow.t1_trxrecprd_v2_tmp DROP IF EXISTS PARTITION( p_date='2019-02-08' )"
        sql = "ALTER TABLE {} DROP IF EXISTS PARTITION( p_date='{}' )".format(del_table, p_date_str)
        print(sql)
        cur.execute(sql)
        cur.close()
        conn.close()
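# Usage sketch. ConfigData's constructor is not shown in this section, so the
# call below is hypothetical; the intended order, however, follows from the
# functions above: clear any data already loaded for the target date, then
# load the new files.
if __name__ == "__main__":
    config = ConfigData()    # hypothetical: built from the project's config
    run_remove_hive(config)  # drop the old partition / delete old files
    run_hive(config)         # single-file layout; use run_hive_per_branch()
                             # for the per-branch directory layout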