def run_hive(configData: ConfigData):
    """Load each branch's daily CSV file from HDFS into a Hive table.

    Walks ``<hdfs_root>/<f_date>/<branch>/`` on HDFS and, for every branch
    directory that contains the expected file, issues a Hive
    ``LOAD DATA INPATH`` statement (partitioned by ``p_date`` when the
    configuration says the table has a partition).

    :param configData: project configuration object supplying the HDFS and
        Hive connection parameters, root path, dates, file and table names.
    """
    a_client = InsecureClient(url=configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=configData.hive_ip(), port=configData.hive_port(),
                   auth_mechanism=configData.hive_auth(), user=configData.hive_user())
    try:
        cur = conn.cursor()
        try:
            f_date_str = configData.get_f_date()    # e.g. "20181101"
            p_date_str = configData.get_p_date()    # e.g. "2018-11-01"
            root_path = configData.get_hdfs_path()  # e.g. "/shouyinbao/bl_shouyinbao/UTF8/"
            file_name = configData.get_file_name(f_date_str)  # "t1_trxrecord_" + date + "_V2.csv"
            table_name = configData.get_table_name()
            print("Start\n")
            idn = 0
            branches = MyHdfsFile.get_child(
                a_client, str(pathlib.PurePosixPath(root_path).joinpath(f_date_str)))
            for aBranch in branches:
                # Guard clauses instead of nested ifs.
                if not MyHdfsFile.check_branch(a_client, aBranch):
                    continue
                files = MyHdfsFile.get_child(a_client, aBranch)
                f_a_branch = MyHdfsFile.get_name(aBranch)
                for aFile in files:
                    if not MyHdfsFile.check_file(a_client, aFile, file_name):
                        continue
                    # e.g. '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(pathlib.PurePosixPath(root_path).joinpath(
                        f_date_str, f_a_branch, file_name))
                    # NOTE(review): SQL assembled by string formatting; the
                    # values come from trusted configuration, but they must
                    # stay free of quote characters.
                    if not configData.get_has_partition():
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {}".format(to_file2, table_name)
                    else:
                        sql = "LOAD DATA INPATH '{}' INTO TABLE {} PARTITION ( p_date='{}' )".format(
                            to_file2, table_name, p_date_str)
                    idn += 1
                    print(str(idn) + " " + sql + "\n")
                    cur.execute(sql)
        finally:
            # BUGFIX: cursor/connection were leaked if any statement raised.
            cur.close()
    finally:
        conn.close()
def run_hive(conf: ConfigData, the_date: str, is_baoli=True):
    """Load each branch's CSV file for *the_date* from HDFS into a Hive table.

    Walks ``<hdfs_root>/<the_date>/<branch>/`` on HDFS and issues a Hive
    ``LOAD DATA INPATH`` statement per matching file. The SQL shape depends
    on ``conf.m_project_id``: 1 = plain load, 2 = load into a
    ``(p_branch, p_date)`` partition; any other id is skipped.

    NOTE(review): this redefines ``run_hive`` from above (same module-level
    name, different signature) — the later definition shadows the earlier one.

    :param conf: project configuration (HDFS/Hive connection, paths, names).
    :param the_date: date string, normalised via ``StrTool.get_the_date_str``.
    :param is_baoli: unused; kept for caller compatibility.
    """
    p_client = Client(conf.hdfs_ip())  # e.g. "http://10.2.201.197:50070"
    conn = connect(host=conf.hive_ip(), port=conf.hive_port(),
                   auth_mechanism=conf.hive_auth(), user=conf.hive_user())
    try:
        cur = conn.cursor()
        try:
            the_date = StrTool.get_the_date_str(the_date)  # e.g. "20181101"
            root_path = conf.get_hdfs_path()               # e.g. "/shouyinbao/bl_shouyinbao/UTF8/"
            f_name = conf.get_file_name(the_date)          # "t1_trxrecord_" + date + "_V2.csv"
            table_name = conf.get_table_name()
            print("Start\n")
            idn = 0
            # Consistency: build the path with pathlib like the rest of the
            # file, instead of raw string concatenation.
            branches = MyHdfsFile.get_child(
                p_client, str(pathlib.PurePosixPath(root_path).joinpath(the_date)))
            for aBranch in branches:
                if not MyHdfsFile.check_branch(p_client, aBranch):
                    continue
                files = MyHdfsFile.get_child(p_client, aBranch)
                f_a_branch = MyHdfsFile.get_name(aBranch)
                for aFile in files:
                    if not MyHdfsFile.check_file(p_client, aFile, f_name):
                        continue
                    # e.g. '/shouyinbao/bl_shouyinbao/UTF8/20181101/9999997900/t1_trxrecord_20181101_V2.csv'
                    to_file2 = str(pathlib.PurePosixPath(root_path).joinpath(
                        the_date, f_a_branch, f_name))
                    if conf.m_project_id == 1:
                        sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {}'.format(
                            to_file2, table_name)
                    elif conf.m_project_id == 2:
                        sql = 'LOAD DATA INPATH \'{}\' INTO TABLE {} PARTITION ( p_branch=\'{}\', p_date={} )'.format(
                            to_file2, table_name, f_a_branch, the_date)
                    else:
                        # BUGFIX: previously `sql` was unbound here and
                        # cur.execute(sql) raised NameError for unknown ids.
                        continue
                    idn += 1
                    print(str(idn) + " " + sql + "\n")
                    cur.execute(sql)
        finally:
            # BUGFIX: cursor/connection were leaked if any statement raised.
            cur.close()
    finally:
        conn.close()
hdfs_path = str(pathlib.PurePosixPath(conf.get_hdfs_path()).joinpath(sdate)) shutil.rmtree(data_path, ignore_errors=True) shutil.rmtree(utf8_path, ignore_errors=True) client = MyClient(conf.hdfs_ip()) # "http://10.2.201.197:50070" client.delete(hdfs_path, recursive=True) # "/user/hive/warehouse/posflow.db/t1_trxrecprd_v2/t1_trxrecord_20181204_V2*.csv" # hive_table="posflow.t1_trxrecprd_v2", # file_pre1 = 't1_trxrecord_', # file_ext2 = "_V2.csv", if __name__ == "__main__": the_conf = ConfigData(p_is_test=False) client = Client(the_conf.hdfs_ip()) # "http://10.2.201.197:50070" a = MyHdfsFile.get_child(client, "/data/posflow/allinpay_utf8_zc") b = MyHdfsFile.get_child_file(client,"/data/posflow/allinpay_utf8_zc") c = MyHdfsFile.get_child_dir(client, "/data/posflow/allinpay_utf8_zc") # test # MyHdfsFile.delete(client, "/data/posflow/allinpay_utf8_zc", "*agt_cpy*") # test if the_conf.is_test(): day_str = the_conf.test_date() days = 9 else: day_str = StrTool.get_param_str(1, "") days = StrTool.get_param_int(2, 1) day_str = StrTool.get_the_date_str(day_str)