import logging
from typing import Any, Optional

from hdfs import InsecureClient

# `base` and `util` are assumed to come from the surrounding package.


class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store tfevents files to HDFS.
    """

    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url, root=self.hdfs_path, user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(self) -> None:
        for path in self.to_sync():
            file_name = str(self.sync_path.joinpath(path.name))
            logging.debug(f"Uploading {path} to {self.hdfs_path}")
            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        self.client.delete(self.sync_path, recursive=True)
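# Every snippet in this section wraps the same handful of calls on the `hdfs`
# package's WebHDFS client. A minimal standalone sketch of that surface; the
# NameNode URL, user and paths below are illustrative assumptions, not values
# taken from the code above.
from hdfs import InsecureClient

client = InsecureClient("http://namenode:50070", user="hdfs")  # hypothetical endpoint
client.makedirs("/demo")                                       # mkdir -p
client.upload("/demo/data.csv", "/tmp/data.csv")               # local -> HDFS, returns the remote path
client.download("/demo/data.csv", "/tmp/copy.csv", overwrite=True)  # HDFS -> local
client.delete("/demo", recursive=True)                         # rm -r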
import os
import pathlib

from hdfs import InsecureClient

# ConfigData, MyLocalFile and MyHdfsFile are helpers from the surrounding project.


def run_conv_file_local_to_hdfs(configData: ConfigData):
    """
    Convert the day's per-branch CSV files to UTF-8 locally, then upload them to HDFS.

    :param configData: run configuration (dates, paths, file-name patterns)
    :return: None
    """
    f_date_str = configData.get_f_date()  # e.g. "20181101"
    a_client = InsecureClient(configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    # WebHDFS defaults to the static user dr.who and cannot impersonate other
    # users; this can be changed in the Hadoop config via
    # hadoop.http.staticuser.user. See https://www.cnblogs.com/peizhe123/p/5540845.html
    root_path = os.path.join(configData.get_data_path(), f_date_str)
    dest_dir1 = os.path.join(configData.get_utf8_path(), f_date_str)
    dest_dir2 = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str))

    print("Start\n")

    f_name = configData.get_file_name(f_date_str)  # "t1_trxrecord_" + the_date + "_V2.csv"

    branches = MyLocalFile.get_child_dir(root_path)
    for aBranch in branches:
        if MyLocalFile.check_branch(aBranch):
            files = MyLocalFile.get_child_file(aBranch)
            f_a_branch = os.path.basename(aBranch)
            for aFile in files:
                if MyLocalFile.check_file(aFile, f_name):
                    to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_a_branch, f_name))
                    to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_a_branch, f_name))
                    f_add_head = configData.get_hive_add_date(f_a_branch)
                    f_add_end = configData.get_hive_add_date("789")
                    f_need_head = not configData.get_hive_head()  # False
                    MyLocalFile.conv_file_local(
                        aFile,
                        to_file1,
                        need_first_line=f_need_head,
                        p_add_head=f_add_head,
                        p_add_tail=f_add_end,
                        quoting="",
                    )
                    MyHdfsFile.safe_make_dir(a_client, to_file2)
                    # Upload only if nothing exists at the destination yet.
                    the_file = a_client.status(to_file2, strict=False)
                    if the_file is None:
                        a_client.upload(to_file2, to_file1)
                        a_client.set_permission(to_file2, 777)
                        # a_client.set_owner(thePath, owner='hdfs', group='supergroup')
                    elif the_file['type'].lower() == 'file':  # as opposed to 'directory'
                        a_client.set_permission(to_file2, 777)
import os
import pathlib

from hdfs import InsecureClient

# ConfigData, MyLocalFile and MyHdfsFile are helpers from the surrounding project.


def run_conv_file_local_to_hdfs(configData: ConfigData):
    """
    Convert the day's single CSV file to UTF-8 locally, then upload it to HDFS.

    :param configData: run configuration (dates, paths, file-name patterns)
    :return: None
    """
    f_date_str = configData.get_f_date()  # e.g. "20181101"
    a_client = InsecureClient(configData.hdfs_ip(), user="******")  # e.g. "http://10.2.201.197:50070"
    root_path = os.path.join(configData.get_data_path(), f_date_str)  # allinpay_data_bl
    dest_dir1 = os.path.join(configData.get_utf8_path(), f_date_str)  # allinpay_utf8_bl
    dest_dir2 = str(pathlib.PurePosixPath(configData.get_hdfs_path()).joinpath(f_date_str))  # hdfs_dir_bl

    print("Start\n")

    file_name = configData.get_file_name(f_date_str).lower()
    files = MyLocalFile.get_child_file(root_path)
    for aFile in files:
        short_name = os.path.basename(aFile).lower()
        f_name = pathlib.PurePath(aFile).name
        if short_name == file_name:
            to_file1 = str(pathlib.PurePath(dest_dir1).joinpath(f_name))
            to_file2 = str(pathlib.PurePosixPath(dest_dir2).joinpath(f_name))
            f_add_date = configData.get_hive_add_date(f_date_str)
            f_need_head = not configData.get_hive_head()
            MyLocalFile.conv_file_local(
                aFile,
                to_file1,
                need_first_line=f_need_head,
                p_add_head=f_add_date,
            )
            MyHdfsFile.safe_make_dir(a_client, to_file2)
            # Upload only if nothing exists at the destination yet.
            the_file = a_client.status(to_file2, strict=False)
            if the_file is None:
                a_client.upload(to_file2, to_file1)
                a_client.set_permission(to_file2, 777)
                # a_client.set_owner(thePath, owner='hdfs', group='supergroup')
            elif the_file['type'].lower() == 'file':  # as opposed to 'directory'
                a_client.set_permission(to_file2, 777)
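# Both conversion functions above share an "upload if absent" pattern:
# InsecureClient.status(path, strict=False) returns None when the path does
# not exist instead of raising, so no try/except is needed for the existence
# check. A minimal standalone sketch of that pattern; the helper name, URL,
# user and paths are illustrative assumptions, not values from the code above.
from hdfs import InsecureClient


def upload_if_absent(client: InsecureClient, hdfs_path: str, local_path: str) -> None:
    """Upload local_path to hdfs_path only if nothing exists there yet."""
    status = client.status(hdfs_path, strict=False)
    if status is None:
        client.upload(hdfs_path, local_path)
    # When the path exists, status['type'] is 'FILE' or 'DIRECTORY'.


client = InsecureClient("http://namenode:50070", user="hdfs")  # hypothetical endpoint
upload_if_absent(client, "/data/in/t1.csv", "/tmp/t1.csv")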
import logging
import pathlib
from typing import Any, Callable, Optional

from hdfs import InsecureClient

# `base` and `util` are assumed to come from the surrounding package.


class HDFSTensorboardManager(base.TensorboardManager):
    """
    Store tfevents files to HDFS.
    """

    @util.preserve_random_state
    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url, root=self.hdfs_path, user=self.user)
        self.client.makedirs(str(self.sync_path))

    @util.preserve_random_state
    def sync(
        self,
        selector: Callable[[pathlib.Path], bool] = lambda _: True,
        mangler: Callable[[pathlib.Path, int], pathlib.Path] = lambda p, __: p,
        rank: int = 0,
    ) -> None:
        for path in self.to_sync(selector):
            relative_path = path.relative_to(self.base_path)
            mangled_relative_path = mangler(relative_path, rank)
            mangled_path = self.sync_path.joinpath(mangled_relative_path)
            file_name = str(mangled_path)

            logging.debug(f"Uploading {path} to {self.hdfs_path}")
            self.client.upload(file_name, str(path))

    def delete(self) -> None:
        self.client.delete(self.sync_path, recursive=True)
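# This second sync() signature lets callers filter which files are uploaded
# and rewrite their names per worker rank. A hedged sketch of how a caller
# might invoke it; the selector and mangler below are hypothetical, and the
# constructor arguments are illustrative assumptions.
import pathlib


def tfevents_only(path: pathlib.Path) -> bool:
    # Upload only TensorBoard event files.
    return "tfevents" in path.name


def rank_suffix(path: pathlib.Path, rank: int) -> pathlib.Path:
    # Suffix the file name with the worker rank to avoid collisions.
    return path.with_name(f"{path.name}.rank{rank}")


# manager = HDFSTensorboardManager("http://namenode:50070", "/tensorboard", user="hdfs")
# manager.sync(selector=tfevents_only, mangler=rank_suffix, rank=2)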
import contextlib
import logging
import os
import tempfile
from typing import Iterator, Optional

from hdfs import InsecureClient

# StorageManager and StorageMetadata are assumed to come from the surrounding package.


class HDFSStorageManager(StorageManager):
    """
    Store and load checkpoints from HDFS.
    """

    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        super().__init__(temp_dir if temp_dir is not None else tempfile.gettempdir())
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url, root=self.hdfs_path, user=self.user)

    def post_store_path(self, storage_id: str, storage_dir: str, metadata: StorageMetadata) -> None:
        """post_store_path uploads the checkpoint to HDFS and deletes the original files."""
        try:
            logging.info("Uploading storage {} to HDFS".format(storage_id))
            result = self.client.upload(storage_id, storage_dir)
            logging.info("Uploaded storage {} to HDFS path {}".format(storage_id, result))
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    @contextlib.contextmanager
    def restore_path(self, metadata: StorageMetadata) -> Iterator[str]:
        logging.info("Downloading storage {} from HDFS".format(metadata.storage_id))
        self.client.download(metadata.storage_id, self._base_path, overwrite=True)
        try:
            yield os.path.join(self._base_path, metadata.storage_id)
        finally:
            self._remove_checkpoint_directory(metadata.storage_id)

    def delete(self, metadata: StorageMetadata) -> None:
        logging.info("Deleting storage {} from HDFS".format(metadata.storage_id))
        self.client.delete(metadata.storage_id, recursive=True)
import logging
import os
import tempfile
from typing import Optional, Union

from hdfs import InsecureClient

# `storage` and `util` are assumed to come from the surrounding package.


class HDFSStorageManager(storage.CloudStorageManager):
    """
    Store and load checkpoints from HDFS.
    """

    def __init__(
        self,
        hdfs_url: str,
        hdfs_path: str,
        user: Optional[str] = None,
        temp_dir: Optional[str] = None,
    ) -> None:
        super().__init__(temp_dir if temp_dir is not None else tempfile.gettempdir())
        self.hdfs_url = hdfs_url
        self.hdfs_path = hdfs_path
        self.user = user

        self.client = InsecureClient(self.hdfs_url, root=self.hdfs_path, user=self.user)

    @util.preserve_random_state
    def upload(self, src: Union[str, os.PathLike], dst: str) -> None:
        src = os.fspath(src)
        logging.info(f"Uploading to HDFS: {dst}")
        self.client.upload(dst, src)

    @util.preserve_random_state
    def download(self, src: str, dst: Union[str, os.PathLike]) -> None:
        dst = os.fspath(dst)
        logging.info(f"Downloading {src} from HDFS")
        self.client.download(src, dst, overwrite=True)

    @util.preserve_random_state
    def delete(self, tgt: str) -> None:
        logging.info(f"Deleting {tgt} from HDFS")
        self.client.delete(tgt, recursive=True)
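# A hedged usage sketch for the storage manager above, matching the signatures
# it defines; the URL, root path and checkpoint names are illustrative
# assumptions, not values from the code.
# manager = HDFSStorageManager("http://namenode:50070", "/checkpoints", user="hdfs")
# manager.upload("/tmp/ckpt-0001", "ckpt-0001")    # push a local checkpoint dir
# manager.download("ckpt-0001", "/tmp/restore")    # pull it back down
# manager.delete("ckpt-0001")                      # remove it from HDFS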
import os

from hdfs import InsecureClient

# HDFS_HOSTS, HDFS_USER and logger are assumed to be module-level globals.


def upload_and_delete_file(file_path, upload_path):
    """Upload every file under file_path to upload_path, deleting each local
    file once its upload succeeds."""
    for file_name in os.listdir(file_path):
        try:
            # To authenticate, pass a requests.Session instead:
            # session = Session()
            # session.auth = HTTPBasicAuth('username', 'password')
            # client = InsecureClient(url=HDFS_HOSTS, user="******", session=session)
            client = InsecureClient(url=HDFS_HOSTS, user=HDFS_USER)
            # cleanup=True removes partially written remote files if the transfer fails.
            path = client.upload(upload_path + file_name, file_path + file_name, cleanup=True)
            if path:
                os.remove(file_path + file_name)
        except Exception as e:
            logger.error("upload_and_delete_file error = %s", str(e))
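# upload_and_delete_file() joins paths by plain string concatenation, so both
# arguments must end with a separator. A hypothetical call, with paths that
# are illustrative assumptions:
# upload_and_delete_file("/var/log/sm/", "/data/scan/sm/")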