def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs):
  """Fail unless the given DFS directory exists, checking via WebHDFS when
  available and falling back to a slower `hdfs dfs -test -d` shell call."""
  import params

  if not is_empty(dir_path):
    dir_path = HdfsResourceProvider.parse_path(dir_path)

    if dir_path in ignored_dfs_dirs:
      Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.")
      return

    Logger.info("Verifying if DFS directory '" + dir_path + "' exists.")

    dir_exists = None

    if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
      # Checking with WebHDFS is much faster than executing `hdfs dfs -test`.
      util = WebHDFSUtil(params.hdfs_site, params.yarn_user, params.security_enabled)
      list_status = util.run_command(dir_path, 'GETFILESTATUS', method='GET',
                                     ignore_status_codes=['404'], assertable_result=False)
      dir_exists = ('FileStatus' in list_status)
    else:
      # Have to do the time-expensive `hdfs dfs -test -d` check.
      dfs_ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path),
                                user=params.yarn_user)[0]
      dir_exists = not dfs_ret_code  # dfs -test -d returns 0 when the dir exists

    if not dir_exists:
      raise Fail("DFS directory '" + dir_path + "' does not exist!")
    else:
      Logger.info("DFS directory '" + dir_path + "' exists.")
def wait_for_dfs_directory_created(self, dir_path, ignored_dfs_dirs):
  """Nameservice-aware variant of the directory check: resolves the HDFS
  nameservice (for NameNode HA) and passes it to WebHDFSUtil, falling back to
  `hdfs dfs -test -d` when WebHDFS is not available."""
  import params

  if not is_empty(dir_path):
    dir_path = HdfsResourceProvider.parse_path(dir_path)

    if dir_path in ignored_dfs_dirs:
      Logger.info("Skipping DFS directory '" + dir_path + "' as it's marked to be ignored.")
      return

    Logger.info("Verifying if DFS directory '" + dir_path + "' exists.")

    dir_exists = None
    nameservices = namenode_ha_utils.get_nameservices(params.hdfs_site)
    nameservice = None if not nameservices else nameservices[-1]

    if WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.dfs_type):
      # Checking with WebHDFS is much faster than executing `hdfs dfs -test`.
      util = WebHDFSUtil(params.hdfs_site, nameservice, params.hdfs_user, params.security_enabled)
      list_status = util.run_command(dir_path, 'GETFILESTATUS', method='GET',
                                     ignore_status_codes=['404'], assertable_result=False)
      dir_exists = ('FileStatus' in list_status)
    else:
      # Have to do the time-expensive `hdfs dfs -test -d` check.
      dfs_ret_code = shell.call(format("hdfs --config {hadoop_conf_dir} dfs -test -d " + dir_path),
                                user=params.livy2_user)[0]
      dir_exists = not dfs_ret_code  # dfs -test -d returns 0 when the dir exists

    if not dir_exists:
      raise Fail("DFS directory '" + dir_path + "' does not exist!")
    else:
      Logger.info("DFS directory '" + dir_path + "' exists.")
def wait_for_dfs_directories_created(self, *dirs):
  """Kinit if security is enabled, then verify that each of the given DFS
  directories exists, skipping any that are on the ignore list."""
  import params

  ignored_dfs_dirs = HdfsResourceProvider.get_ignored_resources_list(params.hdfs_resource_ignore_file)

  if params.security_enabled:
    Execute(format("{rm_kinit_cmd}"),
            user=params.yarn_user)

  for dir_path in dirs:
    self.wait_for_dfs_directory_created(dir_path, ignored_dfs_dirs)
def wait_for_dfs_directories_created(self, *dirs):
  """Variant that additionally kinits as the HDFS user before verifying that
  each of the given DFS directories exists."""
  import params

  ignored_dfs_dirs = HdfsResourceProvider.get_ignored_resources_list(params.hdfs_resource_ignore_file)

  if params.security_enabled:
    Execute(params.rm_kinit_cmd,
            user=params.yarn_user)
    Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
            user=params.hdfs_user)

  for dir_path in dirs:
    self.wait_for_dfs_directory_created(dir_path, ignored_dfs_dirs)
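# A minimal sketch of how a service script might call the helpers above from
# its start() hook, blocking startup until the required DFS directories exist.
# Everything marked "illustrative" is an assumption made for this example and
# is not taken from the snippets above: the class name, the configure() stub
# and the entity_groupfs_* parameter names.
from resource_management.libraries.script.script import Script


class ExampleServiceScript(Script):
  # The wait_for_dfs_directory_created / wait_for_dfs_directories_created
  # methods shown above would be defined on this class.

  def configure(self, env):
    # Illustrative placeholder; a real script would render its configs here.
    pass

  def start(self, env, upgrade_type=None):
    import params
    env.set_params(params)
    self.configure(env)

    # Illustrative parameter names: wait for the directories this service
    # depends on before launching the daemon.
    self.wait_for_dfs_directories_created(params.entity_groupfs_store_dir,
                                          params.entity_groupfs_active_dir)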