def _copy_base_conf(self):
    """Copy base configuration files to tmp dir.

    A new temporary directory is created under ``/tmp`` (prefix
    ``hadoop-``) and its path is stored in ``self.temp_conf_dir``. When
    ``self.local_base_conf_dir`` exists, every entry it contains is copied
    into that temporary directory. Each mandatory configuration file whose
    name was not found among the local entries is then requested from
    ``self.conf_dir`` on ``self.master`` through a ``Get`` action.
    """

    self.temp_conf_dir = tempfile.mkdtemp("", "hadoop-", "/tmp")

    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [
            os.path.join(self.local_base_conf_dir, f)
            for f in os.listdir(self.local_base_conf_dir)
        ]
        for f in base_conf_files:
            shutil.copy(f, self.temp_conf_dir)
    else:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    mandatory_files = [
        CORE_CONF_FILE, HDFS_CONF_FILE, MR_CONF_FILE, YARN_CONF_FILE
    ]

    # Build the missing list by filtering rather than by removing entries
    # from an alias of the mandatory list.
    local_names = set(os.path.basename(f) for f in base_conf_files)
    missing_conf_files = [f for f in mandatory_files
                          if f not in local_names]

    logger.info("Copying missing conf files from master: " +
                str(missing_conf_files))

    remote_missing_files = [
        os.path.join(self.conf_dir, f) for f in missing_conf_files
    ]

    action = Get([self.master], remote_missing_files, self.temp_conf_dir)
    action.run()
def _copy_base_conf(self):
    """Copy base configuration files to tmp dir.

    The destination is a directory freshly created with
    ``tempfile.mkdtemp`` under ``/tmp`` and recorded in
    ``self.temp_conf_dir``. Entries of ``self.local_base_conf_dir`` are
    copied there if that directory exists; otherwise a warning is logged.
    Mandatory files (core, hdfs, mapred and yarn configuration) that were
    not provided locally are retrieved from ``self.conf_dir`` on
    ``self.master`` by running a ``Get`` action.
    """

    self.temp_conf_dir = tempfile.mkdtemp("", "hadoop-", "/tmp")

    base_conf_files = []
    if os.path.exists(self.local_base_conf_dir):
        for entry in os.listdir(self.local_base_conf_dir):
            base_conf_files.append(
                os.path.join(self.local_base_conf_dir, entry))
        for conf_path in base_conf_files:
            shutil.copy(conf_path, self.temp_conf_dir)
    else:
        # Use warning(): warn() is a deprecated alias in the logging module.
        logger.warning(
            "Local conf dir does not exist. Using default configuration")

    mandatory_files = [CORE_CONF_FILE, HDFS_CONF_FILE,
                       MR_CONF_FILE, YARN_CONF_FILE]

    # Work on a copy so the list of mandatory names is left untouched while
    # locally provided files are struck off.
    missing_conf_files = list(mandatory_files)
    for conf_path in base_conf_files:
        conf_name = os.path.basename(conf_path)
        if conf_name in missing_conf_files:
            missing_conf_files.remove(conf_name)

    logger.info("Copying missing conf files from master: " + str(
        missing_conf_files))

    remote_missing_files = [os.path.join(self.conf_dir, name)
                            for name in missing_conf_files]

    action = Get([self.master], remote_missing_files, self.temp_conf_dir)
    action.run()
def copy_history(self, dest, job_ids=None):
    """Copy history logs from dfs.

    The logs are first fetched from the dfs into a scratch directory on the
    master (which is wiped beforehand) and then transferred from the master
    to the local destination.

    Args:
      dest (str):
        The path of the local dir where the logs will be copied. It is
        created, with a logged warning, when it does not exist yet.
      job_ids (list of str, optional):
        A list with the ids of the jobs for which the history should be
        copied. If nothing is passed, the history of all jobs is copied.
    """

    dest_is_missing = not os.path.exists(dest)
    if dest_is_missing:
        notice = "Destination directory " + dest
        notice = notice + " does not exist. It will be created"
        logger.warning(notice)
        os.makedirs(dest)

    # Locations involved: per-user history dir in the dfs and the scratch
    # dir on the master
    dfs_history_dir = ("/tmp/hadoop-yarn/staging/history/done_intermediate/"
                       + getpass.getuser())
    staging_dir = "/tmp/hadoop_hist"

    # Start from a clean scratch dir on the master
    SshProcess("rm -rf " + staging_dir, self.master).run()

    # Decide what has to be fetched from the dfs
    if not job_ids:
        # No filter: take the whole history dir
        dfs_sources = iter([dfs_history_dir])
    else:
        SshProcess("mkdir " + staging_dir, self.master).run()
        # Lazy on purpose: each pattern is built right before it is used
        dfs_sources = (dfs_history_dir + "/" + jid + "*" for jid in job_ids)

    for source in dfs_sources:
        self.execute("fs -get " + source + " " + staging_dir,
                     verbose=False)

    # Bring the fetched files from the master to the local destination
    Get([self.master], [staging_dir], dest).run()
def _initialize_conf(self):
    """Merge locally-specified configuration files with default files from
    the distribution.

    When ``self.local_base_conf_dir`` exists, every entry it contains is
    copied into ``self.init_conf_dir``; otherwise a warning is logged. Each
    name in ``self.conf_mandatory_files`` that was not found among the
    local entries is then requested from ``self.conf_dir`` on the first
    host of ``self.hosts`` through a ``Get`` action targeting
    ``self.init_conf_dir``.

    ``self.conf_mandatory_files`` itself is left unmodified.
    """

    if os.path.exists(self.local_base_conf_dir):
        base_conf_files = [os.path.join(self.local_base_conf_dir, f)
                           for f in os.listdir(self.local_base_conf_dir)]
        for f in base_conf_files:
            shutil.copy(f, self.init_conf_dir)
    else:
        # Logger.warn is a deprecated alias; warning() is the supported name.
        logger.warning(
            "Local conf dir does not exist. Using default configuration")
        base_conf_files = []

    # Compute the missing files into a new list. Removing entries from
    # self.conf_mandatory_files directly would permanently shrink the
    # attribute for every later reader of it.
    local_names = set(os.path.basename(f) for f in base_conf_files)
    missing_conf_files = [f for f in self.conf_mandatory_files
                          if f not in local_names]

    logger.info("Copying missing conf files from master: " +
                str(missing_conf_files))

    remote_missing_files = [os.path.join(self.conf_dir, f)
                            for f in missing_conf_files]

    action = Get([self.hosts[0]], remote_missing_files, self.init_conf_dir)
    action.run()