def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.

    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem.  Therefore, we make the directory
    and the script accessible by all.
    """
    self.logger.debug("remote_wd: %s", self.remote_wd)
    self.logger.debug("remote_exe: %s", self.remote_exe)
    self.logger.debug("remotes: %s", self.files_to_upload)
    if self.args.module:
        self.logger.debug(
            'Generated pipes_code:\n\n %s', self._generate_pipes_code()
        )
    if not self.args.pretend:
        hdfs.mkdir(self.remote_wd)
        hdfs.chmod(self.remote_wd, "a+rx")
        self.logger.debug("created and chmod-ed: %s", self.remote_wd)
        pipes_code = self._generate_pipes_code()
        hdfs.dump(pipes_code, self.remote_exe)
        self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
        hdfs.chmod(self.remote_exe, "a+rx")
        self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
        for (l, h, _) in self.files_to_upload:
            self.logger.debug("uploading: %s to %s", l, h)
            hdfs.cp(l, h)
    self.logger.debug(
        "Created%sremote paths:",
        ' [simulation] ' if self.args.pretend else ' '
    )
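For reference, the symbolic mode strings used above ("a+rx", "a+r") map onto the usual POSIX permission bits; a quick sanity check with the standard stat module (the bit layout is the same for HDFS permissions):

    import stat

    # "a+rx" adds read and execute for user, group and other:
    assert (stat.S_IRUSR | stat.S_IXUSR |
            stat.S_IRGRP | stat.S_IXGRP |
            stat.S_IROTH | stat.S_IXOTH) == 0o555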
def _create_directories(app_id, run_id, param_string, type, sub_type=None):
    """
    Creates directories for an experiment. If the Experiments folder exists,
    the directories are created below it; otherwise they are created under
    the Logs directory.

    Args:
        :app_id: YARN application ID of the experiment
        :run_id: Experiment ID
        :param_string: name of the new directory created under the parent directories
        :type: type of the new directory parent, e.g. differential_evolution
        :sub_type: type of sub directory to parent, e.g. generation

    Returns:
        The new directories for the YARN application and for the execution
        (hdfs_exec_logdir, hdfs_appid_logdir)
    """
    pyhdfs_handle = get()
    # assumes at least one of Experiments/ or Logs/ exists in the project;
    # otherwise hdfs_events_parent_dir would be unbound below
    if pyhdfs_handle.exists(project_path() + "Experiments"):
        hdfs_events_parent_dir = project_path() + "Experiments"
    elif pyhdfs_handle.exists(project_path() + "Logs"):
        hdfs_events_parent_dir = project_path() + "Logs/TensorFlow"
    try:
        st = hdfs.stat(hdfs_events_parent_dir)
        if not bool(st.st_mode & local_stat.S_IWGRP):
            # if not group writable, make it so
            hdfs.chmod(hdfs_events_parent_dir, "g+w")
    except IOError:
        # If this happens, the permission is already set correctly, since the
        # creator of /Logs/TensorFlow already made it group writable
        pass
    hdfs_appid_logdir = hdfs_events_parent_dir + "/" + app_id
    # if not pyhdfs_handle.exists(hdfs_appid_logdir):
    #     pyhdfs_handle.create_directory(hdfs_appid_logdir)
    hdfs_run_id_logdir = hdfs_appid_logdir + "/" + type + "/run." + str(run_id)
    # determine directory structure based on arguments
    if sub_type:
        hdfs_exec_logdir = hdfs_run_id_logdir + "/" + str(sub_type) + '/' + str(param_string)
    elif not param_string and not sub_type:
        hdfs_exec_logdir = hdfs_run_id_logdir + '/'
    else:
        hdfs_exec_logdir = hdfs_run_id_logdir + '/' + str(param_string)
    # Need to remove the directory if it exists (might be a task retry)
    if pyhdfs_handle.exists(hdfs_exec_logdir):
        pyhdfs_handle.delete(hdfs_exec_logdir, recursive=True)
    # create the new directory
    pyhdfs_handle.create_directory(hdfs_exec_logdir)
    # update logfile
    logfile = hdfs_exec_logdir + '/' + 'logfile'
    os.environ['EXEC_LOGFILE'] = logfile
    return hdfs_exec_logdir, hdfs_appid_logdir
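To make the layout concrete, a hypothetical call (all argument values illustrative) would produce paths like the following under the project's Experiments folder:

    # _create_directories('application_1_0001', 3, 'lr=0.01',
    #                     'differential_evolution', sub_type='generation')
    #
    # hdfs_appid_logdir: <project>/Experiments/application_1_0001
    # hdfs_exec_logdir:  <project>/Experiments/application_1_0001/
    #                        differential_evolution/run.3/generation/lr=0.01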
def copy_file_2_remote_dir(remote_dir, log_file):
    LOGGER = logging.getLogger(__name__)
    suffix = time.strftime('%d-%m-%y_%H-%M-%S', time.gmtime(log_file.mtime))
    dest_filename = os.path.join(remote_dir,
                                 "{0}-{1}".format(log_file.filename, suffix))
    LOGGER.debug("Copying {0} to {1}".format(log_file.filepath, dest_filename))
    hdfs.put(log_file.filepath, dest_filename)
    LOGGER.debug("Copied {0} to HDFS".format(log_file.filepath))
    hdfs.chmod(dest_filename, BACKUP_PERMISSIONS)
    LOGGER.debug("Changed permissions for {0}".format(dest_filename))
def mapper(_, record, writer, conf):
    out_dir = conf.get('out.dir', utils.make_random_str())
    if not hdfs.path.isdir(out_dir):
        hdfs.mkdir(out_dir)
        hdfs.chmod(out_dir, 'g+rwx')
    img_path = record.strip()
    a = get_array(img_path)
    out_a = calc_features(a)
    out_path = hdfs.path.join(out_dir, '%s.out' % hdfs.path.basename(img_path))
    with hdfs.open(out_path, 'w') as fo:
        np.save(fo, out_a)  # actual output
    hdfs.chmod(out_path, 'g+rw')
    writer.emit(img_path, fo.name)  # info (tab-separated input-output)
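The signature matches a Pydoop Script mapper that also receives the job configuration. One way to smoke-test the wiring without a full Hadoop run is to call it directly with stand-in objects; the stub writer and all paths below are hypothetical, and a reachable HDFS plus the module's own get_array/calc_features helpers are assumed:

    class _StubWriter(object):
        def emit(self, k, v):
            print('%s\t%s' % (k, v))

    # a plain dict provides the conf.get() interface used above
    mapper(None, '/user/demo/images/cat.png', _StubWriter(),
           {'out.dir': '/user/demo/features'})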
def chmod(hdfs_path, mode, project=None):
    """
    Change file mode bits.

    Args:
        :hdfs_path: You can specify either a full hdfs pathname or a relative
                    one (relative to your project's path in HDFS).
        :mode: File mode (user/group/world privilege) bits
        :project: If this value is not specified, it will get the path to your
                  project. If you need the path to another project, you can
                  specify the name of the project as a string.
    """
    if project is None:
        project = project_name()
    hdfs_path = _expand_path(hdfs_path, project)
    return hdfs.chmod(hdfs_path, mode)
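A couple of hypothetical calls against the wrapper above, assuming the project layout implied by project_name() and _expand_path() (the paths are illustrative):

    chmod('Resources/README.md', 'g+w')                   # relative to the current project
    chmod('Resources/README.md', 'g+w', project='demo')   # relative to another project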
def backup(config):
    LOGGER = logging.getLogger(__name__)
    remote_basedir = config.get('backup', 'remote-basedir')
    local_log_dir = config.get('backup', 'local-log-dir')
    checkpoint_file = config.get('backup', 'checkpoint')
    if not remote_dir_exists(remote_basedir):
        LOGGER.debug("Remote directory {0} does not exist, creating it".format(
            remote_basedir))
        create_remote_dir(remote_basedir)
        hdfs.chmod(remote_basedir, BACKUP_PERMISSIONS)
    log_files = list_local_files(local_log_dir)
    now = time.time()
    checkpoint = load_checkpoint(checkpoint_file)
    copied_log_files = {}
    for log_file in log_files:
        if log_file.mtime > checkpoint:
            remote_dir = get_remote_dir(log_file, remote_basedir)
            if not remote_dir_exists(remote_dir):
                create_remote_dir(remote_dir)
                LOGGER.debug("Created remote directory {0}".format(remote_dir))
            try:
                copy_file_2_remote_dir(remote_dir, log_file)
                copied_log_files[log_file] = remote_dir
            except Exception as ex:
                LOGGER.warn("Error while copying {0} - {1}".format(
                    log_file, ex))
    LOGGER.debug("Finished copying, updating checkpoint")
    write_checkpoint(checkpoint_file)
    if not copied_log_files:
        LOGGER.debug("Did not copy any log file")
    else:
        for lf, rd in copied_log_files.iteritems():
            LOGGER.info("Copied file {0} to {1}".format(lf, rd))
    LOGGER.info("Finished copying files")
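The function reads three keys from a [backup] section; a minimal sketch of building such a config in code (all values illustrative):

    try:
        import configparser                    # Python 3
    except ImportError:
        import ConfigParser as configparser    # Python 2, matching iteritems() above

    config = configparser.ConfigParser()
    config.add_section('backup')
    config.set('backup', 'remote-basedir', '/backups/logs')
    config.set('backup', 'local-log-dir', '/var/log/myapp')
    config.set('backup', 'checkpoint', '/var/lib/myapp/backup.checkpoint')
    backup(config)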
def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.

    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem.  Therefore, we make the directory
    and the script accessible by all.
    """
    pipes_code = self.__generate_pipes_code()
    hdfs.mkdir(self.remote_wd)
    hdfs.chmod(self.remote_wd, "a+rx")
    hdfs.dump(pipes_code, self.remote_exe)
    hdfs.chmod(self.remote_exe, "a+rx")
    hdfs.put(self.args.module, self.remote_module)
    hdfs.chmod(self.remote_module, "a+r")
    self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
    self.logger.debug("Created remote paths:")
    self.logger.debug(self.remote_wd)
    self.logger.debug(self.remote_exe)
    self.logger.debug(self.remote_module)
def __test(self, offset, user=None):
    for mode in os.R_OK, os.W_OK, os.X_OK:
        hdfs.chmod(self.path, mode << offset)
        print(' * mode now: %03o' % hdfs.path.stat(self.path).st_mode)
        self.assertTrue(hdfs.path.access(self.path, mode, user=user))
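The shift in mode << offset lines up with the POSIX permission-bit layout, so offset 6 exercises the user bits, 3 the group bits and 0 the other bits; a quick check against the standard stat module:

    import os, stat

    assert os.R_OK << 6 == stat.S_IRUSR   # user read
    assert os.W_OK << 3 == stat.S_IWGRP   # group write
    assert os.X_OK << 0 == stat.S_IXOTH   # other execute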
def chmod(self):
    with tempfile.NamedTemporaryFile(suffix='_%s' % UNI_CHR) as f:
        hdfs.chmod("file://" + f.name, 444)
        s = os.stat(f.name)
        self.assertEqual(444, stat.S_IMODE(s.st_mode))
def chmod(self):
    with tempfile.NamedTemporaryFile() as f:
        hdfs.chmod("file://" + f.name, 444)
        s = os.stat(f.name)
        self.assertEqual(444, stat.S_IMODE(s.st_mode))
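Worth noting in both tests: the mode is passed as the plain integer 444, not the octal literal 0o444, and the assertion checks that the same integer comes back through S_IMODE. Bit-wise, that value works out as follows (stat.filemode requires Python 3.3+):

    import stat

    assert 444 == 0o674                             # i.e. rw-rwxr--
    assert stat.filemode(0o100674) == '-rw-rwxr--'  # regular file with mode 444 (decimal)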
def chmod(self, path, mode):
    hdfs.chmod(path, mode)