Example #1
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        self.logger.debug("remote_wd: %s", self.remote_wd)
        self.logger.debug("remote_exe: %s", self.remote_exe)
        self.logger.debug("remotes: %s", self.files_to_upload)
        if self.args.module:
            self.logger.debug('Generated pipes_code:\n\n %s',
                              self._generate_pipes_code())
        if not self.args.pretend:
            hdfs.mkdir(self.remote_wd)
            hdfs.chmod(self.remote_wd, "a+rx")
            self.logger.debug("created and chmod-ed: %s", self.remote_wd)
            pipes_code = self._generate_pipes_code()
            hdfs.dump(pipes_code, self.remote_exe)
            self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
            hdfs.chmod(self.remote_exe, "a+rx")
            self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
            for (l, h, _) in self.files_to_upload:
                self.logger.debug("uploading: %s to %s", l, h)
                hdfs.cp(l, h)
        self.logger.debug("Created%sremote paths:" %
                          (' [simulation] ' if self.args.pretend else ' '))
Example #2
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        self.logger.debug("remote_wd: %s", self.remote_wd)
        self.logger.debug("remote_exe: %s", self.remote_exe)
        self.logger.debug("remotes: %s", self.files_to_upload)
        if self.args.module:
            self.logger.debug(
                'Generated pipes_code:\n\n %s', self._generate_pipes_code()
            )
        if not self.args.pretend:
            hdfs.mkdir(self.remote_wd)
            hdfs.chmod(self.remote_wd, "a+rx")
            self.logger.debug("created and chmod-ed: %s", self.remote_wd)
            pipes_code = self._generate_pipes_code()
            hdfs.dump(pipes_code, self.remote_exe)
            self.logger.debug("dumped pipes_code to: %s", self.remote_exe)
            hdfs.chmod(self.remote_exe, "a+rx")
            self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
            for (l, h, _) in self.files_to_upload:
                self.logger.debug("uploading: %s to %s", l, h)
                hdfs.cp(l, h)
        self.logger.debug("Created%sremote paths:" %
                          (' [simulation] ' if self.args.pretend else ' '))
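The docstring above stresses that the working directory and the pipes script must end up readable (and executable) by everyone. As a hedged illustration, the small helper below checks that outcome after the fact using the hdfs.path.stat call that appears in the test examples further down; the helper name and the standalone form are made up, not part of the original class.

import stat
import pydoop.hdfs as hdfs

def _is_world_readable_and_executable(path):
    # Hypothetical check: inspect the mode bits of an HDFS path and verify
    # that "others" have read and execute permission, as set by "a+rx".
    mode = stat.S_IMODE(hdfs.path.stat(path).st_mode)
    return bool(mode & stat.S_IROTH) and bool(mode & stat.S_IXOTH)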
Example #3
def _create_directories(app_id, run_id, param_string, type, sub_type=None):
    """
    Creates directories for an experiment, if Experiments folder exists it will create directories
    below it, otherwise it will create them in the Logs directory.

    Args:
        :app_id: YARN application ID of the experiment
        :run_id: Experiment ID
        :param_string: name of the new directory created under parent directories
        :type: type of the new directory parent, e.g differential_evolution
        :sub_type: type of sub directory to parent, e.g generation

    Returns:
        The new directories for the yarn-application and for the execution (hdfs_exec_logdir, hdfs_appid_logdir)
    """

    pyhdfs_handle = get()

    if pyhdfs_handle.exists(project_path() + "Experiments"):
        hdfs_events_parent_dir = project_path() + "Experiments"
    elif pyhdfs_handle.exists(project_path() + "Logs"):
        hdfs_events_parent_dir = project_path() + "Logs/TensorFlow"
        try:
            st = hdfs.stat(hdfs_events_parent_dir)
            if not bool(st.st_mode & local_stat.S_IWGRP):
                # if not group writable, make it so
                hdfs.chmod(hdfs_events_parent_dir, "g+w")
        except IOError:
            # If this happens, the permissions are already correct: the creator
            # of /Logs/TensorFlow already made it group writable.
            pass

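    # NOTE: one of the two folders checked above (Experiments or Logs) is
    # assumed to exist; otherwise hdfs_events_parent_dir would be unbound here.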
    hdfs_appid_logdir = hdfs_events_parent_dir + "/" + app_id
    # if not pyhdfs_handle.exists(hdfs_appid_logdir):
    # pyhdfs_handle.create_directory(hdfs_appid_logdir)

    hdfs_run_id_logdir = hdfs_appid_logdir + "/" + type + "/run." + str(run_id)

    # determine directory structure based on arguments
    if sub_type:
        hdfs_exec_logdir = (hdfs_run_id_logdir + "/" + str(sub_type) +
                            "/" + str(param_string))
    elif not param_string and not sub_type:
        hdfs_exec_logdir = hdfs_run_id_logdir + '/'
    else:
        hdfs_exec_logdir = hdfs_run_id_logdir + '/' + str(param_string)

    # Need to remove directory if it exists (might be a task retry)
    if pyhdfs_handle.exists(hdfs_exec_logdir):
        pyhdfs_handle.delete(hdfs_exec_logdir, recursive=True)

    # create the new directory
    pyhdfs_handle.create_directory(hdfs_exec_logdir)

    # update logfile
    logfile = hdfs_exec_logdir + '/' + 'logfile'
    os.environ['EXEC_LOGFILE'] = logfile

    return hdfs_exec_logdir, hdfs_appid_logdir
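For reference, a hedged sketch of how the helper above might be invoked; the application ID, run ID and parameter string are invented values, and the resulting path assumes an existing Experiments dataset.

# Hypothetical invocation (values are made up).
exec_logdir, appid_logdir = _create_directories(
    app_id="application_1526043288220_0001",
    run_id=1,
    param_string="learning_rate=0.01",
    type="differential_evolution",
    sub_type="generation.0",
)
# With an Experiments dataset present, exec_logdir would look like:
#   <project>/Experiments/application_1526043288220_0001/differential_evolution/run.1/generation.0/learning_rate=0.01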
Example #4
def copy_file_2_remote_dir(remote_dir, log_file):
    LOGGER = logging.getLogger(__name__)
    suffix = time.strftime('%d-%m-%y_%H-%M-%S', time.gmtime(log_file.mtime))
    dest_filename = os.path.join(remote_dir, "{0}-{1}".format(log_file.filename, suffix))
    LOGGER.debug("Copying {0} to {1}".format(log_file.filepath, dest_filename))
    hdfs.put(log_file.filepath, dest_filename)
    LOGGER.debug("Copied {0} to HDFS".format(log_file.filepath))
    hdfs.chmod(dest_filename, BACKUP_PERMISSIONS)
    LOGGER.debug("Changed permissions for {0}".format(dest_filename))
Example #5
def copy_file_2_remote_dir(remote_dir, log_file):
    LOGGER = logging.getLogger(__name__)
    suffix = time.strftime('%d-%m-%y_%H-%M-%S', time.gmtime(log_file.mtime))
    dest_filename = os.path.join(remote_dir,
                                 "{0}-{1}".format(log_file.filename, suffix))
    LOGGER.debug("Copying {0} to {1}".format(log_file.filepath, dest_filename))
    hdfs.put(log_file.filepath, dest_filename)
    LOGGER.debug("Copied {0} to HDFS".format(log_file.filepath))
    hdfs.chmod(dest_filename, BACKUP_PERMISSIONS)
    LOGGER.debug("Changed permissions for {0}".format(dest_filename))
Example #6
def mapper(_, record, writer, conf):
    out_dir = conf.get('out.dir', utils.make_random_str())
    if not hdfs.path.isdir(out_dir):
        hdfs.mkdir(out_dir)
        hdfs.chmod(out_dir, 'g+rwx')
    img_path = record.strip()
    a = get_array(img_path)
    out_a = calc_features(a)
    out_path = hdfs.path.join(out_dir, '%s.out' % hdfs.path.basename(img_path))
    with hdfs.open(out_path, 'w') as fo:
        np.save(fo, out_a)  # actual output
    hdfs.chmod(out_path, 'g+rw')
    writer.emit(img_path, fo.name)  # info (tab-separated input-output)
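Since each feature array is written with np.save, a consumer can read it back directly from HDFS. A hedged sketch follows, assuming the file object returned by hdfs.open supports the binary read/seek interface that np.load expects; the path is a made-up example of what the mapper emits.

import numpy as np
import pydoop.hdfs as hdfs

out_path = "hdfs:///user/someone/features/img_0001.png.out"   # hypothetical
with hdfs.open(out_path) as fi:
    features = np.load(fi)   # recover the array saved by the mapper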
Example #7
def chmod(hdfs_path, mode, project=None):
    """
    Change file mode bits.

    Args:
        :hdfs_path: You can specify either a full hdfs pathname or a relative one (relative to your Project's path in HDFS).
        :mode: File mode (user/group/world privilege) bits
        :project: If this value is not specified, the path to your project is used. If you need the path to another project, specify the project name as a string.
    """
    if project is None:
        project = project_name()
    hdfs_path = _expand_path(hdfs_path, project)
    return hdfs.chmod(hdfs_path, mode)
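A hedged usage sketch for the wrapper above; the relative paths and the project name are made up.

# Make a project-relative file group writable (path is hypothetical).
chmod("Resources/data.csv", "g+w")

# Target a file in another project by naming it explicitly.
chmod("Logs/README.md", "a+r", project="demo_project")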
Example #8
def backup(config):
    LOGGER = logging.getLogger(__name__)
    remote_basedir = config.get('backup', 'remote-basedir')
    local_log_dir = config.get('backup', 'local-log-dir')
    checkpoint_file = config.get('backup', 'checkpoint')

    if not remote_dir_exists(remote_basedir):
        LOGGER.debug("Remote directory {0} does not exist, creating it".format(
            remote_basedir))
        create_remote_dir(remote_basedir)
        hdfs.chmod(remote_basedir, BACKUP_PERMISSIONS)
    log_files = list_local_files(local_log_dir)
    now = time.time()
    checkpoint = load_checkpoint(checkpoint_file)
    copied_log_files = {}
    for log_file in log_files:
        if log_file.mtime > checkpoint:
            remote_dir = get_remote_dir(log_file, remote_basedir)
            if not remote_dir_exists(remote_dir):
                create_remote_dir(remote_dir)
                LOGGER.debug("Created remote directory {0}".format(remote_dir))
            try:
                copy_file_2_remote_dir(remote_dir, log_file)
                copied_log_files[log_file] = remote_dir
            except Exception as ex:
                LOGGER.warn("Error while copying {0} - {1}".format(
                    log_file, ex))

    LOGGER.debug("Finished copying, updating checkpoint")
    write_checkpoint(checkpoint_file)

    if not copied_log_files:
        LOGGER.debug("Did not copy any log file")
    else:
        for lf, rd in copied_log_files.iteritems():
            LOGGER.info("Copied file {0} to {1}".format(lf, rd))

    LOGGER.info("Finished copying files")
Example #9
def backup(config):
    LOGGER = logging.getLogger(__name__)
    remote_basedir = config.get('backup', 'remote-basedir')
    local_log_dir = config.get('backup', 'local-log-dir')
    checkpoint_file = config.get('backup', 'checkpoint')
    
    if not remote_dir_exists(remote_basedir):
        LOGGER.debug("Remote directory {0} does not exist, creating it".format(remote_basedir))
        create_remote_dir(remote_basedir)
        hdfs.chmod(remote_basedir, BACKUP_PERMISSIONS)
    log_files = list_local_files(local_log_dir)
    now = time.time()
    checkpoint = load_checkpoint(checkpoint_file)
    copied_log_files = {}
    for log_file in log_files:
        if log_file.mtime > checkpoint:
            remote_dir = get_remote_dir(log_file, remote_basedir)
            if not remote_dir_exists(remote_dir):
                create_remote_dir(remote_dir)
                LOGGER.debug("Created remote directory {0}".format(remote_dir))
            try:
                copy_file_2_remote_dir(remote_dir, log_file)
                copied_log_files[log_file] = remote_dir
            except Exception as ex:
                LOGGER.warn("Error while copying {0} - {1}".format(log_file, ex))

    LOGGER.debug("Finished copying, updating checkpoint")
    write_checkpoint(checkpoint_file)

    if not copied_log_files:
        LOGGER.debug("Did not copy any log file")
    else:
        for lf, rd in copied_log_files.iteritems():
            LOGGER.info("Copied file {0} to {1}".format(lf, rd))

    LOGGER.info("Finished copying files")
Example #10
    def __setup_remote_paths(self):
        """
        Actually create the working directory and copy the module into it.

        Note: the script has to be readable by Hadoop; though this may not
        generally be a problem on HDFS, where the Hadoop user is usually
        the superuser, things may be different if our working directory is
        on a shared POSIX filesystem.  Therefore, we make the directory
        and the script accessible by all.
        """
        pipes_code = self.__generate_pipes_code()
        hdfs.mkdir(self.remote_wd)
        hdfs.chmod(self.remote_wd, "a+rx")
        hdfs.dump(pipes_code, self.remote_exe)
        hdfs.chmod(self.remote_exe, "a+rx")
        hdfs.put(self.args.module, self.remote_module)
        hdfs.chmod(self.remote_module, "a+r")
        self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
        self.logger.debug("Created remote paths:")
        self.logger.debug(self.remote_wd)
        self.logger.debug(self.remote_exe)
        self.logger.debug(self.remote_module)
Example #11
  def __setup_remote_paths(self):
    """
    Actually create the working directory and copy the module into it.

    Note: the script has to be readable by Hadoop; though this may not
    generally be a problem on HDFS, where the Hadoop user is usually
    the superuser, things may be different if our working directory is
    on a shared POSIX filesystem.  Therefore, we make the directory
    and the script accessible by all.
    """
    pipes_code = self.__generate_pipes_code()
    hdfs.mkdir(self.remote_wd)
    hdfs.chmod(self.remote_wd, "a+rx")
    hdfs.dump(pipes_code, self.remote_exe)
    hdfs.chmod(self.remote_exe, "a+rx")
    hdfs.put(self.args.module, self.remote_module)
    hdfs.chmod(self.remote_module, "a+r")
    self.__warn_user_if_wd_maybe_unreadable(self.remote_wd)
    self.logger.debug("Created remote paths:")
    self.logger.debug(self.remote_wd)
    self.logger.debug(self.remote_exe)
    self.logger.debug(self.remote_module)
Example #12
 def __test(self, offset, user=None):
     for mode in os.R_OK, os.W_OK, os.X_OK:
         hdfs.chmod(self.path, mode << offset)
         print ' * mode now: %03o' % hdfs.path.stat(self.path).st_mode
         self.assertTrue(hdfs.path.access(self.path, mode, user=user))
Example #13
 def chmod(self):
     with tempfile.NamedTemporaryFile(suffix='_%s' % UNI_CHR) as f:
         hdfs.chmod("file://" + f.name, 444)
         s = os.stat(f.name)
         self.assertEqual(444, stat.S_IMODE(s.st_mode))
Example #14
 def __test(self, offset, user=None):
     for mode in os.R_OK, os.W_OK, os.X_OK:
         hdfs.chmod(self.path, mode << offset)
         print ' * mode now: %03o' % hdfs.path.stat(self.path).st_mode
         self.assertTrue(hdfs.path.access(self.path, mode, user=user))
Example #15
 def chmod(self):
   with tempfile.NamedTemporaryFile() as f:
     hdfs.chmod("file://" + f.name, 444)
     s = os.stat(f.name)
     self.assertEqual(444, stat.S_IMODE(s.st_mode))
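One detail worth flagging in the two chmod tests above: the literal 444 is a decimal integer (equal to 0o674), not octal 444, and the assertion compares against the same decimal value, so the test is self-consistent but the resulting permissions are not r--r--r--. A hedged sketch of the distinction follows, assuming (as the test implies) that an integer mode is passed through as raw permission bits, while symbolic strings like "a+rx" behave as in the earlier examples.

import os
import stat
import tempfile
import pydoop.hdfs as hdfs

with tempfile.NamedTemporaryFile() as f:
    hdfs.chmod("file://" + f.name, 0o444)        # octal literal: r--r--r--
    assert stat.S_IMODE(os.stat(f.name).st_mode) == 0o444

    hdfs.chmod("file://" + f.name, "a+rx")       # symbolic form, as used above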
Example #16
 def chmod(self, path, mode):
     hdfs.chmod(path, mode)