Example #1
import os

import pydoop.hdfs
# Assumed context: these snippets appear to come from the hops util library,
# where `hdfs` is its HDFS helper module and `log` an in-module logging helper.
from hops import hdfs

def _version_resources(versioned_resources, rundir):
    """Copy HDFS resources into a run directory for versioning.

    Args:
        versioned_resources: list of HDFS paths to the resources to version
        rundir: HDFS run directory into which the resources are copied

    Returns:
        A comma-separated string of the versioned paths relative to the
        project root, or None if no resources were given.
    """
    if not versioned_resources:
        return None
    pyhdfs_handle = hdfs.get()
    pyhdfs_handle.create_directory(rundir)
    endpoint_prefix = hdfs.project_path()
    versioned_paths = []
    for hdfs_resource in versioned_resources:
        if pydoop.hdfs.path.exists(hdfs_resource):
            log("Versoning resource '%s' in rundir '%s'" %
                (hdfs_resource, rundir))

            # Remove the file if it exists
            target_path = os.path.join(rundir, os.path.basename(hdfs_resource))
            if hdfs.exists(target_path):
                hdfs.rmr(target_path)

            hdfs.cp(hdfs_resource, rundir)
            path, filename = os.path.split(hdfs_resource)
            versioned_paths.append(
                rundir.replace(endpoint_prefix, '') + '/' + filename)
        else:
            log("Resource not found '%s'" % hdfs_resource, level='warning')
            #raise Exception('Could not find resource in specified path: ' + hdfs_resource)

    return ', '.join(versioned_paths)
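
A minimal usage sketch (hypothetical paths; assumes an initialized Hopsworks
project where hops.hdfs is available):

rundir = hdfs.project_path() + 'Experiments/run_1'
resources = [hdfs.project_path() + 'Resources/config.yml',
             hdfs.project_path() + 'Resources/vocab.txt']
versioned = _version_resources(resources, rundir)
print(versioned)  # e.g. 'Experiments/run_1/config.yml, Experiments/run_1/vocab.txt'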
Example #2
# Assumed context, as in Example #1: `hdfs` and `constants` are the hops
# util library's HDFS helper and constants modules.
from hops import constants, hdfs

def _export_hdfs_model(hdfs_model_path, model_dir_hdfs, overwrite):
    """
    Exports an HDFS directory (or single file) of model files to the
    Hopsworks "Models" dataset.

    Args:
        :hdfs_model_path: the path to the model files in HDFS
        :model_dir_hdfs: path to the directory in HDFS to put the model files
        :overwrite: boolean flag indicating whether to overwrite a model that
            already exists in the export directory

    Returns:
        the path to the exported model files in HDFS
    """
    if hdfs.isdir(hdfs_model_path):
        # A directory of model files: copy each file into the target
        # directory, keeping only its basename.
        for file_source_path in hdfs.ls(hdfs_model_path):
            model_name = file_source_path
            if constants.DELIMITERS.SLASH_DELIMITER in file_source_path:
                last_index = model_name.rfind(constants.DELIMITERS.SLASH_DELIMITER)
                model_name = model_name[last_index + 1:]
            dest_path = model_dir_hdfs + constants.DELIMITERS.SLASH_DELIMITER + model_name
            hdfs.cp(file_source_path, dest_path, overwrite=overwrite)
    elif hdfs.isfile(hdfs_model_path):
        # A single model file: copy it into the target directory under its basename.
        model_name = hdfs_model_path
        if constants.DELIMITERS.SLASH_DELIMITER in hdfs_model_path:
            last_index = model_name.rfind(constants.DELIMITERS.SLASH_DELIMITER)
            model_name = model_name[last_index + 1:]
        dest_path = model_dir_hdfs + constants.DELIMITERS.SLASH_DELIMITER + model_name
        hdfs.cp(hdfs_model_path, dest_path, overwrite=overwrite)

    return model_dir_hdfs
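
A usage sketch (hypothetical paths; the numbered subdirectory follows the
Hopsworks convention of one directory per model version):

exported = _export_hdfs_model(
    hdfs.project_path() + 'Resources/my_model',  # trained model files in HDFS
    hdfs.project_path() + 'Models/my_model/1',   # target version directory
    overwrite=True)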
Example #3
import os

import pydoop.hdfs
# Assumed context, as above: `hdfs` is the hops util library's HDFS helper.
from hops import hdfs

def _upload_file_output(retval, hdfs_exec_logdir):
    """Upload file paths referenced in an experiment's return-value dict.

    String values in `retval` that name local files (absolute, or relative to
    the working directory) are uploaded to `hdfs_exec_logdir` and rewritten
    as project-relative HDFS paths; values that already live under the
    project's 'Experiments' dataset in HDFS are copied into the log directory.
    """
    if isinstance(retval, dict):
        for metric_key in retval.keys():
            value = str(retval[metric_key])
            # Only treat the value as a file reference if it looks like a
            # path or names a file in the current working directory.
            if '/' in value or os.path.exists(os.getcwd() + '/' + value):
                if os.path.exists(value):  # path exists as given (absolute or cwd-relative)
                    if hdfs.exists(hdfs_exec_logdir + '/' +
                                   value.split('/')[-1]):
                        hdfs.delete(hdfs_exec_logdir + '/' +
                                    value.split('/')[-1],
                                    recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(value)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + value.split('/')[-1]
                elif os.path.exists(os.getcwd() + '/' +
                                    value):  # relative path
                    output_file = os.getcwd() + '/' + value
                    if hdfs.exists(hdfs_exec_logdir + '/' + value):
                        hdfs.delete(hdfs_exec_logdir + '/' + value,
                                    recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(output_file)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[
                        len(hdfs.abs_path(hdfs.project_path())
                            ):] + '/' + output_file.split('/')[-1]
                elif value.startswith('Experiments') and value.endswith(
                        'output.log'):
                    # The experiment's own log file; nothing to upload.
                    continue
                elif value.startswith('Experiments') and hdfs.exists(
                        hdfs.project_path() + '/' + value):
                    # The value already lives in HDFS under the project;
                    # copy it into the execution log directory.
                    hdfs.cp(hdfs.project_path() + '/' + value,
                            hdfs_exec_logdir)
                else:
                    raise Exception(
                        'Could not find file or directory on path ' +
                        str(value))
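
A usage sketch (hypothetical values; hdfs_exec_logdir would normally be the
experiment's execution log directory in HDFS):

retval = {'accuracy': '0.93', 'confusion_matrix': 'confusion.png'}
_upload_file_output(retval, hdfs_exec_logdir)
# 'confusion.png' is uploaded from the working directory and
# retval['confusion_matrix'] now holds its project-relative HDFS path.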