def _version_resources(versioned_resources, rundir):
    """
    Copies each of the given HDFS resources into the run directory so that the
    exact versions used by the run are preserved.

    Args:
        :versioned_resources: list of HDFS paths to resources that should be versioned
        :rundir: HDFS directory of the current run to copy the resources into

    Returns:
        a comma-separated string of the project-relative paths of the versioned
        resources, or None if no resources were given
    """
    if not versioned_resources:
        return None
    pyhdfs_handle = hdfs.get()
    pyhdfs_handle.create_directory(rundir)
    endpoint_prefix = hdfs.project_path()
    versioned_paths = []
    for hdfs_resource in versioned_resources:
        if pydoop.hdfs.path.exists(hdfs_resource):
            log("Versioning resource '%s' in rundir '%s'" % (hdfs_resource, rundir))

            # Remove the target file if it already exists in the run directory
            target_path = os.path.join(rundir, os.path.basename(hdfs_resource))
            if hdfs.exists(target_path):
                hdfs.rmr(target_path)

            hdfs.cp(hdfs_resource, rundir)
            path, filename = os.path.split(hdfs_resource)
            versioned_paths.append(rundir.replace(endpoint_prefix, '') + '/' + filename)
        else:
            log("Resource not found '%s'" % hdfs_resource, level='warning')
            #raise Exception('Could not find resource in specified path: ' + hdfs_resource)

    return ', '.join(versioned_paths)
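# Example usage (a minimal sketch with hypothetical paths; assumes the hops
# `hdfs` module is initialized inside a running Hopsworks job, as the rest of
# this module does):
#
#   rundir = hdfs.project_path() + 'Experiments/my_experiment/run.1'
#   resources = [hdfs.project_path() + 'Resources/train.csv']
#   versioned = _version_resources(resources, rundir)
#   # versioned would be e.g. 'Experiments/my_experiment/run.1/train.csv'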
def _export_hdfs_model(hdfs_model_path, model_dir_hdfs, overwrite):
    """
    Exports an HDFS directory of model files to the Hopsworks "Models" dataset

    Args:
        :hdfs_model_path: the path to the model files in HDFS
        :model_dir_hdfs: path to the directory in HDFS to put the model files
        :overwrite: boolean flag whether to overwrite in case a model already exists in the exported directory

    Returns:
        the path to the exported model files in HDFS
    """
    if hdfs.isdir(hdfs_model_path):
        # Copy every file in the source directory into the target Models directory
        for file_source_path in hdfs.ls(hdfs_model_path):
            model_name = file_source_path
            if constants.DELIMITERS.SLASH_DELIMITER in file_source_path:
                last_index = model_name.rfind(constants.DELIMITERS.SLASH_DELIMITER)
                model_name = model_name[last_index + 1:]
            dest_path = model_dir_hdfs + constants.DELIMITERS.SLASH_DELIMITER + model_name
            hdfs.cp(file_source_path, dest_path, overwrite=overwrite)
    elif hdfs.isfile(hdfs_model_path):
        # A single model file: copy it directly into the target Models directory
        model_name = hdfs_model_path
        if constants.DELIMITERS.SLASH_DELIMITER in hdfs_model_path:
            last_index = model_name.rfind(constants.DELIMITERS.SLASH_DELIMITER)
            model_name = model_name[last_index + 1:]
        dest_path = model_dir_hdfs + constants.DELIMITERS.SLASH_DELIMITER + model_name
        hdfs.cp(hdfs_model_path, dest_path, overwrite=overwrite)

    return model_dir_hdfs
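# Example usage (a minimal sketch with hypothetical paths; assumes a trained
# model has already been written somewhere in the project's HDFS space):
#
#   exported = _export_hdfs_model(
#       hdfs.project_path() + 'Resources/mnist_model/1',
#       hdfs.project_path() + 'Models/mnist_model/1',
#       overwrite=True)
#   # exported is the second argument, i.e. the Models directory that was populated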
def _upload_file_output(retval, hdfs_exec_logdir):
    """
    Uploads any local files referenced in the returned metrics dict to the
    experiment log directory in HDFS and rewrites the corresponding dict values
    to project-relative HDFS paths.

    Args:
        :retval: the dict returned by the experiment function
        :hdfs_exec_logdir: the HDFS log directory of the experiment execution
    """
    if type(retval) is dict:
        for metric_key in retval.keys():
            value = str(retval[metric_key])
            if '/' in value or os.path.exists(os.getcwd() + '/' + value):
                if os.path.exists(value): # absolute path
                    if hdfs.exists(hdfs_exec_logdir + '/' + value.split('/')[-1]):
                        hdfs.delete(hdfs_exec_logdir + '/' + value.split('/')[-1], recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(value)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[len(hdfs.abs_path(hdfs.project_path())):] \
                                         + '/' + value.split('/')[-1]
                elif os.path.exists(os.getcwd() + '/' + value): # relative path
                    output_file = os.getcwd() + '/' + value
                    if hdfs.exists(hdfs_exec_logdir + '/' + value):
                        hdfs.delete(hdfs_exec_logdir + '/' + value, recursive=False)
                    pydoop.hdfs.put(value, hdfs_exec_logdir)
                    os.remove(output_file)
                    hdfs_exec_logdir = hdfs.abs_path(hdfs_exec_logdir)
                    retval[metric_key] = hdfs_exec_logdir[len(hdfs.abs_path(hdfs.project_path())):] \
                                         + '/' + output_file.split('/')[-1]
                elif value.startswith('Experiments') and value.endswith('output.log'):
                    # The execution log itself already lives in the log directory
                    continue
                elif value.startswith('Experiments') and hdfs.exists(hdfs.project_path() + '/' + value):
                    # Already a project-relative HDFS path: just copy it into the log directory
                    hdfs.cp(hdfs.project_path() + '/' + value, hdfs_exec_logdir)
                else:
                    raise Exception('Could not find file or directory on path ' + str(value))
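# Example usage (a minimal sketch with hypothetical values; `hdfs_exec_logdir`
# is assumed to point at an existing Experiments log directory in HDFS):
#
#   retval = {'accuracy': 0.91, 'confusion_matrix': 'confusion.png'}
#   _upload_file_output(retval, hdfs_exec_logdir)
#   # 'confusion.png' is uploaded from the working directory to hdfs_exec_logdir,
#   # removed locally, and retval['confusion_matrix'] is rewritten to its
#   # project-relative HDFS path; the numeric metric is left untouched.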