def validate_server_directory_upload(trans, server_dir):
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    if trans.user_is_admin:
        import_dir = trans.app.config.library_import_dir
        import_dir_desc = 'library_import_dir'
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir = trans.app.config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        if server_dir != trans.user.email:
            import_dir = os.path.join(import_dir, trans.user.email)
        import_dir_desc = 'user_library_import_dir'

    full_dir = os.path.join(import_dir, server_dir)
    unsafe = None
    if safe_relpath(server_dir):
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        if import_dir_desc == 'user_library_import_dir' and safe_contains(import_dir, full_dir, whitelist=trans.app.config.user_library_import_symlink_whitelist):
            for unsafe in unsafe_walk(full_dir, whitelist=[import_dir] + trans.app.config.user_library_import_symlink_whitelist, username=username):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
    else:
        log.error('User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s', server_dir, os.path.realpath(full_dir))
        unsafe = True
    if unsafe:
        raise RequestParameterInvalidException("Invalid server_dir specified")
    return full_dir, import_dir_desc
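# For reference, a minimal sketch of the safe_relpath() helper relied on above,
# reconstructed from its observed use (the shipped galaxy.util.path version may
# differ in detail): a path is "safe" when it is relative and does not climb
# above its root through '..' components. Assumes `os` is imported at module
# level, as in the functions above.
def _safe_relpath_sketch(path):
    # reject absolute paths and any path that normalizes to start with '..'
    return not (os.path.isabs(path) or os.path.normpath(path).startswith(os.pardir))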
def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but S3 will not
        # follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)
    rel_path = os.path.join(*directory_hash_id(self._get_object_id(obj)))
    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)
    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(self._get_object_id(obj)))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)
    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path
    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % self._get_object_id(obj))
    return rel_path
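# The hashed layout above comes from directory_hash_id(), which spreads
# datasets across subdirectories so no single directory holds more than ~1000
# files. A reconstructed sketch of the idea (the shipped galaxy.util helper
# may differ in detail):
#   id 10    -> ['000']  -> 000/dataset_10.dat
#   id 12345 -> ['012']  -> 012/dataset_12345.dat
def _directory_hash_id_sketch(obj_id):
    s = str(obj_id)
    if len(s) < 4:
        # ids 0-999 all land under 000/
        return ['000']
    # pad to a multiple of three and drop the last three digits,
    # so each leaf directory holds at most 1000 datasets
    padded = (3 - len(s) % 3) * '0' + s
    padded = padded[:-3]
    return [padded[i:i + 3] for i in range(0, len(padded), 3)]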
def safemembers(self):
    members = self.archive
    if self.file_type == "tar":
        for finfo in members:
            if not safe_relpath(finfo.name):
                raise Exception(finfo.name + " is blocked (illegal path).")
            elif (finfo.issym() or finfo.islnk()) and not safe_relpath(finfo.linkname):
                raise Exception(finfo.name + " is blocked.")
            else:
                yield finfo
    elif self.file_type == "zip":
        for name in members.namelist():
            if not safe_relpath(name):
                raise Exception(name + " is blocked (illegal path).")
            else:
                yield name
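# Hypothetical usage of safemembers() above: assuming the surrounding class
# keeps an open tarfile.TarFile in self.archive (file_type == "tar"), handing
# the generator to extractall() means only vetted members are written out;
# the generator raises on the first unsafe member, aborting the extraction.
def safe_extract_tar(wrapper, dest_dir):
    """Extract a wrapped tar archive, stopping at the first unsafe member."""
    wrapper.archive.extractall(path=dest_dir, members=wrapper.safemembers())

# e.g. (Wrapper is a hypothetical stand-in for the class defining safemembers):
#     with tarfile.open('upload.tar.gz') as tar:
#         safe_extract_tar(Wrapper(archive=tar, file_type='tar'), '/tmp/dest')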
def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but S3 will not
        # follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)
    rel_path = os.path.join(*directory_hash_id(obj.id))
    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)
    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)
    # S3 folders are marked by having trailing '/' so add it now
    rel_path = '%s/' % rel_path
    if not dir_only:
        rel_path = os.path.join(rel_path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return rel_path
def validate_server_directory_upload(trans, server_dir):
    if server_dir in [None, 'None', '']:
        raise RequestParameterInvalidException("Invalid or unspecified server_dir parameter")

    if trans.user_is_admin():
        import_dir = trans.app.config.library_import_dir
        import_dir_desc = 'library_import_dir'
        if not import_dir:
            raise ConfigDoesNotAllowException('"library_import_dir" is not set in the Galaxy configuration')
    else:
        import_dir = trans.app.config.user_library_import_dir
        if not import_dir:
            raise ConfigDoesNotAllowException('"user_library_import_dir" is not set in the Galaxy configuration')
        if server_dir != trans.user.email:
            import_dir = os.path.join(import_dir, trans.user.email)
        import_dir_desc = 'user_library_import_dir'

    full_dir = os.path.join(import_dir, server_dir)
    unsafe = None
    if safe_relpath(server_dir):
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        if import_dir_desc == 'user_library_import_dir' and safe_contains(import_dir, full_dir, whitelist=trans.app.config.user_library_import_symlink_whitelist):
            for unsafe in unsafe_walk(full_dir, whitelist=[import_dir] + trans.app.config.user_library_import_symlink_whitelist, username=username):
                log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
    else:
        log.error('User attempted to import a directory path that resolves to a path outside of their import dir: %s -> %s', server_dir, os.path.realpath(full_dir))
        unsafe = True
    if unsafe:
        raise RequestParameterInvalidException("Invalid server_dir specified")
    return full_dir, import_dir_desc
def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but iRODS will
        # not follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)
    path = ""
    if extra_dir is not None:
        path = extra_dir
    # extra_dir_at_root is ignored - since the iRODS plugin does not use
    # the directory hash, there is only one level of subdirectory.
    if not dir_only:
        # the .dat extension is stripped when stored in iRODS
        # TODO: is the strip_dat kwarg the best way to implement this?
        if strip_dat and alt_name and alt_name.endswith('.dat'):
            alt_name = os.path.splitext(alt_name)[0]
        default_name = 'dataset_%s' % obj.id
        if not strip_dat:
            default_name += '.dat'
        path = path_join(path, alt_name if alt_name else default_name)
    path = path_join(self.root_collection_path, path)
    return path
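# Worked examples for __get_rods_path() above, assuming path_join() behaves
# like a POSIX join, self.root_collection_path == '/zone/home/galaxy', and
# obj.id == 10 (values are illustrative only):
#   defaults (strip_dat=True)            -> /zone/home/galaxy/dataset_10
#   strip_dat=False                      -> /zone/home/galaxy/dataset_10.dat
#   alt_name='foo.dat' (strip_dat=True)  -> /zone/home/galaxy/foo
#   dir_only=True, extra_dir='x'         -> /zone/home/galaxy/x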
def safemembers(self):
    members = self.archive
    common_prefix_dir = self.common_prefix_dir
    if self.file_type == "tar":
        for finfo in members:
            if not safe_relpath(finfo.name):
                raise Exception(f"Path '{finfo.name}' is blocked (illegal path).")
            if finfo.issym() or finfo.islnk():
                link_target = os.path.join(os.path.dirname(finfo.name), finfo.linkname)
                if not safe_relpath(link_target) or not os.path.normpath(link_target).startswith(common_prefix_dir):
                    raise Exception(f"Link '{finfo.name}' to '{finfo.linkname}' is blocked.")
            yield finfo
    elif self.file_type == "zip":
        for name in members.namelist():
            if not safe_relpath(name):
                raise Exception(f"{name} is blocked (illegal path).")
            yield name
def check_archive(repository, archive):
    valid = []
    invalid = []
    errors = []
    undesirable_files = []
    undesirable_dirs = []
    for member in archive.getmembers():
        # Allow regular files and directories only
        if not (member.isdir() or member.isfile() or member.islnk()):
            errors.append("Uploaded archives can only include regular directories and files (no symbolic links, devices, etc).")
            invalid.append(member)
            continue
        if not safe_relpath(member.name):
            errors.append("Uploaded archives cannot contain files that would extract outside of the archive.")
            invalid.append(member)
            continue
        if os.path.basename(member.name) in UNDESIRABLE_FILES:
            undesirable_files.append(member)
            continue
        head = tail = member.name
        found_undesirable_dir = False
        while tail:
            head, tail = os.path.split(head)
            if tail in UNDESIRABLE_DIRS:
                undesirable_dirs.append(member)
                found_undesirable_dir = True
                break
        if found_undesirable_dir:
            continue
        if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.')
            invalid.append(member)
            continue
        if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.')
            invalid.append(member)
            continue
        valid.append(member)
    ArchiveCheckResults = namedtuple('ArchiveCheckResults', ['valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors'])
    return ArchiveCheckResults(valid, invalid, undesirable_files, undesirable_dirs, errors)
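# A hypothetical driver for check_archive() above: `repository` is assumed to
# be a Tool Shed repository object and `archive_path` an uploaded tarball on
# disk; `log` is the module-level logger used by the functions above.
import tarfile

def report_archive_problems(repository, archive_path):
    with tarfile.open(archive_path) as archive:
        results = check_archive(repository, archive)
    for error in results.errors:
        log.warning('upload rejected: %s', error)
    # return the names of the members that passed every check
    return [member.name for member in results.valid]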
def _construct_path(self, obj, base_dir=None, dir_only=None, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    """Construct path from object and parameters"""
    # param extra_dir: should never be constructed from provided data but
    # just make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: {0}'.format(extra_dir))
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning(
                'alt_name would locate path outside dir: {0}'.format(alt_name))
            raise ObjectInvalid("The requested object is invalid")
        # alt_name can contain parent directory references, but Pithos+ will
        # not follow them, so if they are valid we normalize them out
        alt_name = os.path.normpath(alt_name)
    rel_path = os.path.join(*directory_hash_id(obj.id))
    if extra_dir is not None:
        if extra_dir_at_root:
            rel_path = os.path.join(extra_dir, rel_path)
        else:
            rel_path = os.path.join(rel_path, extra_dir)
    # for JOB_WORK directory
    if obj_dir:
        rel_path = os.path.join(rel_path, str(obj.id))
    if base_dir:
        base = self.extra_dirs.get(base_dir)
        return os.path.join(base, rel_path)
    # Pithos+ folders are marked by having trailing '/' so add it now
    rel_path = '{0}/'.format(rel_path)
    if not dir_only:
        an = alt_name if alt_name else 'dataset_{0}.dat'.format(obj.id)
        rel_path = os.path.join(rel_path, an)
    return rel_path
def check_archive(repository, archive):
    valid = []
    invalid = []
    errors = []
    undesirable_files = []
    undesirable_dirs = []
    for member in archive.getmembers():
        # Allow regular files and directories only
        if not (member.isdir() or member.isfile() or member.islnk()):
            errors.append("Uploaded archives can only include regular directories and files (no symbolic links, devices, etc).")
            invalid.append(member)
            continue
        if not safe_relpath(member.name):
            errors.append("Uploaded archives cannot contain files that would extract outside of the archive.")
            invalid.append(member)
            continue
        if os.path.basename(member.name) in UNDESIRABLE_FILES:
            undesirable_files.append(member)
            continue
        # walk up the member's path and skip it if any component is an
        # undesirable directory; a plain flag avoids relying on assertions
        # for control flow, which stops working under `python -O`
        head = tail = member.name
        found_undesirable_dir = False
        while tail:
            head, tail = os.path.split(head)
            if tail in UNDESIRABLE_DIRS:
                undesirable_dirs.append(member)
                found_undesirable_dir = True
                break
        if found_undesirable_dir:
            continue
        if repository.type == rt_util.REPOSITORY_SUITE_DEFINITION and member.name != rt_util.REPOSITORY_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Repository suite definition</b> can contain only a single file named <b>repository_dependencies.xml</b>.')
            invalid.append(member)
            continue
        if repository.type == rt_util.TOOL_DEPENDENCY_DEFINITION and member.name != rt_util.TOOL_DEPENDENCY_DEFINITION_FILENAME:
            errors.append('Repositories of type <b>Tool dependency definition</b> can contain only a single file named <b>tool_dependencies.xml</b>.')
            invalid.append(member)
            continue
        valid.append(member)
    ArchiveCheckResults = namedtuple('ArchiveCheckResults', ['valid', 'invalid', 'undesirable_files', 'undesirable_dirs', 'errors'])
    return ArchiveCheckResults(valid, invalid, undesirable_files, undesirable_dirs, errors)
def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    """
    Construct the absolute path for accessing the object identified by `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: boolean
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself. This
                     option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected
                      path used to access the object identified by `obj`
                      (e.g., /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: boolean
    :param old_style: This option is used for backward compatibility. If
                      `True` then the composed directory structure does not
                      include a hash id (e.g., /files/dataset_10.dat (old)
                      vs. /files/000/dataset_10.dat (new))
    """
    base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name and not safe_relpath(alt_name):
        log.warning('alt_name would locate path outside dir: %s', alt_name)
        raise ObjectInvalid("The requested object is invalid")
    obj_id = self._get_object_id(obj)
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj_id))
        # Create a subdirectory for the object ID
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj_id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        assert obj_id is not None, "The effective dataset identifier consumed by object store [%s] must be set before a path can be constructed." % self.store_by
        path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj_id)
    return os.path.abspath(path)
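# Worked examples for the disk-backed _construct_path() above, derived from
# the code itself; assumes self.file_path == '/files', base_dir=None, and an
# object whose effective id is 10 (directory_hash_id(10) assumed -> ['000']):
#   old_style=True                        -> /files/dataset_10.dat
#   defaults (hashed layout)              -> /files/000/dataset_10.dat
#   dir_only=True, extra_dir='_metadata'  -> /files/000/_metadata
#   obj_dir=True, extra_dir='job_work'    -> /files/000/10/job_work/dataset_10.dat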
def load(self, trans, payload=None, **kwd):
    """
    Load dataset(s) from the given source into the library.

    * POST /api/libraries/datasets

    :param payload: dictionary structure containing:
        :param encoded_folder_id: the encoded id of the folder to import dataset(s) to
        :type  encoded_folder_id: an encoded id string
        :param source: source the datasets should be loaded from
            Source can be:
                user directory - root folder specified in galaxy.ini as "$user_library_import_dir"
                    example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                    the folder with the user login has to be created beforehand
                (admin) import directory - root folder specified in galaxy.ini as "$library_import_dir"
                    example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
                (admin) any absolute or relative path - option allowed with "allow_library_path_paste" in galaxy.ini
        :type  source: str
        :param link_data: flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
            while linking is set to True all symlinks will be resolved _once_
        :type  link_data: bool
        :param preserve_dirs: flag whether to preserve the directory structure when importing dir
            if False only datasets will be imported
        :type  preserve_dirs: bool
        :param file_type: file type of the loaded datasets, defaults to 'auto' (autodetect)
        :type  file_type: str
        :param dbkey: dbkey of the loaded genome, defaults to '?' (unknown)
        :type  dbkey: str
        :param tag_using_filenames: flag whether to generate dataset tags from filenames
        :type  tag_using_filenames: bool
    :type payload: dict

    :returns: dict containing information about the created upload job
    :rtype: dictionary

    :raises: RequestParameterMissingException, AdminRequiredException,
        ConfigDoesNotAllowException, RequestParameterInvalidException,
        InsufficientPermissionsException, ObjectNotFound
    """
    if payload:
        kwd.update(payload)
    kwd['space_to_tab'] = False
    kwd['to_posix_lines'] = True
    kwd['dbkey'] = kwd.get('dbkey', '?')
    kwd['file_type'] = kwd.get('file_type', 'auto')
    kwd['link_data_only'] = 'link_to_files' if util.string_as_bool(kwd.get('link_data', False)) else 'copy_files'
    kwd['tag_using_filenames'] = util.string_as_bool(kwd.get('tag_using_filenames', None))
    encoded_folder_id = kwd.get('encoded_folder_id', None)
    if encoded_folder_id is not None:
        folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
    else:
        raise exceptions.RequestParameterMissingException('The required attribute encoded_folder_id is missing.')
    path = kwd.get('path', None)
    if path is None:
        raise exceptions.RequestParameterMissingException('The required attribute path is missing.')
    folder = self.folder_manager.get(trans, folder_id)

    source = kwd.get('source', None)
    if source not in ['userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path']:
        raise exceptions.RequestParameterMissingException('You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder".')
    elif source in ['importdir_file', 'importdir_folder']:
        if not trans.user_is_admin():
            raise exceptions.AdminRequiredException('Only admins can import from importdir.')
        if not trans.app.config.library_import_dir:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from importdir.')
        import_base_dir = trans.app.config.library_import_dir
        if not safe_relpath(path):
            # admins shouldn't be able to explicitly specify a path outside server_dir, but symlinks are allowed.
            # the reasoning here is that galaxy admins may not have direct filesystem access or can only access
            # library_import_dir via FTP (which cannot create symlinks), and may rely on sysadmins to set up the
            # import directory. if they have filesystem access, all bets are off.
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        path = os.path.join(import_base_dir, path)
    elif source in ['userdir_file', 'userdir_folder']:
        unsafe = None
        user_login = trans.user.email
        user_base_dir = trans.app.config.user_library_import_dir
        if user_base_dir is None:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow upload from user directories.')
        full_dir = os.path.join(user_base_dir, user_login)
        if not safe_contains(full_dir, path, whitelist=trans.app.config.user_library_import_symlink_whitelist):
            # the path is a symlink outside the user dir
            path = os.path.join(full_dir, path)
            log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', path, os.path.realpath(path))
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        path = os.path.join(full_dir, path)
        for unsafe in unsafe_walk(path, whitelist=[full_dir] + trans.app.config.user_library_import_symlink_whitelist):
            # the path is a dir and contains files that symlink outside the user dir
            log.error('User attempted to import a directory containing a path that resolves to a path outside of their import dir: %s -> %s', unsafe, os.path.realpath(unsafe))
        if unsafe:
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        if not os.path.exists(path):
            raise exceptions.RequestParameterInvalidException('Given path does not exist on the host.')
        if not self.folder_manager.can_add_item(trans, folder):
            raise exceptions.InsufficientPermissionsException('You do not have proper permission to add items to the given folder.')
    elif source == 'admin_path':
        if not trans.app.config.allow_library_path_paste:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from path.')
        if not trans.user_is_admin():
            raise exceptions.AdminRequiredException('Only admins can import from path.')

    # Set up the traditional tool state/params
    tool_id = 'upload1'
    tool = trans.app.toolbox.get_tool(tool_id)
    state = tool.new_state(trans)
    populate_state(trans, tool.inputs, kwd, state.inputs)
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input in tool.inputs.itervalues():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append(input)
    library_bunch = upload_common.handle_library_params(trans, {}, trans.security.encode_id(folder.id))
    abspath_datasets = []
    kwd['filesystem_paths'] = path
    if source in ['importdir_folder']:
        kwd['filesystem_paths'] = os.path.join(import_base_dir, path)
    # user wants to import one file only
    elif source in ["userdir_file", "importdir_file"]:
        file = os.path.abspath(path)
        abspath_datasets.append(trans.webapp.controllers['library_common'].make_library_uploaded_dataset(
            trans, 'api', kwd, os.path.basename(file), file, 'server_dir', library_bunch))
    # user wants to import whole folder
    elif source == "userdir_folder":
        uploaded_datasets_bunch = trans.webapp.controllers['library_common'].get_path_paste_uploaded_datasets(
            trans, 'api', kwd, library_bunch, 200, '')
        uploaded_datasets = uploaded_datasets_bunch[0]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
        for ud in uploaded_datasets:
            ud.path = os.path.abspath(ud.path)
            abspath_datasets.append(ud)
    # user wants to import from path
    if source in ["admin_path", "importdir_folder"]:
        # validate the path is within root
        uploaded_datasets_bunch = trans.webapp.controllers['library_common'].get_path_paste_uploaded_datasets(
            trans, 'api', kwd, library_bunch, 200, '')
        uploaded_datasets = uploaded_datasets_bunch[0]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
        for ud in uploaded_datasets:
            ud.path = os.path.abspath(ud.path)
            abspath_datasets.append(ud)
    json_file_path = upload_common.create_paramfile(trans, abspath_datasets)
    data_list = [ud.data for ud in abspath_datasets]
    job_params = {}
    job_params['link_data_only'] = dumps(kwd.get('link_data_only', 'copy_files'))
    job_params['uuid'] = dumps(kwd.get('uuid', None))
    job, output = upload_common.create_job(trans, tool_params, tool, json_file_path, data_list, folder=folder, job_params=job_params)
    trans.sa_session.add(job)
    trans.sa_session.flush()
    job_dict = job.to_dict()
    job_dict['id'] = trans.security.encode_id(job_dict['id'])
    return job_dict
def load(self, trans, payload=None, **kwd):
    """
    POST /api/libraries/datasets

    Load dataset(s) from the given source into the library.

    :param payload: dictionary structure containing:
        :param encoded_folder_id: the encoded id of the folder to import dataset(s) to
        :type  encoded_folder_id: an encoded id string
        :param source: source the datasets should be loaded from. Source can be:
            - user directory
                root folder specified in galaxy.ini as "$user_library_import_dir"
                example path: path/to/galaxy/$user_library_import_dir/[email protected]/{user can browse everything here}
                the folder with the user login has to be created beforehand
            - (admin) import directory
                root folder specified in galaxy.ini as "$library_import_dir"
                example path: path/to/galaxy/$library_import_dir/{admin can browse everything here}
            - (admin) any absolute or relative path
                option allowed with "allow_library_path_paste" in galaxy.ini
        :type  source: str
        :param link_data: flag whether to link the dataset to data or copy it to Galaxy, defaults to copy
            while linking is set to True all symlinks will be resolved _once_
        :type  link_data: bool
        :param preserve_dirs: flag whether to preserve the directory structure when importing dir
            if False only datasets will be imported
        :type  preserve_dirs: bool
        :param file_type: file type of the loaded datasets, defaults to 'auto' (autodetect)
        :type  file_type: str
        :param dbkey: dbkey of the loaded genome, defaults to '?' (unknown)
        :type  dbkey: str
        :param tag_using_filenames: flag whether to generate dataset tags from filenames
        :type  tag_using_filenames: bool
    :type payload: dict

    :returns: dict containing information about the created upload job
    :rtype: dictionary

    :raises: RequestParameterMissingException, AdminRequiredException,
        ConfigDoesNotAllowException, RequestParameterInvalidException,
        InsufficientPermissionsException, ObjectNotFound
    """
    if payload:
        kwd.update(payload)
    kwd['space_to_tab'] = False
    kwd['to_posix_lines'] = True
    kwd['dbkey'] = kwd.get('dbkey', '?')
    kwd['file_type'] = kwd.get('file_type', 'auto')
    kwd['link_data_only'] = 'link_to_files' if util.string_as_bool(kwd.get('link_data', False)) else 'copy_files'
    kwd['tag_using_filenames'] = util.string_as_bool(kwd.get('tag_using_filenames', None))
    encoded_folder_id = kwd.get('encoded_folder_id', None)
    if encoded_folder_id is not None:
        folder_id = self.folder_manager.cut_and_decode(trans, encoded_folder_id)
    else:
        raise exceptions.RequestParameterMissingException('The required attribute encoded_folder_id is missing.')
    path = kwd.get('path', None)
    if path is None:
        raise exceptions.RequestParameterMissingException('The required attribute path is missing.')
    if not isinstance(path, str):
        raise exceptions.RequestParameterInvalidException('The required attribute path is not a string.')
    folder = self.folder_manager.get(trans, folder_id)

    source = kwd.get('source', None)
    if source not in ['userdir_file', 'userdir_folder', 'importdir_file', 'importdir_folder', 'admin_path']:
        raise exceptions.RequestParameterMissingException('You have to specify "source" parameter. Possible values are "userdir_file", "userdir_folder", "admin_path", "importdir_file" and "importdir_folder".')
    elif source in ['importdir_file', 'importdir_folder']:
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException('Only admins can import from importdir.')
        if not trans.app.config.library_import_dir:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from importdir.')
        import_base_dir = trans.app.config.library_import_dir
        if not safe_relpath(path):
            # admins shouldn't be able to explicitly specify a path outside server_dir, but symlinks are allowed.
            # the reasoning here is that galaxy admins may not have direct filesystem access or can only access
            # library_import_dir via FTP (which cannot create symlinks), and may rely on sysadmins to set up the
            # import directory. if they have filesystem access, all bets are off.
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        path = os.path.join(import_base_dir, path)
    elif source in ['userdir_file', 'userdir_folder']:
        username = trans.user.username if trans.app.config.user_library_import_check_permissions else None
        user_login = trans.user.email
        user_base_dir = trans.app.config.user_library_import_dir
        if user_base_dir is None:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow upload from user directories.')
        full_dir = os.path.join(user_base_dir, user_login)
        if not safe_contains(full_dir, path, allowlist=trans.app.config.user_library_import_symlink_allowlist):
            # the path is a symlink outside the user dir
            path = os.path.join(full_dir, path)
            log.error('User attempted to import a path that resolves to a path outside of their import dir: %s -> %s', path, os.path.realpath(path))
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        if trans.app.config.user_library_import_check_permissions and not full_path_permission_for_user(full_dir, path, username):
            log.error('User attempted to import a path that resolves to a path outside of their import dir: '
                      '%s -> %s and cannot be read by them.', path, os.path.realpath(path))
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        path = os.path.join(full_dir, path)
        # any() consumes the walk generator; a bare generator object would
        # always be truthy
        if any(unsafe_walk(path, allowlist=[full_dir] + trans.app.config.user_library_import_symlink_allowlist, username=username)):
            # the path is a dir and contains files that symlink outside the user dir
            error = 'User attempted to import a path that resolves to a path outside of their import dir: {} -> {}'.format(
                path, os.path.realpath(path)
            )
            if trans.app.config.user_library_import_check_permissions:
                error += ' or is not readable for them.'
            log.error(error)
            raise exceptions.RequestParameterInvalidException('The given path is invalid.')
        if not os.path.exists(path):
            raise exceptions.RequestParameterInvalidException('Given path does not exist on the host.')
        if not self.folder_manager.can_add_item(trans, folder):
            raise exceptions.InsufficientPermissionsException('You do not have proper permission to add items to the given folder.')
    elif source == 'admin_path':
        if not trans.app.config.allow_library_path_paste:
            raise exceptions.ConfigDoesNotAllowException('The configuration of this Galaxy instance does not allow admins to import into library from path.')
        if not trans.user_is_admin:
            raise exceptions.AdminRequiredException('Only admins can import from path.')

    # Set up the traditional tool state/params
    tool_id = 'upload1'
    tool = trans.app.toolbox.get_tool(tool_id)
    state = tool.new_state(trans)
    populate_state(trans, tool.inputs, kwd, state.inputs)
    tool_params = state.inputs
    dataset_upload_inputs = []
    for input in tool.inputs.values():
        if input.type == "upload_dataset":
            dataset_upload_inputs.append(input)
    library_bunch = upload_common.handle_library_params(trans, {}, trans.security.encode_id(folder.id))
    abspath_datasets = []
    kwd['filesystem_paths'] = path
    if source in ['importdir_folder']:
        kwd['filesystem_paths'] = os.path.join(import_base_dir, path)
    # user wants to import one file only
    elif source in ["userdir_file", "importdir_file"]:
        file = os.path.abspath(path)
        abspath_datasets.append(self._make_library_uploaded_dataset(
            trans, kwd, os.path.basename(file), file, 'server_dir', library_bunch))
    # user wants to import whole folder
    elif source == "userdir_folder":
        uploaded_datasets_bunch = self._get_path_paste_uploaded_datasets(
            trans, kwd, library_bunch, 200, '')
        uploaded_datasets = uploaded_datasets_bunch[0]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
        for ud in uploaded_datasets:
            ud.path = os.path.abspath(ud.path)
            abspath_datasets.append(ud)
    # user wants to import from path
    if source in ["admin_path", "importdir_folder"]:
        # validate the path is within root
        uploaded_datasets_bunch = self._get_path_paste_uploaded_datasets(
            trans, kwd, library_bunch, 200, '')
        uploaded_datasets = uploaded_datasets_bunch[0]
        if uploaded_datasets is None:
            raise exceptions.ObjectNotFound('Given folder does not contain any datasets.')
        for ud in uploaded_datasets:
            ud.path = os.path.abspath(ud.path)
            abspath_datasets.append(ud)
    json_file_path = upload_common.create_paramfile(trans, abspath_datasets)
    data_list = [ud.data for ud in abspath_datasets]
    job_params = {}
    job_params['link_data_only'] = dumps(kwd.get('link_data_only', 'copy_files'))
    job_params['uuid'] = dumps(kwd.get('uuid', None))
    job, output = upload_common.create_job(trans, tool_params, tool, json_file_path, data_list, folder=folder, job_params=job_params)
    trans.app.job_manager.enqueue(job, tool=tool)
    job_dict = job.to_dict()
    job_dict['id'] = trans.security.encode_id(job_dict['id'])
    return job_dict
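# An illustrative payload for the load() endpoint above (POST
# /api/libraries/datasets); all values are hypothetical, and the encoded
# folder id must come from the target Galaxy instance.
example_payload = {
    'encoded_folder_id': 'F16f2a5ba4b8b1e09',  # hypothetical encoded id
    'source': 'userdir_folder',                # one of the five accepted sources
    'path': 'run42/fastq',                     # relative to the user's import dir
    'file_type': 'fastqsanger',
    'dbkey': 'hg38',
    'link_data': False,                        # copy into Galaxy instead of linking
    'preserve_dirs': True,
    'tag_using_filenames': False,
}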
def _construct_path(self, obj, old_style=False, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, obj_dir=False, **kwargs):
    """
    Construct the absolute path for accessing the object identified by `obj.id`.

    :type base_dir: string
    :param base_dir: A key in self.extra_dirs corresponding to the base
                     directory in which this object should be created, or
                     None to specify the default directory.

    :type dir_only: boolean
    :param dir_only: If True, check only the path where the file identified
                     by `obj` should be located, not the dataset itself. This
                     option applies to `extra_dir` argument as well.

    :type extra_dir: string
    :param extra_dir: Append the value of this parameter to the expected
                      path used to access the object identified by `obj`
                      (e.g., /files/000/<extra_dir>/dataset_10.dat).

    :type alt_name: string
    :param alt_name: Use this name as the alternative name for the returned
                     dataset rather than the default.

    :type old_style: boolean
    :param old_style: This option is used for backward compatibility. If
                      `True` then the composed directory structure does not
                      include a hash id (e.g., /files/dataset_10.dat (old)
                      vs. /files/000/dataset_10.dat (new))
    """
    base = os.path.abspath(self.extra_dirs.get(base_dir, self.file_path))
    # extra_dir should never be constructed from provided data but just
    # make sure there are no shenanigans afoot
    if extra_dir and extra_dir != os.path.normpath(extra_dir):
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")
    # ensure that any parent directory references in alt_name would not
    # result in a path not contained in the directory path constructed here
    if alt_name and not safe_relpath(alt_name):
        log.warning('alt_name would locate path outside dir: %s', alt_name)
        raise ObjectInvalid("The requested object is invalid")
    if old_style:
        if extra_dir is not None:
            path = os.path.join(base, extra_dir)
        else:
            path = base
    else:
        # Construct hashed path
        rel_path = os.path.join(*directory_hash_id(obj.id))
        # Create a subdirectory for the object ID
        if obj_dir:
            rel_path = os.path.join(rel_path, str(obj.id))
        # Optionally append extra_dir
        if extra_dir is not None:
            if extra_dir_at_root:
                rel_path = os.path.join(extra_dir, rel_path)
            else:
                rel_path = os.path.join(rel_path, extra_dir)
        path = os.path.join(base, rel_path)
    if not dir_only:
        path = os.path.join(path, alt_name if alt_name else "dataset_%s.dat" % obj.id)
    return os.path.abspath(path)