def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
    """Return the iRODS path for ``obj`` below ``self.root_collection_path``.

    :param obj: object whose ``id`` supplies the default ``dataset_<id>`` name
    :param base_dir: accepted for interface compatibility; not used here
    :param dir_only: when true, stop at the directory (no object name added)
    :param extra_dir: optional sub-collection; must already be normalized
    :param extra_dir_at_root: ignored, there is only one subdirectory level
    :param alt_name: optional name used instead of the default one
    :param strip_dat: drop a trailing ``.dat`` from ``alt_name`` and leave it
        off the default name
    :raises ObjectInvalid: if ``extra_dir`` is not normalized or ``alt_name``
        fails the ``safe_relpath`` check
    """
    # extra_dir is never expected to come from user-supplied data, but
    # refuse anything that is not already in normalized form anyway.
    if extra_dir and os.path.normpath(extra_dir) != extra_dir:
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid("The requested object is invalid")

    # Parent-directory references in alt_name must not lead outside the
    # directory assembled below.
    if alt_name:
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid("The requested object is invalid")
        # iRODS will not follow parent references, so valid ones are
        # collapsed before the name is used.
        alt_name = os.path.normpath(alt_name)

    rel_path = extra_dir if extra_dir is not None else ""

    if not dir_only:
        # Objects are stored in iRODS without the .dat extension.
        # TODO: is the strip_dat kwarg the best way to implement this?
        if strip_dat and alt_name and alt_name.endswith('.dat'):
            alt_name, _ = os.path.splitext(alt_name)
        fallback_name = ('dataset_%s' % obj.id) + ('' if strip_dat else '.dat')
        rel_path = path_join(rel_path, alt_name or fallback_name)

    return path_join(self.root_collection_path, rel_path)
def __init__(self, config, file_path=None, extra_dirs=None):
    """Connect to iRODS and make sure the root collection exists.

    :param config: configuration object; this method reads
        ``object_store_cache_path``, ``irods_default_resource`` and
        ``irods_root_collection_path`` from it
    :param file_path: forwarded unchanged to the parent constructor
    :param extra_dirs: forwarded unchanged to the parent constructor
    :raises AssertionError: if the iRODS module is unavailable, ``rodsHome``
        is empty while the configured root is unset or relative, or the root
        collection cannot be created

    The checks raise ``AssertionError`` explicitly instead of using ``assert``
    statements: ``assert`` is removed under ``python -O``, which previously
    also removed the ``createCollection`` call embedded in one of them.
    """
    super(IRODSObjectStore, self).__init__(config, file_path=file_path, extra_dirs=extra_dirs)
    if irods is None:
        raise AssertionError(IRODS_IMPORT_MESSAGE)
    self.cache_path = config.object_store_cache_path
    self.default_resource = config.irods_default_resource or None

    # Connect to iRODS (AssertionErrors will be raised if anything goes wrong)
    self.rods_env, self.rods_conn = rods_connect()

    # If the root collection path in the config is unset or relative, fall
    # back to a location underneath the user's rodsHome.
    configured_root = config.irods_root_collection_path
    if configured_root is not None and configured_root.startswith('/'):
        self.root_collection_path = configured_root
    else:
        rods_home = self.rods_env.rodsHome
        if rods_home == '':
            raise AssertionError("Unable to initialize iRODS Object Store: rodsHome cannot be determined and irods_root_collection_path in Galaxy config is unset or not absolute.")
        relative_root = 'galaxy_data' if configured_root is None else configured_root
        self.root_collection_path = path_join(rods_home, relative_root)

    # irodsCollection returns an object regardless of whether the collection
    # exists; an id of -1 marks a missing collection.
    self.root_collection = irods.irodsCollection(self.rods_conn, self.root_collection_path)
    if self.root_collection.getId() == -1:
        log.warning("iRODS root collection does not exist, will attempt to create: %s", self.root_collection_path)
        # Step up to the parent, create the missing leaf there, then re-open.
        self.root_collection.upCollection()
        status = self.root_collection.createCollection(os.path.basename(self.root_collection_path))
        if status != 0:
            raise AssertionError("iRODS root collection creation failed: %s" % self.root_collection_path)
        self.root_collection = irods.irodsCollection(self.rods_conn, self.root_collection_path)
        if self.root_collection.getId() == -1:
            raise AssertionError("iRODS root collection creation claimed success but still does not exist")

    if self.default_resource is None:
        self.default_resource = self.rods_env.rodsDefResource

    log.info("iRODS data for this instance will be stored in collection: %s, resource: %s", self.root_collection_path, self.default_resource)
def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
    """Return the iRODS path for ``obj`` below ``self.root_collection_path``.

    ``extra_dir`` (if given) becomes the single sub-collection level.  Unless
    ``dir_only`` is set, the object name is appended: ``alt_name`` when
    supplied, otherwise ``dataset_<obj.id>``.  With ``strip_dat`` a trailing
    ``.dat`` is removed from ``alt_name``; without it ``.dat`` is added to
    the default name.  ``base_dir`` and ``extra_dir_at_root`` are accepted
    but not used.
    """
    # extra_dir_at_root is ignored - the iRODS plugin does not use the
    # directory hash, so there is only one level of subdirectory.
    relative = "" if extra_dir is None else extra_dir

    if dir_only:
        return path_join(self.root_collection_path, relative)

    # the .dat extension is stripped when stored in iRODS
    # TODO: is the strip_dat kwarg the best way to implement this?
    if strip_dat and alt_name and alt_name.endswith(".dat"):
        alt_name, _ = os.path.splitext(alt_name)

    fallback = "dataset_%s" % obj.id
    if not strip_dat:
        fallback = fallback + ".dat"

    relative = path_join(relative, alt_name or fallback)
    return path_join(self.root_collection_path, relative)
def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
    """Compose the iRODS path of ``obj`` under ``self.root_collection_path``.

    :param obj: object whose ``id`` is used for the default ``dataset_<id>`` name
    :param dir_only: return only the directory part when true
    :param extra_dir: optional sub-collection placed directly under the root
    :param alt_name: explicit object name overriding the default
    :param strip_dat: remove ``.dat`` from ``alt_name`` / omit it from the default
    :returns: the joined path as a string

    ``base_dir`` and ``extra_dir_at_root`` are accepted but ignored: the
    iRODS plugin does not use the directory hash, so only a single level of
    subdirectory exists.
    """
    segments = [self.root_collection_path]
    sub_path = extra_dir if extra_dir is not None else ""

    if not dir_only:
        # Names are stored in iRODS without their .dat extension.
        # TODO: is the strip_dat kwarg the best way to implement this?
        wants_strip = strip_dat and alt_name and alt_name.endswith('.dat')
        if wants_strip:
            alt_name = os.path.splitext(alt_name)[0]
        generated = ('dataset_%s' % obj.id) + ('' if strip_dat else '.dat')
        chosen = alt_name if alt_name else generated
        sub_path = path_join(sub_path, chosen)

    segments.append(sub_path)
    return path_join(*segments)
def __get_rods_path(self, obj, base_dir=None, dir_only=False, extra_dir=None, extra_dir_at_root=False, alt_name=None, strip_dat=True, **kwargs):
    """Validate the inputs and build the iRODS path for ``obj``.

    The result is ``self.root_collection_path`` joined with ``extra_dir``
    (when given) and, unless ``dir_only`` is set, the object name:
    ``alt_name`` if supplied, else ``dataset_<obj.id>``.  ``strip_dat``
    controls whether ``.dat`` is removed from ``alt_name`` and left off the
    default name.  ``base_dir`` and ``extra_dir_at_root`` are accepted but
    unused.

    :raises ObjectInvalid: when ``extra_dir`` differs from its normalized
        form, or ``alt_name`` is rejected by ``safe_relpath``
    """
    invalid_message = "The requested object is invalid"

    # extra_dir should never be built from provided data; still, reject any
    # value that normalization would change.
    if extra_dir and os.path.normpath(extra_dir) != extra_dir:
        log.warning('extra_dir is not normalized: %s', extra_dir)
        raise ObjectInvalid(invalid_message)

    if alt_name:
        # Parent directory references must not place the result outside of
        # the directory path constructed here.
        if not safe_relpath(alt_name):
            log.warning('alt_name would locate path outside dir: %s', alt_name)
            raise ObjectInvalid(invalid_message)
        # Valid parent references are normalized away because iRODS will
        # not follow them.
        alt_name = os.path.normpath(alt_name)

    # extra_dir_at_root is ignored - the iRODS plugin does not use the
    # directory hash, so there is only one level of subdirectory.
    location = "" if extra_dir is None else extra_dir

    if dir_only:
        return path_join(self.root_collection_path, location)

    # the .dat extension is stripped when stored in iRODS
    # TODO: is the strip_dat kwarg the best way to implement this?
    if strip_dat and alt_name and alt_name.endswith('.dat'):
        alt_name = os.path.splitext(alt_name)[0]

    default_name = 'dataset_%s' % obj.id
    if not strip_dat:
        default_name = default_name + '.dat'

    location = path_join(location, alt_name or default_name)
    return path_join(self.root_collection_path, location)
def default(self, project, proto, *args):
    """Serve a package file, downloading it from upstream on a cache miss.

    :param project: project name; used as the sub-directory of the storage
    :param proto: URL scheme segment of the upstream location, ``'http:'``
        or ``'https:'``
    :param args: remaining path segments of the upstream URL; the last one
        is taken as the file name
    :returns: whatever ``_content`` yields for the streamed upstream response
    :raises cherrypy.HTTPError: 400 for an unsupported scheme or an empty
        upstream path, the upstream status code when it is not OK, or the
        default error status when the upstream request itself fails
    :raises cherrypy.HTTPRedirect: when the file is already in local storage

    NOTE(review): ``project`` and the file name come straight from the
    request path and are joined into a filesystem path without any
    containment check - confirm they are validated upstream of this handler.
    """
    # An empty ``args`` used to slip past this guard (the test was
    # ``... and args``) and crash on ``args[-1]`` below.
    if proto not in ('http:', 'https:') or not args:
        raise cherrypy.HTTPError(requests.codes.BAD_REQUEST)

    file = args[-1]
    file_path = os.path.join(self._storage, project, file)

    if os.path.exists(file_path):
        # Already cached: let the static storage handler deliver it.
        raise cherrypy.HTTPRedirect(path_join(root_storage, project, file))

    url = f'{proto}//{urlunquote("/".join(args))}'
    try:
        r = requests.get(url, stream=True, headers=self._headers,
                         proxies=self._proxies, timeout=self._timeout)
    except Exception as ex:
        # Request boundary: log whatever went wrong and report it to the client.
        msg = str(ex)
        cherrypy.log(msg)
        raise cherrypy.HTTPError(message=msg) from ex

    if r.status_code != requests.codes.OK:
        # The body of a streamed response is never read on this path, so
        # release the connection explicitly.
        r.close()
        raise cherrypy.HTTPError(r.status_code)

    os.makedirs(os.path.dirname(file_path), exist_ok=True)
    resp_headers = cherrypy.response.headers
    # Upstream may omit either header (e.g. chunked transfer has no
    # Content-Length); forward only what is actually present.
    for header in ('Content-Type', 'Content-Length'):
        value = r.headers.get(header)
        if value is not None:
            resp_headers[header] = value
    return _content(r, file_path)
def _remote(self, project, project_path, response):
    """Rewrite the links of an upstream index page for ``project``.

    Without a local project directory, every match of ``replace_re`` in the
    response text is rewritten via ``self._replace`` and the text is
    returned.  Otherwise the page is rebuilt line by line: anchors for files
    that exist locally are rewritten as local links, files that exist only
    locally get an anchor of their own, and the result is sorted with
    ``parse_version`` and rendered through ``_HTML``.
    """
    def to_remote(match):
        return self._replace(match, project)

    if not os.path.exists(project_path):
        return replace_re.sub(to_remote, response.text)

    def to_local(match):
        return self._replace(match, project, True)

    pending = self._local_files(project_path)
    entries = []
    for row in response.text.splitlines():
        if '<a ' not in row:
            continue
        name = search_re.search(row)[1]
        if name in pending:
            rewritten = replace_re.sub(to_local, row)
            # Handled here, so it must not be listed again below.
            del pending[name]
        else:
            rewritten = replace_re.sub(to_remote, row)
        entries.append((name, rewritten))

    # Whatever is left exists only locally and has no upstream anchor.
    for name in pending:
        href = path_join(root_storage, project, name)
        digest = pending[name]
        if digest:
            href = href + '#' + digest
        entries.append((name, f'<a href="{href}">{name}</a><br>'))

    entries.sort(key=lambda pair: parse_version(pair[0]))
    content = '\n'.join(markup for _, markup in entries)
    return _HTML % {'project': project, 'content': content}
def _replace(self, m, project, local=False):
    """Return the opening ``<a href="..."`` text for one matched link.

    :param m: match object exposing the groups ``'url'`` and ``'hash'``
    :param project: project name placed in the generated link
    :param local: when true, link to the file under ``root_storage``;
        otherwise link to ``packs_path`` with the quoted upstream URL
    :returns: the anchor prefix, with ``#<hash>`` appended when the match
        carries a non-empty hash
    """
    if local:
        # Only the last path component of the upstream URL names the file.
        filename = m['url'].rsplit('/', 1)[-1]
        href = path_join(root_storage, project, filename)
    else:
        upstream = urljoin(self._index_url, m['url'])
        href = f'{packs_path}/{project}/{urlquote(upstream)}'

    fragment = m['hash']
    if fragment:
        return f'<a href="{href}#{fragment}"'
    return f'<a href="{href}"'
def _local(self, project, project_path):
    """Render an index page from the files found in ``project_path``.

    Files ending in ``hash_ext`` are not listed themselves; when such a
    companion file exists for a listed file, its stripped content is appended
    to the link as a ``#`` fragment.

    :raises cherrypy.HTTPError: NOT_FOUND when the directory is missing or
        contains nothing to list
    """
    if os.path.exists(project_path):
        ordered = sorted(os.listdir(project_path), key=parse_version)
        anchors = []
        for name in ordered:
            if name.endswith(hash_ext):
                continue
            href = path_join(root_storage, project, name)
            digest_file = os.path.join(project_path, name + hash_ext)
            if os.path.exists(digest_file):
                href = href + '#' + read_all(digest_file).strip()
            anchors.append(f'<a href="{href}">{name}</a><br>')
        if anchors:
            page = {'project': project, 'content': '\n'.join(anchors)}
            return _HTML % page
    raise cherrypy.HTTPError(requests.codes.NOT_FOUND)
def unzip(f):
    """Extract the single member of zip file ``f`` to ``<stem>.db``.

    Two extensions are stripped from ``f`` to get the stem, so both
    ``x.zip`` and ``x.db.zip`` end up as ``x.db``.  The member is extracted
    into a directory named after the stem, renamed to the destination, and
    the directory is removed afterwards.

    :raises IOError: if the archive does not contain exactly one member
    :returns: the destination path
    """
    archive = mkZipFileRd(f)
    members = archive.namelist()
    if len(members) != 1:
        raise IOError('more than one item in zip file; which to use? %s' % members)  # noqa
    [only] = members
    log.info('extracting %s from %s', only, f)

    # x.zip    -> x    -> x
    # x.db.zip -> x.db -> x
    without_zip, _ = splitext(f)
    workdir, _ = splitext(without_zip)
    target = workdir + '.db'

    archive.extract(only, workdir)
    rename(path_join(workdir, only), target)
    rmdir(workdir)
    return target
def _files(self, project):
    """List the stored files of ``project`` as ``ProjectFile`` records.

    :returns: ``None`` when the project directory does not exist, otherwise a
        list (possibly empty) ordered by ``parse_version`` of the file name;
        entries ending in ``hash_ext`` are left out
    """
    proj_path = os.path.join(self._storage, project)
    if not os.path.exists(proj_path):
        return None

    listing = []
    with os.scandir(proj_path) as entries:
        ordered = sorted(entries, key=lambda e: parse_version(e.name))
        for entry in ordered:
            name = entry.name
            if name.endswith(hash_ext):
                continue
            info = entry.stat()
            link = path_join(path, storage, project, name)
            size = format_bin_prefix('.1f', info.st_size)
            modified = datetime.fromtimestamp(info.st_mtime).isoformat(' ', 'seconds')
            listing.append(ProjectFile(name, link, size, modified))
    return listing
# Per-path settings keyed by URL path.  The 'tools.staticdir.*' and
# 'tools.staticfile.*' keys look like CherryPy tool options - presumably this
# dict is handed to CherryPy when Root is mounted; confirm at the call site.
config = {
    '/': {},
    # `storage` is defined outside this snippet; it is used here as a path key
    # with static directory serving switched on.
    storage: {
        'tools.staticdir.on': True,
        'tools.staticdir.dir': '.',
    },
    '/favicon.ico': {
        'tools.staticfile.on': True,
        'tools.staticfile.filename': get_favicon_path(),
    }
}
# Matches either a single alphanumeric character, or a name that starts and
# ends with an alphanumeric character and has only alphanumerics, '.', '_' or
# '-' in between (case-insensitive).
proj_nam_re = re.compile('^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$',
                         re.IGNORECASE)
# Suffix of the companion files that are skipped when project files are listed.
hash_ext = '.sha256'
chunk_size = 8192
# NOTE(review): the second component is absolute; if path_join is
# posixpath.join the result is just '/project/' and `path` is discarded -
# confirm this is intended.
project_path = path_join(path, '/project/')


class Root:
    """Root handler object configured from the ``cfg`` mapping."""

    def __init__(self, cfg):
        """Store settings from ``cfg`` and enable the admin area if configured.

        Reads the keys ``'admin-pass'``, ``'admin-expire'``,
        ``'storage-path'`` and ``'project-url'`` from ``cfg``.
        """
        if cfg['admin-pass']:
            # `_password` is only set when an admin password is configured.
            self._password = cfg['admin-pass']
            # Mutates the module-level `config`: adds session settings for
            # the '/admin' path.
            config['/admin'] = {
                'tools.sessions.on': True,
                'tools.sessions.name': 'admin_session_id',
                'tools.sessions.timeout': cfg['admin-expire']
            }
        self._admin_enabled = bool(cfg['admin-pass'])
        self._storage = cfg['storage-path']
        self._project_url = cfg['project-url']
def descendant(sub):
    """Join ``sub`` onto ``path`` and verify the result stays underneath it.

    :param sub: path to resolve relative to the enclosing ``path``
    :returns: ``path_join(path, sub)``, unchanged (not normalized)
    :raises IOError: if the joined path lies outside ``path``

    The previous ``startswith`` test accepted ``..`` segments
    (``<path>/../x``) and sibling names sharing a prefix (``/a`` vs ``/ab``),
    and its error message was never formatted because the values were passed
    as extra exception arguments.  Symbolic links are not resolved here.
    """
    import os  # local import: this block cannot assume `os` is in scope

    d = path_join(path, sub)
    # relpath normalizes both sides, so escaping shows up as a leading '..'.
    # Empty strings are mapped to the current directory, which relpath
    # otherwise rejects.
    relative = os.path.relpath(d or os.curdir, path or os.curdir)
    if relative == os.pardir or relative.startswith(os.pardir + os.sep):
        raise IOError('%s not under %s' % (d, path))
    return d
import os from posixpath import join as path_join from urllib.parse import unquote as urlunquote import cherrypy import requests from . import root, PACKS_PATH path = PACKS_PATH config = {'/': {}} root_storage = path_join(root.path, root.storage) chunk_size = 8192 class Packs: def __init__(self, cfg): self._storage = cfg['storage-path'] self._proxies = cfg['proxies'] self._timeout = cfg['timeout'] self._headers = {'User-Agent': cfg['user-agent']} @cherrypy.expose def default(self, project, proto, *args): if proto not in ('http:', 'https:') and args: raise cherrypy.HTTPError(requests.codes.BAD_REQUEST) file = args[-1] file_path = os.path.join(self._storage, project, file) if not os.path.exists(file_path): url = f'{proto}//{urlunquote("/".join(args))}' try: