class GalaxyFileSystem():
    """Filesystem-style facade over a Galaxy server (legacy variant).

    Virtual paths look like ``GalaxyFS/<Libraries|Histories>/<id>[/<id>]``
    and are mapped onto Galaxy libraries, histories and datasets through a
    ``GalaxyInstance`` client.
    """

    def __init__(self, url, user):
        """Connect to the Galaxy server at *url* on behalf of *user*.

        Raises:
            ValueError: if *url* is not an http(s) address.
        """
        u = urlsplit(url)
        if u.scheme != 'http' and u.scheme != 'https':
            raise ValueError("Invalid name node address")
        # Keep scheme + host only; any path/query/fragment is discarded.
        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.localdir = ""
        self.prefix = 'GalaxyFS'
        self.lddaprefix = 'Libraries'
        self.hdaprefix = 'Histories'
        self.client = GalaxyInstance(self.url, user)

    def normalize_path(self, path):
        """Return *path* with the ``GalaxyFS`` prefix and any leading
        separators removed, joined onto the local root directory."""
        path = os.path.normpath(path)
        if path.startswith(self.prefix):
            path = path[len(self.prefix):]
        while path and path[0] == os.sep:
            path = path[1:]
        return os.path.join(self.localdir, path)

    def strip_root(self, path):
        """Strip the server url and local root directory from *path*.

        Raises:
            ValueError: if *path* does not live under the local root.
        """
        if path.startswith(self.url):
            path = path[len(self.url):]
        if not path.startswith(self.localdir):
            # BUG FIX: raising a plain string is a TypeError on Python 3;
            # wrap the message in a real exception type.
            raise ValueError(
                'Invalid hdfs path. It must start with the root directory')
        # The original re-tested startswith() after raising above; that
        # second branch was dead code and has been removed.
        return path[len(self.localdir):]

    def make_fullpath(self, path):
        """Return *path* normalized and re-prefixed with ``GalaxyFS``."""
        path = self.normalize_path(path)
        return os.path.join(self.prefix, path)

    def create_folder(self, path):
        """Create a Galaxy library or history named after the path leaf.

        Returns the prefixed path with the leaf replaced by the id of the
        created object, or ``None`` on any failure.
        """
        try:
            path = self.normalize_path(path)
            # BUG FIX: the original read the undefined name
            # `normalized_path` (NameError) and assigned into the
            # immutable tuple returned by pathlib's ``.parts``.
            parts = list(pathlib.Path(path).parts)
            if len(parts) > 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                new_id = self.client.libraries.create_library(parts[-1])
            else:
                new_id = self.client.histories.create_history(parts[-1])
            parts[-1] = new_id
            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except Exception:
            return None

    def remove(self, path):
        """Delete the library or history addressed by *path*."""
        try:
            path = self.normalize_path(path)
            # BUG FIX: the original read the undefined name `normalized_path`.
            parts = pathlib.Path(path).parts
            # NOTE(review): this rejects exactly-3-part (dataset) paths,
            # although the message says "maximum 3 parts" — confirm the
            # intended contract (datasets cannot be removed here).
            if len(parts) == 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                self.client.libraries.delete_library(library_id=parts[-1])
            else:
                self.client.histories.delete_history(history_id=parts[-1])
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        """Rename *oldpath* to *newpath* via the Galaxy client."""
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            self.client.rename(oldpath, newpath)
        except Exception as e:
            print(e)

    def get_files(self, path):
        """List the non-directory entries directly under *path*."""
        path = self.normalize_path(path)
        files = []
        for f in self.client.list(path):
            # `join` comes from the module's top-of-file imports.
            status = self.client.status(join(path, f), False)
            if status['type'] != "DIRECTORY":
                files.append(f)
        return files

    def get_folders(self, path):
        """Return folder paths under *path*.

        NOTE(review): this body is a copy of create_folder() — it creates
        a library/history instead of listing folders and returns a single
        path (or [] on error) rather than a list; confirm intent.
        """
        try:
            path = self.normalize_path(path)
            # BUG FIX: the original read the undefined name
            # `normalized_path` and mutated an immutable tuple.
            parts = list(pathlib.Path(path).parts)
            if len(parts) > 3:
                raise ValueError("Galaxy path may have maximum 3 parts.")
            if parts[0] == self.lddaprefix:
                new_id = self.client.libraries.create_library(parts[-1])
            else:
                new_id = self.client.histories.create_history(parts[-1])
            parts[-1] = new_id
            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except Exception:
            return []

    def exists(self, path):
        """True if *path* names a known directory or file."""
        # BUG FIX: the original called self.ispath(), which does not
        # exist on this class; isfile() is the obvious intent (cf. the
        # isdir/isfile pair and the sibling implementation below).
        return self.isdir(path) or self.isfile(path)

    def isdir(self, path):
        """Only the two synthetic top-level folders are directories."""
        path = self.normalize_path(path)
        return path == self.lddaprefix or path == self.hdaprefix

    def isfile(self, path):
        """True if *path* is not a directory and resolves to a name."""
        return not self.isdir(path) and self.name_from_id(path)

    def read(self, path):
        """Read *path* and return its content decoded as UTF-8."""
        path = self.normalize_path(path)
        with self.client.read(path) as reader:
            return reader.read().decode('utf-8')

    def write(self, path, content):
        """Write *content* to *path* through the Galaxy client."""
        path = self.normalize_path(path)
        self.client.write(path, content)

    def name_from_id(self, path):
        """Resolve the display name of the object addressed by *path*."""
        normalized_path = self.normalize_path(path)
        parts = pathlib.Path(normalized_path).parts
        if len(parts) == 0:
            return ""
        elif len(parts) == 1:
            return (self.lddaprefix
                    if parts[0] == self.lddaprefix else self.hdaprefix)
        elif len(parts) == 2:
            # A single library or history: look the id up on the server.
            if parts[0] == self.lddaprefix:
                info = self.client.libraries.get_libraries(
                    library_id=parts[1])[0]
            else:
                info = self.client.histories.get_histories(
                    history_id=parts[1])[0]
        else:
            # A dataset: ldda for library data, hda for history data.
            hda_or_ldda = 'ldda' if parts[0] == self.lddaprefix else 'hda'
            info = self.client.datasets.show_dataset(
                dataset_id=os.path.basename(normalized_path),
                hda_ldda=hda_or_ldda)
        if info:
            return info['name']

    def make_json(self, path):
        """Build a jstree-style JSON description of *path* (recursive)."""
        normalized_path = self.normalize_path(path)
        if not normalized_path:
            # Root: the two synthetic top-level folders.
            return [
                self.make_json(self.lddaprefix),
                self.make_json(self.hdaprefix)
            ]
        data_json = {
            'path': os.path.join(self.url, normalized_path),
            'text': self.name_from_id(path)
        }
        parts = pathlib.Path(normalized_path).parts
        if parts[0] == self.lddaprefix:
            if len(parts) == 1:
                data_json['folder'] = True
                libraries = self.client.libraries.get_libraries()
                data_json['nodes'] = [
                    self.make_json(os.path.join(path, fn['id']))
                    for fn in libraries
                ]
            elif len(parts) == 2:
                data_json['folder'] = True
                # TODO: listing a single library's contents is not
                # implemented in this legacy version.
        elif parts[0] == self.hdaprefix:
            if len(parts) == 1:
                data_json['folder'] = True
                histories = self.client.histories.get_histories()
                data_json['nodes'] = [
                    self.make_json(os.path.join(path, fn['id']))
                    for fn in histories
                ]
            elif len(parts) == 2:
                data_json['folder'] = True
                datasets = self.client.histories.show_matching_datasets(
                    parts[1])
                data_json['nodes'] = [
                    self.make_json(os.path.join(path, fn['id']))
                    for fn in datasets
                ]
        return data_json

    def save_upload(self, file, fullpath):
        """Save *file* to a temp location, then upload it to *fullpath*."""
        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(fullpath))
        if os.path.isfile(localpath):
            os.remove(localpath)
        try:
            file.save(localpath)
            # BUG FIX: the original called a bare isfile(); self.isfile()
            # matches the sibling implementation's save_upload.
            if self.isfile(fullpath):
                fullpath = os.path.dirname(fullpath)
            self.client.upload(self.normalize_path(fullpath), localpath, True)
        except Exception as e:
            # Best-effort upload; log instead of silently swallowing.
            print(e)

    def download(self, path):
        """Download *path* to the temp directory; None for non-files."""
        path = self.normalize_path(path)
        status = self.client.status(path, False)
        if status is not None and status['type'] == "FILE":
            localpath = os.path.join(tempfile.gettempdir(),
                                     os.path.basename(path))
            return self.client.download(path, localpath, True)
        else:
            return None
class GalaxyFileSystem():
    """Filesystem-style facade over a Galaxy server.

    Virtual paths have up to five components counted from the server url:
    ``<url>/<Libraries|Histories>/<library-or-history-id>/<folder-or-
    dataset-id>[/<ldda-id>]``.  The *Key constants below index those
    components in the list produced by path_parts().
    """

    urlKey = 0        # server url (or GalaxyFS prefix)
    hlddTitleKey = 1  # 'Libraries' or 'Histories'
    hlddKey = 2       # library id / history id
    folderKey = 3     # library folder id
    hdaKey = 3        # history dataset id (same slot as folderKey)
    lddaKey = 4       # library dataset id

    def __init__(self, url, user):
        """Connect to the Galaxy server at *url* on behalf of *user*.

        Raises:
            ValueError: if *url* is not an http(s) address.
        """
        u = urlsplit(url)
        if u.scheme != 'http' and u.scheme != 'https':
            raise ValueError("Invalid name node address")
        # Keep scheme + host only; any path/query/fragment is discarded.
        self.url = urlunparse((u.scheme, u.netloc, '', '', '', ''))
        self.localdir = ""
        self.prefix = 'GalaxyFS'
        self.lddaprefix = 'Libraries'
        self.hdaprefix = 'Histories'
        self.client = GalaxyInstance(self.url, user)

    def typename(self):
        """Short identifier for this filesystem type."""
        return "gfs"

    def strip_prefix(self, path):
        """Remove the leading ``GalaxyFS`` prefix from *path*, if any."""
        return path[len(self.prefix):] if self.prefix and path.startswith(
            self.prefix) else path

    def normalize_path(self, path):
        """Return *path* as a full url under the server root."""
        if self.prefix:
            path = self.strip_prefix(path)
        if self.url and path.startswith(self.url):
            return path
        if not self.localdir or path.startswith(self.localdir):
            return os.path.join(self.url, path)
        while path and path[0] == os.sep:
            path = path[1:]
        return os.path.join(self.url, self.localdir, path)

    def normalize_fullpath(self, path):
        """Alias of normalize_path() kept for interface compatibility."""
        return self.normalize_path(path)

    def strip_root(self, path):
        """Strip prefix, server url and local root from *path*.

        Raises:
            ValueError: if *path* is not under the local root.
        """
        path = self.strip_prefix(path)
        if path.startswith(self.url):
            path = path[len(self.url):]
        if not path.startswith(self.localdir):
            # BUG FIX: raising a plain string is a TypeError on Python 3.
            raise ValueError(
                'Invalid hdfs path. It must start with the root directory')
        return path[len(self.localdir):] if path.startswith(
            self.localdir) else path

    def make_fullpath(self, path):
        """Return *path* normalized and re-prefixed with ``GalaxyFS``."""
        path = self.normalize_path(path)
        return os.path.join(self.prefix, path)

    def makedirs(self, path):
        """Interface alias: directories are created via mkdir()."""
        return self.mkdir(path)

    def mkdir(self, path):
        """Create a library/history (3 parts) or a library folder (4 parts).

        Returns the created path, "" for unsupported path shapes, or
        ``None`` on error.
        """
        try:
            path = self.normalize_path(path)
            parts = self.path_parts(path)
            if len(parts) > 4 or len(parts) < 3:
                return ""
            if len(parts) == 3:
                # New library or history named after the path leaf.
                if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                    hd_ldd = self.client.libraries.create_library(parts[-1])
                else:
                    hd_ldd = self.client.histories.create_history(parts[-1])
            else:
                # 4 parts: folders exist only inside libraries.
                if not self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                    return ""
                # BUG FIX: the original indexed parts[lddaKey] (== 4),
                # out of range for a 4-element list; the library id lives
                # at hlddKey.
                hd_ldd = self.client.libraries.create_folder(
                    parts[GalaxyFileSystem.hlddKey], parts[-1])
            parts[-1] = hd_ldd['id']
            # BUG FIX: the original joined parts['id'] (TypeError); the
            # resulting path is the joined parts list.
            return os.sep.join(parts)
        except Exception:
            return None

    def unique_fs_name(self, path, prefix, ext):
        """Build a collision-free name ``<path>/<prefix>_<uuid><ext>``."""
        return os.path.join(path, prefix + "_" + str(uuid.uuid4()) + ext)

    def remove(self, path):
        """Delete the library or history addressed by *path*."""
        try:
            path = self.normalize_path(path)
            parts = self.path_parts(path)
            if len(parts) > 4:
                raise ValueError("Galaxy path may have maximum 4 parts.")
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                self.client.libraries.delete_library(library_id=parts[-1])
            else:
                self.client.histories.delete_history(history_id=parts[-1])
        except Exception as e:
            print(e)

    def rename(self, oldpath, newpath):
        """Rename *oldpath* to *newpath*; returns the stripped new path."""
        try:
            oldpath = self.normalize_path(oldpath)
            newpath = self.normalize_path(newpath)
            self.client.rename(oldpath, newpath)
            return self.strip_root(newpath)
        except Exception as e:
            print(e)

    def copyfile(self, src, dst):
        """Copy a dataset between libraries, histories or plain paths.

        NOTE(review): the ids passed in the library<->history branches
        look swapped relative to the bioblend API signatures — verify
        against a live server.
        """
        if self.islibrarydata(src) and not self.islibrarydata(dst):
            parts = self.path_parts(self.normalize_path(src))
            self.client.libraries.copy_from_dataset(
                parts[GalaxyFileSystem.hlddKey], self.id_from_path(dst),
                parts[GalaxyFileSystem.folderKey])
        elif not self.islibrarydata(src) and self.islibrarydata(dst):
            # BUG FIX: the original tested islibrary(parts[...]) here
            # with `parts` still unbound (UnboundLocalError on this
            # branch); test the destination path instead.
            parts = self.path_parts(self.normalize_path(dst))
            self.client.histories.upload_dataset_from_library(
                parts[GalaxyFileSystem.hdaKey], self.id_from_path(src))
        else:
            content = self.read(src)
            if self.isdir(dst):
                dst = os.path.join(dst, os.path.basename(src))
            self.write(dst, content)
        return self.normalize_fullpath(dst)

    def get_files(self, path):
        """List the non-directory entries directly under *path*."""
        path = self.normalize_path(path)
        files = []
        for f in self.client.list(path):
            # `join` comes from the module's top-of-file imports.
            status = self.client.status(join(path, f), False)
            if status['type'] != "DIRECTORY":
                files.append(f)
        return files

    def get_folders(self, path):
        """Return folder paths under *path*.

        NOTE(review): this body is a stale copy of mkdir() — it creates a
        library/history and returns a single path (or [] on error)
        instead of listing folders; confirm intent.
        """
        try:
            normalized_path = self.normalize_path(path)
            parts = self.path_parts(normalized_path)
            if len(parts) > 4:
                raise ValueError("Galaxy path may have maximum 4 parts.")
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                parts[-1] = self.client.libraries.create_library(parts[-1])
            else:
                parts[-1] = self.client.histories.create_history(parts[-1])
            path = os.sep.join(parts)
            return self.make_fullpath(path)
        except Exception:
            return []

    def exists(self, path):
        """True if *path* names a known directory or file."""
        return self.isdir(path) or self.isfile(path)

    def islibrary(self, name):
        """True if *name* is the library pseudo-folder title."""
        return name == self.lddaprefix

    def islibrarydata(self, path):
        """True if *path* lives under the Libraries branch."""
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        return self.islibrary(parts[GalaxyFileSystem.hlddTitleKey])

    def isdir(self, path):
        """Library paths are directories up to the folder level (4 parts
        incl. url); history paths up to the history level (3 parts)."""
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
            return len(parts) <= GalaxyFileSystem.lddaKey
        return len(parts) <= GalaxyFileSystem.hdaKey

    def isfile(self, path):
        """True if *path* is not a directory and resolves to a name."""
        return not self.isdir(path) and self.name_from_id(path) != ""

    def join(self, path1, path2):
        """Join *path2* onto the normalized form of *path1*."""
        path1 = self.normalize_path(path1)
        return os.path.join(path1, path2)

    def make_unique_dir(self, path):
        """Create and return a fresh uuid-named directory under *path*."""
        unique_dir = self.join(path, str(uuid.uuid4()))
        self.makedirs(unique_dir)
        return unique_dir

    def read(self, path):
        """Read *path* and return its raw bytes."""
        path = self.normalize_path(path)
        with self.client.read(path, 'rb') as reader:
            return reader.read()

    def write(self, path, content):
        """Write *content* to *path* through the Galaxy client."""
        path = self.normalize_path(path)
        self.client.write(path, content)

    def id_from_path(self, path):
        """Resolve the Galaxy object id addressed by *path*."""
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        if len(parts) <= GalaxyFileSystem.urlKey + 1:
            return ""
        elif len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
            # The synthetic 'Histories'/'Libraries' top level.
            return parts[GalaxyFileSystem.hlddTitleKey]
        elif len(parts) == GalaxyFileSystem.hlddKey + 1:
            # A single library or history.
            if parts[GalaxyFileSystem.hlddTitleKey] == self.lddaprefix:
                info = self.client.libraries.get_libraries(
                    library_id=parts[GalaxyFileSystem.hlddKey])[0]
            else:
                info = self.client.histories.get_histories(
                    history_id=parts[GalaxyFileSystem.hlddKey])[0]
            return info['id']
        elif len(parts) == GalaxyFileSystem.folderKey + 1:
            # Library folder or history dataset.
            if parts[GalaxyFileSystem.hlddTitleKey] == self.lddaprefix:
                folder = self.client.folders.show_folder(
                    parts[GalaxyFileSystem.folderKey], False)
                return folder['id']
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.hdaKey], hda_ldda='hda')
            return info['id']
        elif len(parts) == GalaxyFileSystem.lddaKey + 1:
            # Library dataset.
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.lddaKey], hda_ldda='ldda')
            return info['id']

    def path_parts(self, path):
        """Split *path* into ``[root, title, id, ...]`` where the root is
        the ``GalaxyFS`` prefix or the server url when present."""
        parts = []
        if path.startswith(self.prefix):
            parts.append(self.prefix)
            if len(path) > len(self.prefix):
                path = path[len(self.prefix) + 1:]
        elif path.startswith(self.url):
            parts.append(self.url)
            if len(path) > len(self.url):
                path = path[len(self.url) + 1:]
        parts.extend(pathlib.Path(path).parts)
        return parts

    def name_from_id(self, path):
        """Resolve the display name of the object addressed by *path*."""
        normalized_path = self.normalize_path(path)
        parts = self.path_parts(normalized_path)
        if len(parts) <= GalaxyFileSystem.urlKey + 1:
            return ""
        elif len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
            # The synthetic 'Histories'/'Libraries' top level.
            return parts[GalaxyFileSystem.hlddTitleKey]
        elif len(parts) == GalaxyFileSystem.hlddKey + 1:
            # A single library or history.
            if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
                info = self.client.libraries.get_libraries(
                    library_id=parts[GalaxyFileSystem.hlddKey])[0]
            else:
                info = self.client.histories.get_histories(
                    history_id=parts[GalaxyFileSystem.hlddKey])[0]
            return info['name']
        elif len(parts) == GalaxyFileSystem.folderKey + 1:
            # Library folder or history dataset.
            if parts[GalaxyFileSystem.hlddTitleKey] == self.lddaprefix:
                folder = self.client.folders.show_folder(
                    parts[GalaxyFileSystem.folderKey], False)
                return folder['name']
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.hdaKey], hda_ldda='hda')
            return info['name']
        elif len(parts) == GalaxyFileSystem.lddaKey + 1:
            # Library dataset.
            info = self.client.datasets.show_dataset(
                dataset_id=parts[GalaxyFileSystem.lddaKey], hda_ldda='ldda')
            return info['name']

    def make_json_item(self, path):
        """Build the tree-node dict for a single path (no children)."""
        data_json = {
            'path': self.normalize_path(path),
            'text': "{0}(id:{1})".format(self.name_from_id(path),
                                         self.id_from_path(path))
        }
        if self.isdir(path):
            data_json['nodes'] = []
        return data_json

    def make_json(self, path):
        """Build a one-level-deep tree description of *path* (children
        are leaf items, not expanded)."""
        normalized_path = self.normalize_path(path)
        if not normalized_path or normalized_path == self.url:
            # Root: the two synthetic top-level folders.
            return [
                self.make_json_item(urljoin(self.url, self.lddaprefix)),
                self.make_json_item(urljoin(self.url, self.hdaprefix))
            ]
        data_json = self.make_json_item(path)
        parts = self.path_parts(normalized_path)
        if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
            if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                libraries = self.client.libraries.get_libraries()
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in libraries
                ]
            elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                folders = self.client.libraries.get_folders(
                    library_id=parts[GalaxyFileSystem.hlddKey])
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in folders
                ]
            elif len(parts) == GalaxyFileSystem.folderKey + 1:
                folder = self.client.folders.show_folder(
                    parts[GalaxyFileSystem.folderKey], True)
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in folder['folder_contents']
                ]
        else:
            if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                histories = self.client.histories.get_histories()
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in histories
                ]
            elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                datasets = self.client.histories.show_matching_datasets(
                    parts[GalaxyFileSystem.hlddKey])
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in datasets
                ]
        data_json['loaded'] = True
        return data_json

    @staticmethod
    def get_history_path(url, history_id, data_id):
        """Build the full url of a history dataset."""
        return urljoin(url, os.path.join('Histories', history_id, data_id))

    def make_json_r(self, path):
        """Recursive variant of make_json(): directory children are
        expanded all the way down to dataset leaves."""
        normalized_path = self.normalize_path(path)
        if not normalized_path or normalized_path == self.url:
            # Root: the two synthetic top-level folders.
            return [
                self.make_json_r(urljoin(self.url, self.lddaprefix)),
                self.make_json_r(urljoin(self.url, self.hdaprefix))
            ]
        data_json = self.make_json_item(path)
        parts = self.path_parts(normalized_path)
        if self.islibrary(parts[GalaxyFileSystem.hlddTitleKey]):
            if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                libraries = self.client.libraries.get_libraries()
                data_json['nodes'] = [
                    self.make_json_r(os.path.join(path, fn['id']))
                    for fn in libraries
                ]
            elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                folders = self.client.libraries.get_folders(
                    library_id=parts[GalaxyFileSystem.hlddKey])
                data_json['nodes'] = [
                    self.make_json_r(os.path.join(path, fn['id']))
                    for fn in folders
                ]
            elif len(parts) == GalaxyFileSystem.folderKey + 1:
                folder = self.client.folders.show_folder(
                    parts[GalaxyFileSystem.folderKey], True)
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in folder['folder_contents']
                ]
        else:
            if len(parts) == GalaxyFileSystem.hlddTitleKey + 1:
                histories = self.client.histories.get_histories()
                data_json['nodes'] = [
                    self.make_json_r(os.path.join(path, fn['id']))
                    for fn in histories
                ]
            elif len(parts) == GalaxyFileSystem.hlddKey + 1:
                datasets = self.client.histories.show_matching_datasets(
                    parts[GalaxyFileSystem.hlddKey])
                data_json['nodes'] = [
                    self.make_json_item(os.path.join(path, fn['id']))
                    for fn in datasets
                ]
        data_json['loaded'] = True
        return data_json

    def save_upload(self, file, path):
        """Persist an uploaded *file* into the library/history at *path*.

        Returns the new dataset's path, "" when *path* is not a valid
        destination, or ``None`` if the upload itself fails.
        """
        if self.isfile(path):
            path = os.path.dirname(path)
        elif not self.isdir(path):
            return ""
        parts = self.path_parts(path)
        if not parts or len(parts) < 3:
            return ""
        localpath = os.path.join(tempfile.gettempdir(),
                                 os.path.basename(file.filename))
        if os.path.exists(localpath):
            # Avoid clobbering an existing temp file of the same name.
            fs = PosixFileSystem('/')
            unique_dir = fs.make_unique_dir(os.path.dirname(localpath))
            localpath = os.path.join(unique_dir,
                                     os.path.basename(file.filename))
        try:
            file.save(localpath)
            if self.islibrary(parts[1]):
                dataset = self.client.libraries.upload_file_from_local_path(
                    parts[2], localpath,
                    folder_id=parts[3] if len(parts) > 3 else None)
            else:
                dataset = self.client.tools.upload_file(localpath, parts[2])
            if dataset:
                return os.path.join(path, dataset['id'])
        except Exception as e:
            # Best-effort upload; log instead of silently swallowing.
            print(e)

    def download(self, path):
        """Download the dataset at *path* into the temp directory.

        Returns the local file path, or ``None`` for directories.
        """
        path = self.normalize_path(path)
        if self.isdir(path):
            return None
        dataset = self.client.datasets.show_dataset(
            dataset_id=os.path.basename(path),
            hda_ldda='ldda' if self.islibrarydata(path) else 'hda')
        name = dataset['name']
        # Append the Galaxy file extension when the name lacks one.
        if not pathlib.Path(name).suffix and dataset['file_ext']:
            name += '.' + dataset['file_ext']
        localpath = os.path.join(tempfile.gettempdir(), name)
        if os.path.exists(localpath):
            # Avoid clobbering an existing temp file of the same name.
            fs = PosixFileSystem('/')
            unique_dir = fs.make_unique_dir(os.path.dirname(localpath))
            localpath = os.path.join(unique_dir, name)
        self.client.datasets.download_dataset(os.path.basename(path),
                                              file_path=localpath,
                                              use_default_filename=False)
        return localpath