def remove_data(self, dataset_name='', where=ROOT_NODE_PATH):
    """
    Delete a data set from the H5 file.

    :param dataset_name: name of the data set to be deleted
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :raises FileStructureException: when the file does not contain the data set
    """
    LOG.debug("Removing data set: %s" % dataset_name)
    try:
        # Open file in append mode ('a') to allow data remove
        hdf5_file = self._open_h5_file()
        del hdf5_file[where + dataset_name]
    except KeyError:
        # logger.warn is deprecated; use logger.warning instead
        LOG.warning("Trying to delete data set: %s but current file does not contain it." % dataset_name)
        raise FileStructureException("Could not locate dataset: %s" % dataset_name)
    finally:
        self.close_file()
        # Mark the owning project folder for re-sync of its encrypted copy
        self.data_encryption_handler.push_folder_to_sync(
            FilesHelper.get_project_folder_from_h5(self.__storage_full_name))
def __init__(self, dest_path, mode):
    """Open (or create) the ZIP archive at dest_path, with DEFLATE compression and Zip64 support."""
    try:
        ZipFile.__init__(self, dest_path, mode, ZIP_DEFLATED, allowZip64=True)
    except Exception as excep:
        raise FileStructureException("Could not open the given ZIP file..." + str(excep))
    self.logger = get_logger(self.__class__.__module__)
def _check_data(data_list): """ Check if the data to be stores is in a good format. If not adapt it. """ if data_list is None: raise FileStructureException("Could not store null data") if not (isinstance(data_list, list) or isinstance(data_list, numpy.ndarray)): raise FileStructureException( "Invalid data type. Could not store data of type:" + str(type(data_list))) data_to_store = data_list if isinstance(data_to_store, list): data_to_store = numpy.array(data_list) if data_to_store.shape == (): data_to_store = hdf5.Empty("f") return data_to_store
def remove_folder(folder_path, ignore_errors):
    """
    Given a folder path, try to remove that folder from disk.

    :param folder_path: folder to delete
    :param ignore_errors: When False throw FileStructureException if folder_path is invalid.
    """
    # Guard clause: handle the "not a folder" case up-front
    if not os.path.isdir(folder_path):
        if ignore_errors:
            return
        raise FileStructureException("Given path does not exists, or is not a folder " + str(folder_path))
    shutil.rmtree(folder_path, ignore_errors)
def get_metadata(self, dataset_name='', where=ROOT_NODE_PATH):
    """
    Retrieve ALL meta-data information for root node or for a given data set.

    :param dataset_name: name of the dataset for which to read metadata. If None, read metadata from ROOT node.
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :returns: a dictionary containing all metadata associated with the node
    """
    LOG.debug("Retrieving metadata for dataset: %s" % dataset_name)
    meta_key = ""
    try:
        # Read-only access is enough for metadata retrieval
        hdf5_file = self._open_h5_file('r')
        node = hdf5_file[where + dataset_name]
        result = {}
        prefix = self.TVB_ATTRIBUTE_PREFIX
        for meta_key in node.attrs:
            # Strip the TVB prefix so callers see plain attribute names
            plain_key = meta_key[len(prefix):] if meta_key.startswith(prefix) else meta_key
            result[plain_key] = self._deserialize_value(node.attrs[meta_key])
        return result
    except KeyError:
        msg = "Trying to read data from a missing data set: %s" % (where + dataset_name)
        LOG.warning(msg)
        raise MissingDataSetException(msg)
    except AttributeError:
        msg = "Trying to get value for missing metadata %s" % meta_key
        LOG.exception(msg)
        raise FileStructureException(msg)
    except Exception:
        msg = "Failed to read metadata from H5 file! %s" % self.__storage_full_name
        LOG.exception(msg)
        raise FileStructureException(msg)
    finally:
        self.close_file()
def move_datatype(self, new_project_name, new_op_id, full_path):
    """
    Move H5 storage into a new location
    """
    try:
        destination_folder = self.get_project_folder(new_project_name, str(new_op_id))
        file_name = os.path.split(full_path)[1]
        os.rename(full_path, os.path.join(destination_folder, file_name))
    except Exception:
        self.logger.exception("Could not move file")
        raise FileStructureException("Could not move " + full_path)
def remove_metadata(self, meta_key, dataset_name='', tvb_specific_metadata=True, where=ROOT_NODE_PATH):
    """
    Remove meta-data information for root node or for a given data set.

    :param meta_key: name of the metadata attribute to be removed
    :param dataset_name: name of the dataset from where to delete metadata. If None, metadata
        will be removed from ROOT node.
    :param tvb_specific_metadata: specify if the provided metadata is specific to TVB (keys will
        have a TVB prefix).
    :param where: represents the path where dataset is stored (e.g. /data/info)
    """
    LOG.debug("Deleting metadata: %s for dataset: %s" % (meta_key, dataset_name))
    try:
        # Append mode: the file must be writable to drop an attribute
        hdf5_file = self._open_h5_file()
        node = hdf5_file[where + dataset_name]
        key_to_remove = (self.TVB_ATTRIBUTE_PREFIX + meta_key) if tvb_specific_metadata else meta_key
        del node.attrs[key_to_remove]
    except KeyError:
        LOG.error("Trying to delete metadata on a missing data set: %s" % dataset_name)
        raise FileStructureException("Could not locate dataset: %s" % dataset_name)
    except AttributeError:
        LOG.error("Trying to delete missing metadata %s" % meta_key)
        raise FileStructureException("There is no metadata named %s on this node" % meta_key)
    finally:
        self.close_file()
        # Mark the owning project folder for re-sync of its encrypted copy
        self.data_encryption_handler.push_folder_to_sync(
            FilesHelper.get_project_folder_from_h5(self.__storage_full_name))
def remove_datatype_file(self, h5_file):
    """
    Remove H5 storage fully.
    """
    try:
        # Missing file is not an error: just record it and move on
        if not os.path.exists(h5_file):
            self.logger.warning("Data file already removed:" + str(h5_file))
            return
        os.remove(h5_file)
    except Exception:
        self.logger.exception("Could not remove file")
        raise FileStructureException("Could not remove " + str(h5_file))
def __open_h5_file(self, mode='a'):
    """
    Open file for reading, writing or append.

    :param mode: Mode to open file (possible values are w / r / a).
        Default value is 'a', to allow adding multiple data to the same file.
    :returns: returns the file which stores data in HDF5 format opened for read / write according to mode param
    """
    if self.__storage_full_name is None:
        raise FileStructureException("Invalid storage file. Please provide a valid path.")
    try:
        # Check if file is still open from previous writes.
        # NOTE(review): `.id.valid` presumably reflects whether the h5py handle is still live — confirm
        if self.__hfd5_file is None or not self.__hfd5_file.id.valid:
            file_exists = os.path.exists(self.__storage_full_name)
            # bug in some versions of hdf5 on windows prevent creating file with mode='a'
            if not file_exists and mode == 'a':
                mode = 'w'
            LOG.debug("Opening file: %s in mode: %s" % (self.__storage_full_name, mode))
            self.__hfd5_file = hdf5.File(self.__storage_full_name, mode, libver='latest')
            # If this is the first time we access file, write data version
            if not file_exists:
                os.chmod(self.__storage_full_name, TvbProfile.current.ACCESS_MODE_TVB_FILES)
                # Stamp the configured data version on the ROOT node, under the TVB attribute prefix
                attr_name = self.TVB_ATTRIBUTE_PREFIX + TvbProfile.current.version.DATA_VERSION_ATTRIBUTE
                self.__hfd5_file['/'].attrs[attr_name] = TvbProfile.current.version.DATA_VERSION
    except (IOError, OSError) as err:
        LOG.exception("Could not open storage file.")
        raise FileStructureException("Could not open storage file. %s" % err)
    return self.__hfd5_file
def __init__(self, storage_full_name):
    """
    Creates a new storage manager instance.

    :param storage_full_name: full path of the H5 file where data will be stored
    :raises FileStructureException: when no file name is provided
    """
    if storage_full_name is None:
        raise FileStructureException("Please provide the file name where to store data")
    self.__storage_full_name = storage_full_name
    self.__buffer_array = None
    self.data_buffers = {}
    self.data_encryption_handler = DataEncryptionHandler()
def unpack_zip(self, folder_path):
    """
    Simple method to unpack ZIP archive in a given folder.
    """
    excluded_paths = ["__MACOSX/", ".DS_Store"]

    def _is_excluded(entry_name):
        # Skip macOS metadata entries, wherever they appear in the archive tree
        return any(entry_name.startswith(excluded) or ('/' + excluded) in entry_name
                   for excluded in excluded_paths)

    try:
        return [self.extract(entry, folder_path)
                for entry in self.namelist() if not _is_excluded(entry)]
    except BadZipfile as excep:
        self.logger.exception("Could not process zip file")
        raise FileStructureException("Invalid ZIP file..." + str(excep))
    except Exception as excep:
        self.logger.exception("Could not process zip file")
        raise FileStructureException("Could not unpack the given ZIP file..." + str(excep))
def remove_operation_data(self, project_name, operation_id):
    """
    Remove H5 storage fully.
    """
    try:
        op_folder = self.get_project_folder(project_name, str(operation_id))
        self.logger.debug("Removing: " + str(op_folder))
        # Delegate to remove_folder with strict error reporting (ignore_errors=False)
        self.remove_folder(op_folder, False)
    except FileStructureException:
        self.logger.exception("Could not remove files")
        raise FileStructureException("Could not remove files for OP" + str(operation_id))
def remove_project_structure(self, project_name):
    """
    Remove all folders for project or THROW FileStructureException.
    """
    try:
        project_path = self.get_project_folder(project_name)
        # A directory gets a recursive delete; a plain file a simple remove
        if os.path.isdir(project_path):
            shutil.rmtree(project_path)
        elif os.path.exists(project_path):
            os.remove(project_path)
        self.logger.debug("Project folders were removed for " + project_name)
    except OSError:
        self.logger.exception("A problem occurred while removing folder.")
        raise FileStructureException("Permission denied. Make sure you have write access on TVB folder!")
def check_created(self, path):
    """
    Check that the given folder exists, otherwise create it, with the entire tree of parent folders.
    This method is synchronized, for parallel access from events, to avoid conflicts.
    """
    try:
        # if this is meant to be used concurrently it might be better to catch OSError 17 then checking exists
        if os.path.exists(path):
            return
        self.logger.debug("Creating folder:" + str(path))
        access_mode = TvbProfile.current.ACCESS_MODE_TVB_FILES
        os.makedirs(path, mode=access_mode)
        os.chmod(path, access_mode)
    except OSError:
        self.logger.exception("COULD NOT CREATE FOLDER! CHECK ACCESS ON IT!")
        raise FileStructureException("Could not create Folder" + str(path))
def remove_operation_data(self, project_name, operation_id):
    """
    Remove H5 storage fully.
    """
    try:
        op_path = self.get_project_folder(project_name, str(operation_id))
        self.logger.debug("Removing: " + str(op_path))
        if os.path.exists(op_path):
            # Pick the right removal routine for a directory vs. a plain file
            remover = shutil.rmtree if os.path.isdir(op_path) else os.remove
            remover(op_path)
    except Exception:
        self.logger.exception("Could not remove files")
        raise FileStructureException("Could not remove files for OP" + str(operation_id))
def rename_project_structure(self, project_name, new_name):
    """
    Rename Project folder or THROW FileStructureException.
    """
    try:
        current_path = self.get_project_folder(project_name)
        parent_folder = os.path.split(current_path)[0]
        renamed_path = os.path.join(parent_folder, new_name)
        # Refuse to clobber an existing folder with the target name
        if os.path.exists(renamed_path):
            raise IOError("Path exists %s " % renamed_path)
        os.rename(current_path, renamed_path)
        return current_path, renamed_path
    except Exception:
        self.logger.exception("Could not rename node!")
        raise FileStructureException("Could not rename to %s" % new_name)
def remove_project(self, project, sync_for_encryption=False):
    """Remove the project folder from disk, plus its encrypted copy and encryption key."""
    project_folder = self.get_project_folder(project.name)
    if sync_for_encryption:
        self.sync_folders(project_folder)
    try:
        self.remove_folder(project_folder)
        self.logger.debug("Project folders were removed for " + project.name)
    except OSError:
        self.logger.exception("A problem occurred while removing folder.")
        raise FileStructureException("Permission denied. Make sure you have write access on TVB folder!")
    # Clean up encryption leftovers as well, ignoring missing files
    leftover_files = [
        DataEncryptionHandler.compute_encrypted_folder_path(project_folder),
        DataEncryptionHandler.project_key_path(project.id),
    ]
    FilesHelper.remove_files(leftover_files, True)