def __open_h5_file(self, mode='a'):
    """
    Open file for reading, writing or append.

    :param mode: Mode to open file (possible values are w / r / a).
        Default value is 'a', to allow adding multiple data to the same file.
    :returns: returns the file which stores data in HDF5 format opened for read / write according to mode param
    :raises FileStructureException: when no storage path is configured, or the file cannot be opened
    """
    if self.__storage_full_name is None:
        raise FileStructureException("Invalid storage file. Please provide a valid path.")
    try:
        # Check if file is still open from previous writes; reuse the live handle when valid.
        if self.__hfd5_file is None or not self.__hfd5_file.id.valid:
            file_exists = os.path.exists(self.__storage_full_name)
            # bug in some versions of hdf5 on windows prevent creating file with mode='a'
            if not file_exists and mode == 'a':
                mode = 'w'
            LOG.debug("Opening file: %s in mode: %s" % (self.__storage_full_name, mode))
            self.__hfd5_file = hdf5.File(self.__storage_full_name, mode, libver='latest')
            # If this is the first time we access file, write data version
            if not file_exists:
                # Restrict file permissions per the TVB profile, then stamp the data version
                # as a TVB-prefixed attribute on the root node.
                os.chmod(self.__storage_full_name, TvbProfile.current.ACCESS_MODE_TVB_FILES)
                attr_name = self.TVB_ATTRIBUTE_PREFIX + TvbProfile.current.version.DATA_VERSION_ATTRIBUTE
                self.__hfd5_file['/'].attrs[attr_name] = TvbProfile.current.version.DATA_VERSION
    except (IOError, OSError) as err:
        LOG.exception("Could not open storage file.")
        raise FileStructureException("Could not open storage file. %s" % err)
    return self.__hfd5_file
def remove_metadata(self, meta_key, dataset_name='', tvb_specific_metadata=True, where=ROOT_NODE_PATH):
    """
    Remove one meta-data attribute from the root node or from a given data set.

    :param meta_key: name of the metadata attribute to be removed
    :param dataset_name: name of the dataset from where to delete metadata.
        If None, metadata will be removed from ROOT node.
    :param tvb_specific_metadata: specify if the provided metadata is
        specific to TVB (keys will have a TVB prefix).
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :raises FileStructureException: when the dataset or the attribute is missing
    """
    LOG.debug("Deleting metadata: %s for dataset: %s" % (meta_key, dataset_name))
    dataset_name = '' if dataset_name is None else dataset_name
    where = self.ROOT_NODE_PATH if where is None else where
    # TVB-owned attributes carry a reserved prefix on disk.
    attribute_name = self.TVB_ATTRIBUTE_PREFIX + meta_key if tvb_specific_metadata else meta_key
    try:
        node = self._open_h5_file()[where + dataset_name]
        del node.attrs[attribute_name]
    except KeyError:
        LOG.error("Trying to delete metadata on a missing data set: %s" % dataset_name)
        raise FileStructureException("Could not locate dataset: %s" % dataset_name)
    except AttributeError:
        LOG.error("Trying to delete missing metadata %s" % meta_key)
        raise FileStructureException("There is no metadata named %s on this node" % meta_key)
    finally:
        # Always release the HDF5 handle, even on failure.
        self.close_file()
def __init__(self, storage_folder, file_name, buffer_size=600000):
    """
    Create a storage manager bound to one HDF5 file.

    @param buffer_size: the size in Bytes of the amount of data that
        will be buffered before writing to file.
    """
    # Both path components are mandatory; reject missing ones up front.
    for argument, description in ((storage_folder, "folder where to store data"),
                                  (file_name, "file name where to store data")):
        if argument is None:
            raise FileStructureException("Please provide the " + description)
    self.__storage_full_name = os.path.join(storage_folder, file_name)
    self.__buffer_size = buffer_size
    self.__buffer_array = None
    self.data_buffers = {}
def get_metadata(self, dataset_name='', where=ROOT_NODE_PATH, ignore_errors=False):
    """
    Retrieve ALL meta-data information for root node or for a given data set.

    :param dataset_name: name of the dataset for which to read metadata.
        If None, read metadata from ROOT node.
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :param ignore_errors: when True, a missing dataset yields an empty result
        instead of raising MissingDataSetException
    :returns: a dictionary containing all metadata associated with the node
    :raises MissingDataSetException: dataset not found and ignore_errors is False
    :raises FileStructureException: metadata attribute missing, or any other read failure
    """
    LOG.debug("Retrieving metadata for dataset: %s" % dataset_name)
    if dataset_name is None:
        dataset_name = ''
    if where is None:
        where = self.ROOT_NODE_PATH
    meta_key = ""
    try:
        # Open file read-only; attribute keys written by TVB carry a prefix we strip on read.
        hdf5_file = self._open_h5_file('r')
        node = hdf5_file[where + dataset_name]
        all_meta_data = {}
        for meta_key in node.attrs:
            new_key = meta_key
            if meta_key.startswith(self.TVB_ATTRIBUTE_PREFIX):
                new_key = meta_key[len(self.TVB_ATTRIBUTE_PREFIX):]
            all_meta_data[new_key] = self._deserialize_value(node.attrs[meta_key])
        return all_meta_data
    except KeyError:
        if not ignore_errors:
            msg = "Trying to read data from a missing data set: %s" % (where + dataset_name)
            LOG.warning(msg)
            raise MissingDataSetException(msg)
        else:
            # NOTE(review): returns an empty ndarray, not the dict the docstring
            # promises — kept for backward compatibility; confirm callers before
            # changing this to {}.
            return numpy.ndarray(0)
    except AttributeError:
        msg = "Trying to get value for missing metadata %s" % meta_key
        LOG.error(msg)
        raise FileStructureException(msg)
    # Fixed Python 2 'except Exception, excep' syntax (invalid on Python 3 and
    # inconsistent with the 'as' form used elsewhere in this file).
    except Exception as excep:
        msg = "Failed to read metadata from H5 file! %s" % self.__storage_full_name
        LOG.exception(excep)
        LOG.error(msg)
        raise FileStructureException(msg)
def _check_data(self, data_list): """ Check if the data to be stores is in a good format. If not adapt it. """ if data_list is None: raise FileStructureException("Could not store null data") if not (isinstance(data_list, list) or isinstance(data_list, numpy.ndarray)): raise FileStructureException("Invalid data type. Could not store data of type:" + str(type(data_list))) data_to_store = data_list if isinstance(data_to_store, list): data_to_store = numpy.array(data_list) return data_to_store
def unpack_zip(self, uploaded_zip, folder_path):
    """
    Simple method to unpack ZIP archive in a given folder.

    :param uploaded_zip: path or file-like object of the ZIP archive
    :param folder_path: destination folder for the extracted entries
    :returns: list of paths written under folder_path
    :raises FileStructureException: when the archive is not a valid ZIP
    """
    # Zip member names always use '/' separators (per the ZIP spec), so the
    # exclusion check must not use os.path.sep — that silently broke on Windows.
    excluded_folders = ["__MACOSX/", ".DS_Store/"]
    try:
        with zipfile.ZipFile(uploaded_zip) as zip_arch:
            result = []
            for filename in zip_arch.namelist():
                if any(filename.startswith(excluded) or ('/' + excluded) in filename
                       for excluded in excluded_folders):
                    continue
                new_file_name = os.path.join(folder_path, filename)
                # 'rU' is not a valid ZipFile.open mode on Python 3 — use 'r'.
                with zip_arch.open(filename, 'r') as src:
                    if new_file_name.endswith('/'):
                        if not os.path.exists(new_file_name):
                            os.makedirs(new_file_name)
                    else:
                        FilesHelper.copy_file(src, new_file_name)
                result.append(new_file_name)
            return result
    # Fixed Python 2 'except BadZipfile, excep' syntax.
    except BadZipfile as excep:
        self.logger.exception("Could not process zip file")
        raise FileStructureException("Invalid ZIP file..." + str(excep))
def __open_h5_file(self, mode='a', chunk_shape=None):
    """
    Open file for reading, writing or append.

    :param mode: Mode to open file (possible values are w / r / a).
        Default value is 'a', to allow adding multiple data to the same file.
    :param chunk_shape: Shape for chunks at write.
    :return: returns the file which stores data in HDF5 format opened for read / write according to mode param
    :raises FileStructureException: when no storage path was configured
    """
    if self.__storage_full_name is not None:
        # Check if file is still open from previous writes.
        if self.__hfd5_file is None or not self.__hfd5_file.fid.valid:
            # NOTE(review): the sibling implementation checks '.id.valid'; confirm
            # '.fid.valid' is correct for the h5py version in use.
            file_exists = os.path.exists(self.__storage_full_name)
            LOG.debug("Opening file: %s in mode: %s" % (self.__storage_full_name, mode))
            # NOTE(review): 'chunks=' looks like a per-dataset option being passed to
            # the File constructor — verify h5py accepts it here, otherwise this may
            # raise TypeError or be silently ignored.
            self.__hfd5_file = hdf5.File(self.__storage_full_name, mode, libver='latest', chunks=chunk_shape)
            # If this is the first time we access file, write data version
            if not file_exists:
                # Restrict permissions and stamp the data version on the root node.
                os.chmod(self.__storage_full_name, cfg.ACCESS_MODE_TVB_FILES)
                self.__hfd5_file['/'].attrs[self.TVB_ATTRIBUTE_PREFIX + cfg.DATA_VERSION_ATTRIBUTE] = cfg.DATA_VERSION
        return self.__hfd5_file
    else:
        raise FileStructureException("Invalid storage file. Please provide a valid path.")
def remove_data(self, dataset_name, where=ROOT_NODE_PATH):
    """
    Delete a data set from the H5 file.

    :param dataset_name: name of the data set to be deleted
    :param where: represents the path where dataset is stored (e.g. /data/info)
    :raises FileStructureException: when the dataset does not exist
    """
    LOG.debug("Removing data set: %s" % dataset_name)
    dataset_name = '' if dataset_name is None else dataset_name
    where = self.ROOT_NODE_PATH if where is None else where
    try:
        # Default append mode ('a') is required so deletion is permitted.
        del self._open_h5_file()[where + dataset_name]
    except KeyError:
        LOG.warn("Trying to delete data set: %s but current file does not contain it." % dataset_name)
        raise FileStructureException("Could not locate dataset: %s" % dataset_name)
    finally:
        self.close_file()
def remove_datatype(self, datatype):
    """
    Remove H5 storage fully.

    :param datatype: entity exposing get_storage_file_path()
    :raises FileStructureException: when the file cannot be removed
    """
    try:
        os.remove(datatype.get_storage_file_path())
    # Fixed Python 2 'except Exception, excep' syntax (invalid on Python 3).
    except Exception as excep:
        self.logger.error(excep)
        raise FileStructureException("Could not remove " + str(datatype))
def rename_project_structure(self, project_name, new_name):
    """
    Rename Project folder or THROW FileStructureException.

    :param project_name: current project folder name
    :param new_name: desired folder name
    :returns: tuple (old_path, new_path)
    :raises FileStructureException: when the rename cannot be performed
    """
    try:
        path = self.get_project_folder(project_name)
        folder = os.path.split(path)[0]
        new_full_name = os.path.join(folder, new_name)
        # Defect fixed: the original computed new_full_name but never renamed
        # nor returned anything (compare the sibling implementations of this
        # method in the file, which rename and return both paths).
        if os.path.exists(new_full_name):
            raise IOError("Path exists %s " % new_full_name)
        os.rename(path, new_full_name)
        return path, new_full_name
    # Fixed Python 2 'except Exception, excep' syntax.
    except Exception as excep:
        self.logger.error("Could not rename node!")
        self.logger.exception(excep)
        raise FileStructureException("Could not Rename:" + str(new_name))
def remove_folder(folder_path, ignore_errors=False):
    """
    Given a folder path, try to remove that folder from disk.

    :param folder_path: folder to delete recursively
    :param ignore_errors: When False throw FileStructureException if folder_path is invalid.
    """
    # Guard clause: a non-directory path is either silently tolerated or rejected.
    if not os.path.isdir(folder_path):
        if ignore_errors:
            return
        raise FileStructureException("Given path does not exists, or is not a folder " + str(folder_path))
    shutil.rmtree(folder_path, ignore_errors)
def move_datatype(self, datatype, new_project_name, new_op_id, full_path):
    """
    Move H5 storage into a new location.

    :raises FileStructureException: when the underlying file cannot be moved
    """
    try:
        destination_folder = self.get_project_folder(new_project_name, str(new_op_id))
        # Keep the original file name; only the containing folder changes.
        destination_file = os.path.join(destination_folder, os.path.split(full_path)[1])
        os.rename(full_path, destination_file)
    except Exception:
        self.logger.exception("Could not move file")
        raise FileStructureException("Could not move " + str(datatype))
def remove_datatype_file(self, h5_file):
    """
    Remove H5 storage fully.

    :param h5_file: path of the storage file to delete
    :raises FileStructureException: when removal fails unexpectedly
    """
    try:
        # Missing file is not an error: just note it and stop.
        if not os.path.exists(h5_file):
            self.logger.warning("Data file already removed:" + str(h5_file))
            return
        os.remove(h5_file)
    except Exception:
        self.logger.exception("Could not remove file")
        raise FileStructureException("Could not remove " + str(h5_file))
def remove_operation_data(self, project_name, operation_id):
    """
    Remove H5 storage fully.

    :param project_name: project owning the operation
    :param operation_id: id of the operation whose storage is removed
    :raises FileStructureException: when the operation files cannot be removed
    """
    try:
        complete_path = self.get_operation_folder(project_name, operation_id)
        # The path is usually a folder, but tolerate a plain file too.
        if os.path.isdir(complete_path):
            shutil.rmtree(complete_path)
        else:
            os.remove(complete_path)
    # Fixed Python 2 'except Exception, excep' syntax (invalid on Python 3).
    except Exception as excep:
        self.logger.error(excep)
        raise FileStructureException("Could not remove files for OP" + str(operation_id))
def remove_project_structure(self, project_name):
    """
    Remove all folders for project or THROW FileStructureException.

    :raises FileStructureException: when the folder cannot be deleted (e.g. permissions)
    """
    try:
        complete_path = self.get_project_folder(project_name)
        # A directory is removed recursively; a stray plain file is unlinked.
        if os.path.isdir(complete_path):
            shutil.rmtree(complete_path)
        elif os.path.exists(complete_path):
            os.remove(complete_path)
        self.logger.debug("Project folders were removed for " + project_name)
    except OSError:
        self.logger.exception("A problem occurred while removing folder.")
        raise FileStructureException("Permission denied. Make sure you have write access on TVB folder!")
def unpack_zip(self, uploaded_zip, folder_path):
    """
    Simple method to unpack ZIP archive in a given folder.

    :returns: list of the extracted paths
    :raises FileStructureException: for an invalid ZIP or any extraction failure
    """

    def _is_excluded(member_name):
        # Skip macOS metadata entries wherever they appear in the archive.
        for junk in ["__MACOSX/", ".DS_Store"]:
            if member_name.startswith(junk) or ('/' + junk) in member_name:
                return True
        return False

    try:
        with ZipFile(uploaded_zip) as archive:
            return [archive.extract(member, folder_path)
                    for member in archive.namelist() if not _is_excluded(member)]
    except BadZipfile as excep:
        self.logger.exception("Could not process zip file")
        raise FileStructureException("Invalid ZIP file..." + str(excep))
    except Exception as excep:
        self.logger.exception("Could not process zip file")
        raise FileStructureException(
            "Could not unpack the given ZIP file..." + str(excep))
def move_datatype(self, datatype, new_project_name, new_op_id):
    """
    Move H5 storage into a new location.

    :raises FileStructureException: when the storage file cannot be moved
    """
    try:
        # TODO FOR LIA : CHECK CIRCULAR DEPENDENCY
        # Deferred import kept on purpose (see TODO above).
        from tvb.core.neocom import h5
        source_path = h5.path_for_stored_index(datatype)
        target_folder = self.get_project_folder(new_project_name, str(new_op_id))
        # Preserve the file name; only its containing folder changes.
        target_path = os.path.join(target_folder, os.path.split(source_path)[1])
        os.rename(source_path, target_path)
    except Exception:
        self.logger.exception("Could not move file")
        raise FileStructureException("Could not move " + str(datatype))
def check_created(self, path=TVBSettings.TVB_STORAGE):
    """
    Check that the given folder exists, otherwise create it, with the entire
    tree of parent folders. This method is synchronized, for parallel access
    from events, to avoid conflicts.

    :param path: folder to ensure; defaults to the configured TVB storage root
    :raises FileStructureException: when the folder cannot be created
    """
    try:
        if not os.path.exists(path):
            self.logger.debug("Creating folder:" + str(path))
            os.makedirs(path, mode=TVBSettings.ACCESS_MODE_TVB_FILES)
            # makedirs honors umask, so chmod explicitly to the intended mode.
            os.chmod(path, TVBSettings.ACCESS_MODE_TVB_FILES)
    # Fixed Python 2 'except OSError, excep' syntax (invalid on Python 3).
    except OSError as excep:
        self.logger.error("COULD NOT CREATE FOLDER! CHECK ACCESS ON IT!")
        self.logger.exception(excep)
        raise FileStructureException("Could not create Folder" + str(path))
def check_created(self, path=TvbProfile.current.TVB_STORAGE):
    """
    Check that the given folder exists, otherwise create it, with the entire
    tree of parent folders. This method is synchronized, for parallel access
    from events, to avoid conflicts.

    :raises FileStructureException: when the folder cannot be created
    """
    try:
        # if this is meant to be used concurrently it might be better to catch OSError 17 then checking exists
        if os.path.exists(path):
            return
        self.logger.debug("Creating folder:" + str(path))
        os.makedirs(path, mode=TvbProfile.current.ACCESS_MODE_TVB_FILES)
        # makedirs honors umask, so apply the intended mode explicitly.
        os.chmod(path, TvbProfile.current.ACCESS_MODE_TVB_FILES)
    except OSError:
        self.logger.exception("COULD NOT CREATE FOLDER! CHECK ACCESS ON IT!")
        raise FileStructureException("Could not create Folder" + str(path))
def remove_operation_data(self, project_name, operation_id):
    """
    Remove H5 storage fully.

    :raises FileStructureException: when operation files cannot be removed
    """
    try:
        target = self.get_operation_folder(project_name, operation_id)
        self.logger.debug("Removing: " + str(target))
        # Directories go recursively; a plain file is unlinked; absent paths are a no-op.
        if os.path.isdir(target):
            shutil.rmtree(target)
        elif os.path.exists(target):
            os.remove(target)
    except Exception:
        self.logger.exception("Could not remove files")
        raise FileStructureException("Could not remove files for OP" + str(operation_id))
def remove_datatype(self, datatype):
    """
    Remove H5 storage fully.

    :param datatype: entity exposing get_storage_file_path()
    :raises FileStructureException: when removal fails
    """
    try:
        # Hoisted: the original called get_storage_file_path() twice.
        storage_path = datatype.get_storage_file_path()
        if os.path.exists(storage_path):
            os.remove(storage_path)
        else:
            self.logger.warning("Data file already removed:" + str(storage_path))
    # Fixed Python 2 'except Exception, excep' syntax (invalid on Python 3).
    except Exception as excep:
        self.logger.error(excep)
        raise FileStructureException("Could not remove " + str(datatype))
def rename_project_structure(self, project_name, new_name):
    """
    Rename Project folder or THROW FileStructureException.

    :returns: tuple (old_path, new_path)
    :raises FileStructureException: when the rename cannot be performed
    """
    try:
        old_path = self.get_project_folder(project_name)
        parent_folder = os.path.split(old_path)[0]
        renamed_path = os.path.join(parent_folder, new_name)
        # Refuse to overwrite an existing project folder.
        if os.path.exists(renamed_path):
            raise IOError("Path exists %s " % renamed_path)
        os.rename(old_path, renamed_path)
        return old_path, renamed_path
    except Exception:
        self.logger.exception("Could not rename node!")
        raise FileStructureException("Could not rename to %s" % new_name)
def unpack_zip(self, uploaded_zip, folder_path):
    """
    Simple method to unpack ZIP archive in a given folder.

    :param uploaded_zip: path or file-like object of the ZIP archive
    :param folder_path: destination folder for the extracted entries
    :returns: list of paths written under folder_path
    :raises FileStructureException: when the archive is not a valid ZIP
    """
    try:
        # Context manager closes the archive even on error (the original leaked the handle).
        with zipfile.ZipFile(uploaded_zip) as zip_arch:
            result = []
            for filename in zip_arch.namelist():
                new_file_name = os.path.join(folder_path, filename)
                if new_file_name.endswith('/'):
                    # Guard added: makedirs raises if the folder already exists.
                    if not os.path.exists(new_file_name):
                        os.makedirs(new_file_name)
                else:
                    # 'rU' is rejected by Python 3's ZipFile.open - 'r' is the valid read mode.
                    with zip_arch.open(filename, 'r') as src:
                        FilesHelper.copy_file(src, new_file_name)
                result.append(new_file_name)
            return result
    # Fixed Python 2 'except BadZipfile, excep' syntax.
    except BadZipfile as excep:
        self.logger.error(excep)
        raise FileStructureException("Invalid ZIP file...")
return True return False try: result = [] with ZipFile(uploaded_zip) as zip_arch: for filename in zip_arch.namelist(): if not to_be_excluded(filename): result.append(zip_arch.extract(filename, folder_path)) return result except BadZipfile, excep: self.logger.exception("Could not process zip file") raise FileStructureException("Invalid ZIP file..." + str(excep)) except Exception, excep: self.logger.exception("Could not process zip file") raise FileStructureException( "Could not unpack the given ZIP file..." + str(excep)) @staticmethod def copy_file(source, dest, dest_postfix=None, buffer_size=1024 * 1024): """ Copy a file from source to dest. source and dest can either be strings or any object with a read or write method, like StringIO for example. """ should_close_source = False should_close_dest = False try: if not hasattr(source, 'read'): source = open(source, 'rb') should_close_source = True
if not os.path.exists(complete_path): self.check_created(complete_path) return complete_path def rename_project_structure(self, project_name, new_name): """ Rename Project folder or THROW FileStructureException. """ try: path = self.get_project_folder(project_name) folder = os.path.split(path)[0] new_full_name = os.path.join(folder, new_name) except Exception, excep: self.logger.error("Could not rename node!") self.logger.exception(excep) raise FileStructureException("Could not Rename:" + str(new_name)) if os.path.exists(new_full_name): raise FileStructureException("File already used " + str(new_name) + " Can not add a duplicate!") try: os.rename(path, new_full_name) return path, new_full_name except Exception, excep: self.logger.error("Could not rename node!") self.logger.exception(excep) raise FileStructureException("Could not Rename: " + str(new_name)) def remove_project_structure(self, project_name): """ Remove all folders for project or THROW FileStructureException. """ try: complete_path = self.get_project_folder(project_name) if os.path.exists(complete_path): if os.path.isdir(complete_path): shutil.rmtree(complete_path)