def update(input_file, burst_match_dict=None):
    """
    Upgrade the root metadata of a single H5 file from storage version 2 to version 3.

    In order to avoid segmentation faults when updating a batch of files just
    start every conversion on a different Python process.

    :param input_file: the file that needs to be converted to a newer file storage version.
        This should be a file that still uses TVB 2.0 storage
    :param burst_match_dict: accepted for signature compatibility; not used by this script
    :raises FileVersioningException: when `input_file` is not an existing file on disk
    """
    if not os.path.isfile(input_file):
        raise FileVersioningException("The input path %s received for upgrading from 2 -> 3 is not a "
                                      "valid file on the disk." % input_file)

    parent_dir, base_name = os.path.split(input_file)
    h5_storage = StorageInterface.get_storage_manager(input_file)
    metadata = h5_storage.get_metadata()
    datatype_class = metadata[DataTypeMetaData.KEY_CLASS_NAME]

    if datatype_class == "LocalConnectivity":
        metadata[DataTypeMetaData.KEY_MODULE] = "tvb.datatypes.local_connectivity"
        # Module path is written before the dedicated LocalConnectivity migration runs
        h5_storage.set_metadata(metadata)
        update_localconnectivity_metadata(parent_dir, base_name)
    elif datatype_class == "RegionMapping":
        metadata[DataTypeMetaData.KEY_MODULE] = "tvb.datatypes.region_mapping"

    # Stamp the file with the current data version, whatever its class
    version_settings = TvbProfile.current.version
    metadata[version_settings.DATA_VERSION_ATTRIBUTE] = version_settings.DATA_VERSION
    h5_storage.set_metadata(metadata)
def __init__(self, path):
    # type: (str) -> None
    """
    Bind this object to the file at `path` and declare its header accessors.

    A storage manager is obtained for `path`, then one accessor object
    (`Uuid` / `Scalar`) is created per header field, each receiving `self`
    as its owner. When the storage manager reports that the file is not a
    valid TVB file, the class path is stored in `written_by` and the
    instance is flagged through `is_new_file`.

    :param path: location on disk of the file backing this object
    """
    self.path = path
    self.storage_manager = StorageInterface.get_storage_manager(self.path)
    # would be nice to have an opened state for the chunked api instead of the close_file=False

    # common scalar headers
    self.gid = Uuid(HasTraits.gid, self)
    self.written_by = Scalar(Attr(str), self, name=self.KEY_WRITTEN_BY)
    self.create_date = Scalar(Attr(str), self, name='create_date')
    self.type = Scalar(Attr(str), self, name='type')

    # Generic attributes descriptors
    self.generic_attributes = GenericAttributes()
    self.invalid = Scalar(Attr(bool), self, name='invalid')
    self.is_nan = Scalar(Attr(bool), self, name='is_nan')
    self.subject = Scalar(Attr(str), self, name='subject')
    self.state = Scalar(Attr(str), self, name='state')
    self.user_tag_1 = Scalar(Attr(str), self, name='user_tag_1')
    self.user_tag_2 = Scalar(Attr(str), self, name='user_tag_2')
    self.user_tag_3 = Scalar(Attr(str), self, name='user_tag_3')
    self.user_tag_4 = Scalar(Attr(str), self, name='user_tag_4')
    self.user_tag_5 = Scalar(Attr(str), self, name='user_tag_5')
    # operation_tag and parent_burst are the only headers declared with required=False
    self.operation_tag = Scalar(Attr(str, required=False), self, name='operation_tag')
    self.parent_burst = Uuid(Attr(uuid.UUID, required=False), self, name='parent_burst')
    self.visible = Scalar(Attr(bool), self, name='visible')
    # Starts empty; nothing in this constructor fills it
    self.metadata_cache = None
    # Keep a list with datasets for which we should write metadata before closing the file
    self.expandable_datasets = []

    if not self.storage_manager.is_valid_tvb_file():
        # Not (yet) a valid TVB file: record which class writes it and flag it as new
        self.written_by.store(self.get_class_path())
        self.is_new_file = True
class FilesUpdateManager(UpdateManager):
    """
    Manager for updating H5 files version, when code gets changed.
    """

    UPDATE_SCRIPTS_SUFFIX = "_update_files"
    PROJECTS_PAGE_SIZE = 20
    DATA_TYPES_PAGE_SIZE = 500
    # Outcome of the latest run_all_updates() call, stored at class level
    STATUS = True
    MESSAGE = "Done"

    def __init__(self):
        super(FilesUpdateManager, self).__init__(file_update_scripts,
                                                 TvbProfile.current.version.DATA_CHECKED_TO_VERSION,
                                                 TvbProfile.current.version.DATA_VERSION)
        self.storage_interface = StorageInterface()

    def get_file_data_version(self, file_path):
        """
        Return the data version for the given file.

        :param file_path: the path on disk to the file for which you need the TVB data version
        :returns: a number representing the data version for which the input file was written
        """
        data_version = TvbProfile.current.version.DATA_VERSION_ATTRIBUTE
        return self.storage_interface.get_storage_manager(file_path).get_file_data_version(data_version)

    def is_file_up_to_date(self, file_path):
        """
        Returns True only if the data version of the file is equal with the
        data version specified into the TVB configuration file.

        A file whose version cannot be read (MissingDataFileException or
        FileStructureException) is logged and reported as not up to date.
        """
        try:
            file_version = self.get_file_data_version(file_path)
        except (MissingDataFileException, FileStructureException) as ex:
            self.log.exception(ex)
            return False

        return bool(file_version == TvbProfile.current.version.DATA_VERSION)

    def upgrade_file(self, input_file_name, datatype=None, burst_match_dict=None):
        """
        Upgrades the given file to the latest data version. The file will be upgraded
        sequentially, one update script at a time, up until the current version from
        TvbProfile.current.version.DATA_VERSION

        Before each script runs, the file is copied to a backup in the TVB temp folder.
        When the script raises FileMigrationException the backup is restored over the
        file. The backup is deleted after the script finished, successful or not, so
        that migrated files do not pile up in the temp folder. Any other exception
        propagates to the caller and leaves the backup in place.

        :param input_file_name: the path to the file which needs to be upgraded
        :param datatype: optional DataType entity; when given, its `disk_size` is
            recomputed from the upgraded file and stored in DB
        :param burst_match_dict: passed through to every update script
        :return: True when update was successful (or not needed) and False when
            an update script failed with FileMigrationException.
        """
        if self.is_file_up_to_date(input_file_name):
            # Avoid running the DB update of size, when H5 is not being changed, to speed-up
            return True

        file_version = self.get_file_data_version(input_file_name)
        self.log.info("Updating from version %s , file: %s " % (file_version, input_file_name))

        temp_file_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER,
                                      os.path.basename(input_file_name) + '.tmp')

        for script_name in self.get_update_scripts(file_version):
            self.storage_interface.copy_file(input_file_name, temp_file_path)
            try:
                self.run_update_script(script_name, input_file=input_file_name, burst_match_dict=burst_match_dict)
            except FileMigrationException as excep:
                # Roll back to the state before this script, then drop the backup
                self.storage_interface.copy_file(temp_file_path, input_file_name)
                os.remove(temp_file_path)
                self.log.error(excep)
                return False
            else:
                # Script succeeded: the backup is no longer needed
                os.remove(temp_file_path)

        if datatype:
            # Compute and update the disk_size attribute of the DataType in DB:
            datatype.disk_size = self.storage_interface.compute_size_on_disk(input_file_name)
            dao.store_entity(datatype)

        return True

    def __upgrade_h5_list(self, h5_files):
        """
        Upgrade a list of H5 files to the current version.

        :param h5_files: paths of the files to upgrade, processed in the given order
        :returns: (nr_of_dts_upgraded_fine, nr_of_dts_failed) a two-tuple of integers representing
            the number of files for which the upgrade worked fine, and the number of files for which
            the upgrade has failed.
        """
        nr_of_dts_upgraded_fine = 0
        nr_of_dts_failed = 0

        # One dictionary instance is shared by all update scripts of this batch
        burst_match_dict = {}
        for path in h5_files:
            if self.upgrade_file(path, burst_match_dict=burst_match_dict):
                nr_of_dts_upgraded_fine += 1
            else:
                nr_of_dts_failed += 1

        return nr_of_dts_upgraded_fine, nr_of_dts_failed

    # TO DO: We should migrate the older scripts to Python 3 if we want to support migration for versions < 4
    def run_all_updates(self):
        """
        Upgrades all the data types from TVB storage to the latest data version.

        Nothing is returned. The outcome is published through the class attributes
        FilesUpdateManager.STATUS (True when the upgrade was successful for all files,
        False otherwise) and FilesUpdateManager.MESSAGE (a status update message), and
        the checked-to version and storage status are written to the configuration file.
        Nothing happens when DATA_CHECKED_TO_VERSION is not lower than DATA_VERSION.
        """
        if TvbProfile.current.version.DATA_CHECKED_TO_VERSION < TvbProfile.current.version.DATA_VERSION:
            start_time = datetime.now()

            file_paths = self.get_all_h5_paths()
            total_count = len(file_paths)
            no_ok, no_error = self.__upgrade_h5_list(file_paths)

            # total_seconds() also counts whole days; timedelta.seconds alone would drop them
            elapsed_minutes = int((datetime.now() - start_time).total_seconds() // 60)
            self.log.info("Updated H5 files in total: %d [fine:%d, failed:%d in: %s min]" % (
                total_count, no_ok, no_error, elapsed_minutes))
            delete_old_burst_table_after_migration()

            # Now update the configuration file since update was done
            config_file_update_dict = {stored.KEY_LAST_CHECKED_FILE_VERSION: TvbProfile.current.version.DATA_VERSION}

            if no_error == 0:
                # Everything went fine
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_VALID
                FilesUpdateManager.STATUS = True
                FilesUpdateManager.MESSAGE = ("File upgrade finished successfully for all %s entries. "
                                              "Thank you for your patience!" % total_count)
                self.log.info(FilesUpdateManager.MESSAGE)
            else:
                # Keep track of how many DataTypes were properly updated and how many
                # were marked as invalid due to missing files or invalid manager.
                config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_INVALID
                FilesUpdateManager.STATUS = False
                FilesUpdateManager.MESSAGE = ("Out of %s stored DataTypes, %s were upgraded successfully, but %s had "
                                              "faults and were marked invalid" % (total_count, no_ok, no_error))
                self.log.warning(FilesUpdateManager.MESSAGE)

            TvbProfile.current.version.DATA_CHECKED_TO_VERSION = TvbProfile.current.version.DATA_VERSION
            TvbProfile.current.manager.add_entries_to_config_file(config_file_update_dict)

    @staticmethod
    def get_all_h5_paths():
        """
        This method returns a list of all h5 files and it is used in the migration from version 4 to 5.

        Only files with the TVB storage extension, located directly inside operation folders
        (sub-folders of a project whose name parses as an integer), are considered. Files
        without a 'Create_date' metadata entry, or raising FileStructureException when read,
        are skipped. The result is sorted by the creation date stored in the files themselves.
        """
        h5_files = []
        storage_interface = StorageInterface()
        projects_folder = storage_interface.get_projects_folder()

        for project_name in os.listdir(projects_folder):
            # Getting operation folders inside the current project
            project_full_path = os.path.join(projects_folder, project_name)
            try:
                operation_folders = os.listdir(project_full_path)
            except NotADirectoryError:
                continue

            for op_folder in operation_folders:
                try:
                    # Non-numeric names raise ValueError here and are skipped below
                    int(op_folder)
                    op_folder_path = os.path.join(project_full_path, op_folder)
                    for file_name in os.listdir(op_folder_path):
                        if not storage_interface.ends_with_tvb_storage_file_extension(file_name):
                            continue
                        h5_file = os.path.join(op_folder_path, file_name)
                        try:
                            if FilesUpdateManager._is_empty_file(h5_file):
                                continue
                            h5_files.append(h5_file)
                        except FileStructureException:
                            continue
                except ValueError:
                    pass

        # Sort all h5 files based on their creation date stored in the files themselves
        sorted_h5_files = sorted(h5_files, key=lambda h5_path: FilesUpdateManager._get_create_date_for_sorting(
            h5_path) or datetime.now())
        return sorted_h5_files

    @staticmethod
    def _is_empty_file(h5_file):
        """Tell whether the file has no 'Create_date' entry in its metadata."""
        return H5File.get_metadata_param(h5_file, 'Create_date') is None

    @staticmethod
    def _get_create_date_for_sorting(h5_file):
        """Read the 'Create_date' metadata of the file, decode it as UTF-8 and parse it into a date."""
        create_date_str = str(H5File.get_metadata_param(h5_file, 'Create_date'), 'utf-8')
        create_date = string2date(create_date_str, date_format='datetime:%Y-%m-%d %H:%M:%S.%f')
        return create_date
def get_metadata_param(path, param):
    """
    Look up a single entry of the metadata stored in the file at `path`.

    :param path: location of the file to read
    :param param: name of the metadata entry
    :returns: whatever the metadata's `get(param)` yields for that name
    """
    return StorageInterface.get_storage_manager(path).get_metadata().get(param)
def update(input_file, burst_match_dict=None):
    """
    Upgrade the root metadata of a single H5 file from storage version 3 to version 4.

    What is changed depends on the class name found in the root metadata:

    - class names containing "ProjectionSurface", when the projection-type field is
      missing: the field is filled in from the class name (SEEG, MEG, otherwise EEG);
    - class names containing "TimeSeries": the datatype is loaded through ImportService
      and looked up in DB by gid. When found, its full metadata is persisted again and
      the entity is stored in DB. Otherwise, when the surface-mapping field is missing,
      the surface- and volume-mapping fields are both defaulted to False.

    In all cases the data-version attribute is set to the current DATA_VERSION and the
    root metadata is written back to the file.

    :param input_file: the file that needs to be converted to a newer file storage version.
    :param burst_match_dict: accepted for signature compatibility; not used by this script
    :raises IncompatibleFileManagerException: when `input_file` is not an existing file,
        or when its root metadata has no class-name entry
    """
    if not os.path.isfile(input_file):
        raise IncompatibleFileManagerException(
            "The input path %s received for upgrading from 3 -> 4 is not a "
            "valid file on the disk." % input_file)

    folder, file_name = os.path.split(input_file)
    storage_manager = StorageInterface.get_storage_manager(input_file)

    root_metadata = storage_manager.get_metadata()
    if DataTypeMetaData.KEY_CLASS_NAME not in root_metadata:
        raise IncompatibleFileManagerException(
            "File %s received for upgrading 3 -> 4 is not valid, due to missing "
            "metadata: %s" % (input_file, DataTypeMetaData.KEY_CLASS_NAME))
    class_name = root_metadata[DataTypeMetaData.KEY_CLASS_NAME]

    # The stored value is decoded as UTF-8 before the substring checks below
    class_name = str(class_name, 'utf-8')
    if "ProjectionSurface" in class_name and FIELD_PROJECTION_TYPE not in root_metadata:
        LOGGER.info("Updating ProjectionSurface %s from %s" % (file_name, folder))

        # EEG is the fallback when the class name mentions neither SEEG nor MEG
        projection_type = ProjectionsType.EEG.value
        if "SEEG" in class_name:
            projection_type = ProjectionsType.SEEG.value
        elif "MEG" in class_name:
            projection_type = ProjectionsType.MEG.value

        root_metadata[FIELD_PROJECTION_TYPE] = json.dumps(projection_type)
        LOGGER.debug("Setting %s = %s" % (FIELD_PROJECTION_TYPE, projection_type))

    elif "TimeSeries" in class_name:
        LOGGER.info("Updating TS %s from %s" % (file_name, folder))

        service = ImportService()
        try:
            # The name of the containing folder is parsed as the operation id
            operation_id = int(os.path.split(folder)[1])
            dt = service.load_datatype_from_file(
                os.path.join(folder, file_name), operation_id)
            dt_db = dao.get_datatype_by_gid(dt.gid)
        except ValueError:
            # e.g. the folder name is not an integer: continue as if the datatype is not in DB
            dt_db = None

        if dt_db is not None:
            # DT already in DB (update of own storage, by making sure all fields are being correctly populated)
            dt_db.configure()
            dt_db.persist_full_metadata()
            try:
                # restore in DB, in case TVB 1.4 had wrongly imported flags
                dao.store_entity(dt_db)
            except Exception:
                # Best effort: a DB failure is logged and does not abort the file update
                LOGGER.exception(
                    "Could not update flags in DB, but we continue with the update!"
                )

        elif FIELD_SURFACE_MAPPING not in root_metadata:
            # Have default values, to avoid the full project not being imported
            root_metadata[FIELD_SURFACE_MAPPING] = json.dumps(False)
            root_metadata[FIELD_VOLUME_MAPPING] = json.dumps(False)

    # Stamp the file with the current data version, whatever its class
    root_metadata[
        TvbProfile.current.version.
        DATA_VERSION_ATTRIBUTE] = TvbProfile.current.version.DATA_VERSION
    storage_manager.set_metadata(root_metadata)
def remove_metadata_param(file_path, param):
    """
    Delete one metadata entry from the file at `file_path`, if it is present.

    :param file_path: location of the file whose metadata is edited
    :param param: name of the metadata entry to drop; nothing happens when it is absent
    """
    manager = StorageInterface.get_storage_manager(file_path)
    if param not in manager.get_metadata():
        return
    manager.remove_metadata(param)