class TVBLinkedExporter(ABCExporter):
    """
    Exports a datatype in TVB (H5) format together with all the datatypes it
    references, packed into a single ZIP archive.
    """

    def __init__(self):
        self.storage_interface = StorageInterface()

    def get_supported_types(self):
        return [DataType]

    def get_label(self):
        return "TVB Format with links"

    def export(self, data, data_export_folder, project):
        """
        Copy the storage file (HDF format) of the given datatype and, recursively,
        of every datatype it references into the export folder, then pack the
        folder as a ZIP archive.
        """
        self.copy_dt_to_export_folder(data, data_export_folder)
        export_data_zip_path = self.get_export_data_zip_path(data, data_export_folder)
        return self.export_data_with_references(export_data_zip_path, data_export_folder)

    def get_export_data_zip_path(self, data, data_export_folder):
        zip_file_name = self.get_export_file_name(data)
        return os.path.join(os.path.dirname(data_export_folder), zip_file_name)

    def export_data_with_references(self, export_data_zip_path, data_export_folder):
        self.storage_interface.write_zip_folder(export_data_zip_path, data_export_folder)
        return None, export_data_zip_path, True

    def copy_dt_to_export_folder(self, data, data_export_folder):
        data_path = h5.path_for_stored_index(data)
        with H5File.from_file(data_path) as f:
            file_destination = os.path.join(data_export_folder, os.path.basename(data_path))
            if not os.path.exists(file_destination):
                self.storage_interface.copy_file(data_path, file_destination)
                sub_dt_refs = f.gather_references()
                for _, ref_gid in sub_dt_refs:
                    if ref_gid:
                        dt = load.load_entity_by_gid(ref_gid)
                        self.copy_dt_to_export_folder(dt, data_export_folder)
        H5File.remove_metadata_param(file_destination, 'parent_burst')

    def get_export_file_extension(self, data):
        return "zip"

    def skip_group_datatypes(self):
        return True
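# Usage sketch (illustrative only, not part of the TVB sources): `dt_index` is an
# assumed, already stored datatype index, `project` its project and `tmp_folder` a
# writable export folder. It shows how the three-element tuple returned by export()
# is typically consumed.
def _demo_linked_export(dt_index, project, tmp_folder):
    exporter = TVBLinkedExporter()
    if not exporter.accepts(dt_index):
        raise ValueError("Datatype not supported by this exporter")
    # The ZIP contains the datatype H5 plus every datatype it references;
    # `delete_after` tells the caller the file may be removed once downloaded.
    file_name, zip_path, delete_after = exporter.export(dt_index, tmp_folder, project)
    return zip_path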
class TVBExporter(ABCExporter): """ This exporter simply provides for download data in TVB format """ def __init__(self): self.storage_interface = StorageInterface() def get_supported_types(self): return [DataType] def get_label(self): return "TVB Format" def export(self, data, export_folder, project): """ Exports data type: 1. If data is a normal data type, simply exports storage file (HDF format) 2. If data is a DataTypeGroup creates a zip with all files for all data types """ download_file_name = self.get_export_file_name(data) if self.is_data_a_group(data): all_datatypes = self._get_all_data_types_arr(data) if all_datatypes is None or len(all_datatypes) == 0: raise ExportException( "Could not export a data type group with no data") zip_file = os.path.join(export_folder, download_file_name) # Create ZIP archive self.storage_interface.zip_folders(all_datatypes, project.name, zip_file) return download_file_name, zip_file, True else: data_file = self.copy_dt_to_export_folder(data, export_folder) return None, data_file, True def copy_dt_to_export_folder(self, data, data_export_folder): data_path = h5.path_for_stored_index(data) file_destination = os.path.join(data_export_folder, os.path.basename(data_path)) if not os.path.exists(file_destination): self.storage_interface.copy_file(data_path, file_destination) H5File.remove_metadata_param(file_destination, 'parent_burst') return file_destination def get_export_file_extension(self, data): if self.is_data_a_group(data): return "zip" else: return "h5"
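# Usage sketch (illustrative only): `dt_index` is an assumed, already stored datatype
# index and `project` its project. TVBExporter copies a single datatype as a plain H5
# file, while a DataTypeGroup is packed as a ZIP archive.
def _demo_tvb_export(dt_index, project, export_folder):
    exporter = TVBExporter()
    extension = exporter.get_export_file_extension(dt_index)  # "h5" for single datatypes, "zip" for groups
    name, path, delete_after = exporter.export(dt_index, export_folder, project)
    # For a single datatype `name` is None and `path` points to the copied H5 file
    return extension, name, path, delete_after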
def import_conn_h5(project_id, h5_path):
    project = dao.get_project_by_id(project_id)
    TvbProfile.set_profile(TvbProfile.COMMAND_PROFILE)

    now = datetime.now()
    date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day, now.hour,
                                         now.minute, now.second, now.microsecond)
    uq_name = "%s-Connectivity" % date_str
    new_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name)

    StorageInterface.copy_file(h5_path, new_path)
    importer = ABCAdapter.build_adapter_from_class(TVBImporter)
    view_model = importer.get_view_model_class()()
    view_model.data_file = new_path
    return OperationService().fire_operation(importer, project.administrator, project_id, view_model=view_model)
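# Usage sketch (illustrative only; the project id and file path below are assumptions).
# import_conn_h5 copies the H5 file into TVB's temp folder under a unique name and
# fires the TVBImporter as a regular operation owned by the project administrator.
def _demo_import_connectivity():
    launched_operation = import_conn_h5(project_id=1, h5_path="/tmp/connectivity_76.h5")
    return launched_operation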
class ExportManager(object):
    """
    This class provides basic methods for exporting data types of projects in different formats.
    """
    all_exporters = {}  # Dictionary containing all available exporters
    export_folder = None
    EXPORT_FOLDER_NAME = "EXPORT_TMP"
    EXPORTED_SIMULATION_NAME = "exported_simulation"
    EXPORTED_SIMULATION_DTS_DIR = "datatypes"
    logger = get_logger(__name__)

    def __init__(self):
        # Here we register all available data type exporters
        # If new exporters are supported, they should be added here
        self._register_exporter(TVBExporter())
        self._register_exporter(TVBLinkedExporter())
        self.export_folder = os.path.join(TvbProfile.current.TVB_STORAGE, self.EXPORT_FOLDER_NAME)
        self.storage_interface = StorageInterface()

    def _register_exporter(self, exporter):
        """
        Register an exporter in the internal dictionary of available exporters.
        :param exporter: Instance of a data type exporter (extends ABCExporter)
        """
        if exporter is not None:
            self.all_exporters[exporter.__class__.__name__] = exporter

    def get_exporters_for_data(self, data):
        """
        Get available exporters for the given data type.
        :returns: a dictionary of the form {exporter_id: label}
        """
        if data is None:
            raise InvalidExportDataException("Could not detect exporters for null data")

        self.logger.debug("Trying to determine exporters valid for %s" % data.type)
        results = {}
        for exporter_id in self.all_exporters.keys():
            exporter = self.all_exporters[exporter_id]
            if exporter.accepts(data):
                results[exporter_id] = exporter.get_label()
        return results

    def export_data(self, data, exporter_id, project):
        """
        Export provided data using given exporter
        :param data: data type to be exported
        :param exporter_id: identifier of the exporter to be used
        :param project: project that contains data to be exported
        :returns: a tuple with the following elements:
            1. name of the file to be shown to user
            2. full path of the export file (available for download)
            3. boolean which specifies if file can be deleted after download
        """
        if data is None:
            raise InvalidExportDataException("Could not export null data. Please select data to be exported")
        if exporter_id is None:
            raise ExportException("Please select the exporter to be used for this operation")

        if exporter_id not in self.all_exporters:
            raise ExportException("Provided exporter identifier is not a valid one")

        exporter = self.all_exporters[exporter_id]

        if project is None:
            raise ExportException("Please provide the project where data files are stored")

        # Now we start the real export
        if not exporter.accepts(data):
            raise InvalidExportDataException("Current data cannot be exported by specified exporter")

        # Now compute and create the folder where to store exported data;
        # a folder unique for each export is generated
        data_export_folder = None
        try:
            data_export_folder = self.storage_interface.build_data_export_folder(data, self.export_folder)
            self.logger.debug("Start export of data: %s" % data.type)
            export_data = exporter.export(data, data_export_folder, project)
        finally:
            # In case export did not generate any file, delete the folder
            if data_export_folder is not None and len(os.listdir(data_export_folder)) == 0:
                os.rmdir(data_export_folder)

        return export_data

    def _export_linked_datatypes(self, project):
        linked_paths = ProjectService().get_linked_datatypes_storage_path(project)

        if not linked_paths:
            # do not export an empty operation
            return None, None

        # Make an import operation which will contain links to other projects
        algo = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS)
        op = model_operation.Operation(None, None, project.id, algo.id)
        op.project = project
        op.algorithm = algo
        op.id = 'links-to-external-projects'
        op.start_now()
        op.mark_complete(model_operation.STATUS_FINISHED)
        return linked_paths, op

    def export_project(self, project):
        """
        Given a project root and the TVB storage_path, create a ZIP ready for export.
        :param project: project object which identifies project to be exported
        """
        if project is None:
            raise ExportException("Please provide project to be exported")

        folders_to_exclude = self._get_op_with_errors(project.id)
        linked_paths, op = self._export_linked_datatypes(project)
        result_path = self.storage_interface.export_project(project, folders_to_exclude,
                                                            self.export_folder, linked_paths, op)
        return result_path

    @staticmethod
    def _get_op_with_errors(project_id):
        """
        Get the operation folders with error base name as list.
        """
""" operations = dao.get_operations_with_error_in_project(project_id) op_with_errors = [] for op in operations: op_with_errors.append(op.id) return op_with_errors def export_simulator_configuration(self, burst_id): burst = dao.get_burst_by_id(burst_id) if burst is None: raise InvalidExportDataException("Could not find burst with ID " + str(burst_id)) op_folder = self.storage_interface.get_project_folder(burst.project.name, str(burst.fk_simulation)) tmp_export_folder = self.storage_interface.build_data_export_folder(burst, self.export_folder) tmp_sim_folder = os.path.join(tmp_export_folder, self.EXPORTED_SIMULATION_NAME) if not os.path.exists(tmp_sim_folder): os.makedirs(tmp_sim_folder) all_view_model_paths, all_datatype_paths = h5.gather_references_of_view_model(burst.simulator_gid, op_folder) burst_path = h5.determine_filepath(burst.gid, op_folder) all_view_model_paths.append(burst_path) for vm_path in all_view_model_paths: dest = os.path.join(tmp_sim_folder, os.path.basename(vm_path)) self.storage_interface.copy_file(vm_path, dest) for dt_path in all_datatype_paths: dest = os.path.join(tmp_sim_folder, self.EXPORTED_SIMULATION_DTS_DIR, os.path.basename(dt_path)) self.storage_interface.copy_file(dt_path, dest) main_vm_path = h5.determine_filepath(burst.simulator_gid, tmp_sim_folder) H5File.remove_metadata_param(main_vm_path, 'history_gid') now = datetime.now() date_str = now.strftime("%Y-%m-%d_%H-%M") zip_file_name = "%s_%s.%s" % (date_str, str(burst_id), StorageInterface.ZIP_FILE_EXTENSION) result_path = os.path.join(tmp_export_folder, zip_file_name) self.storage_interface.write_zip_folder(result_path, tmp_sim_folder) self.storage_interface.remove_folder(tmp_sim_folder) return result_path
class FilesUpdateManager(UpdateManager): """ Manager for updating H5 files version, when code gets changed. """ UPDATE_SCRIPTS_SUFFIX = "_update_files" PROJECTS_PAGE_SIZE = 20 DATA_TYPES_PAGE_SIZE = 500 STATUS = True MESSAGE = "Done" def __init__(self): super(FilesUpdateManager, self).__init__(file_update_scripts, TvbProfile.current.version.DATA_CHECKED_TO_VERSION, TvbProfile.current.version.DATA_VERSION) self.storage_interface = StorageInterface() def get_file_data_version(self, file_path): """ Return the data version for the given file. :param file_path: the path on disk to the file for which you need the TVB data version :returns: a number representing the data version for which the input file was written """ data_version = TvbProfile.current.version.DATA_VERSION_ATTRIBUTE return self.storage_interface.get_storage_manager(file_path).get_file_data_version(data_version) def is_file_up_to_date(self, file_path): """ Returns True only if the data version of the file is equal with the data version specified into the TVB configuration file. """ try: file_version = self.get_file_data_version(file_path) except MissingDataFileException as ex: self.log.exception(ex) return False except FileStructureException as ex: self.log.exception(ex) return False if file_version == TvbProfile.current.version.DATA_VERSION: return True return False def upgrade_file(self, input_file_name, datatype=None, burst_match_dict=None): """ Upgrades the given file to the latest data version. The file will be upgraded sequentially, up until the current version from tvb.basic.config.settings.VersionSettings.DB_STRUCTURE_VERSION :param input_file_name the path to the file which needs to be upgraded :return True when update was successful and False when it resulted in an error. """ if self.is_file_up_to_date(input_file_name): # Avoid running the DB update of size, when H5 is not being changed, to speed-up return True file_version = self.get_file_data_version(input_file_name) self.log.info("Updating from version %s , file: %s " % (file_version, input_file_name)) for script_name in self.get_update_scripts(file_version): temp_file_path = os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, os.path.basename(input_file_name) + '.tmp') self.storage_interface.copy_file(input_file_name, temp_file_path) try: self.run_update_script(script_name, input_file=input_file_name, burst_match_dict=burst_match_dict) except FileMigrationException as excep: self.storage_interface.copy_file(temp_file_path, input_file_name) os.remove(temp_file_path) self.log.error(excep) return False if datatype: # Compute and update the disk_size attribute of the DataType in DB: datatype.disk_size = self.storage_interface.compute_size_on_disk(input_file_name) dao.store_entity(datatype) return True def __upgrade_h5_list(self, h5_files): """ Upgrade a list of DataTypes to the current version. :returns: (nr_of_dts_upgraded_fine, nr_of_dts_ignored) a two-tuple of integers representing the number of DataTypes for which the upgrade worked fine, and the number of DataTypes for which the upgrade has failed. 
""" nr_of_dts_upgraded_fine = 0 nr_of_dts_failed = 0 burst_match_dict = {} for path in h5_files: update_result = self.upgrade_file(path, burst_match_dict=burst_match_dict) if update_result: nr_of_dts_upgraded_fine += 1 else: nr_of_dts_failed += 1 return nr_of_dts_upgraded_fine, nr_of_dts_failed # TO DO: We should migrate the older scripts to Python 3 if we want to support migration for versions < 4 def run_all_updates(self): """ Upgrades all the data types from TVB storage to the latest data version. :returns: a two entry tuple (status, message) where status is a boolean that is True in case the upgrade was successfully for all DataTypes and False otherwise, and message is a status update message. """ if TvbProfile.current.version.DATA_CHECKED_TO_VERSION < TvbProfile.current.version.DATA_VERSION: start_time = datetime.now() file_paths = self.get_all_h5_paths() total_count = len(file_paths) no_ok, no_error = self.__upgrade_h5_list(file_paths) self.log.info("Updated H5 files in total: %d [fine:%d, failed:%d in: %s min]" % ( total_count, no_ok, no_error, int((datetime.now() - start_time).seconds / 60))) delete_old_burst_table_after_migration() # Now update the configuration file since update was done config_file_update_dict = {stored.KEY_LAST_CHECKED_FILE_VERSION: TvbProfile.current.version.DATA_VERSION} if no_error == 0: # Everything went fine config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_VALID FilesUpdateManager.STATUS = True FilesUpdateManager.MESSAGE = ("File upgrade finished successfully for all %s entries. " "Thank you for your patience!" % total_count) self.log.info(FilesUpdateManager.MESSAGE) else: # Keep track of how many DataTypes were properly updated and how many # were marked as invalid due to missing files or invalid manager. config_file_update_dict[stored.KEY_FILE_STORAGE_UPDATE_STATUS] = FILE_STORAGE_INVALID FilesUpdateManager.STATUS = False FilesUpdateManager.MESSAGE = ("Out of %s stored DataTypes, %s were upgraded successfully, but %s had " "faults and were marked invalid" % (total_count, no_ok, no_error)) self.log.warning(FilesUpdateManager.MESSAGE) TvbProfile.current.version.DATA_CHECKED_TO_VERSION = TvbProfile.current.version.DATA_VERSION TvbProfile.current.manager.add_entries_to_config_file(config_file_update_dict) @staticmethod def get_all_h5_paths(): """ This method returns a list of all h5 files and it is used in the migration from version 4 to 5. The h5 files inside a certain project are retrieved in numerical order (1, 2, 3 etc.). 
""" h5_files = [] projects_folder = StorageInterface().get_projects_folder() for project_path in os.listdir(projects_folder): # Getting operation folders inside the current project project_full_path = os.path.join(projects_folder, project_path) try: project_operations = os.listdir(project_full_path) except NotADirectoryError: continue project_operations_base_names = [os.path.basename(op) for op in project_operations] for op_folder in project_operations_base_names: try: int(op_folder) op_folder_path = os.path.join(project_full_path, op_folder) for file in os.listdir(op_folder_path): if StorageInterface().ends_with_tvb_storage_file_extension(file): h5_file = os.path.join(op_folder_path, file) try: if FilesUpdateManager._is_empty_file(h5_file): continue h5_files.append(h5_file) except FileStructureException: continue except ValueError: pass # Sort all h5 files based on their creation date stored in the files themselves sorted_h5_files = sorted(h5_files, key=lambda h5_path: FilesUpdateManager._get_create_date_for_sorting( h5_path) or datetime.now()) return sorted_h5_files @staticmethod def _is_empty_file(h5_file): return H5File.get_metadata_param(h5_file, 'Create_date') is None @staticmethod def _get_create_date_for_sorting(h5_file): create_date_str = str(H5File.get_metadata_param(h5_file, 'Create_date'), 'utf-8') create_date = string2date(create_date_str, date_format='datetime:%Y-%m-%d %H:%M:%S.%f') return create_date
class TestCSVConnectivityImporter(BaseTestCase): """ Unit-tests for csv connectivity importer. """ def setup_method(self): self.test_user = TestFactory.create_user() self.test_project = TestFactory.create_project(self.test_user) self.storage_interface = StorageInterface() def teardown_method(self): """ Clean-up tests data """ self.clean_database() def _import_csv_test_connectivity(self, reference_connectivity_gid, subject): ### First prepare input data: data_dir = path.abspath(path.dirname(tvb_data.__file__)) toronto_dir = path.join(data_dir, 'dti_pipeline_toronto') weights = path.join(toronto_dir, 'output_ConnectionCapacityMatrix.csv') tracts = path.join(toronto_dir, 'output_ConnectionDistanceMatrix.csv') weights_tmp = weights + '.tmp' tracts_tmp = tracts + '.tmp' self.storage_interface.copy_file(weights, weights_tmp) self.storage_interface.copy_file(tracts, tracts_tmp) view_model = CSVConnectivityImporterModel() view_model.weights = weights_tmp view_model.tracts = tracts_tmp view_model.data_subject = subject view_model.input_data = reference_connectivity_gid TestFactory.launch_importer(CSVConnectivityImporter, view_model, self.test_user, self.test_project, False) def test_happy_flow_import(self): """ Test that importing a CFF generates at least one DataType in DB. """ zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity', 'connectivity_96.zip') TestFactory.import_zip_connectivity(self.test_user, self.test_project, zip_path, subject=TEST_SUBJECT_A) field = FilterChain.datatype + '.subject' filters = FilterChain('', [field], [TEST_SUBJECT_A], ['==']) reference_connectivity_index = TestFactory.get_entity( self.test_project, ConnectivityIndex, filters) dt_count_before = TestFactory.get_entity_count(self.test_project, ConnectivityIndex) self._import_csv_test_connectivity(reference_connectivity_index.gid, TEST_SUBJECT_B) dt_count_after = TestFactory.get_entity_count(self.test_project, ConnectivityIndex) assert dt_count_before + 1 == dt_count_after filters = FilterChain('', [field], [TEST_SUBJECT_B], ['like']) imported_connectivity_index = TestFactory.get_entity( self.test_project, ConnectivityIndex, filters) # check relationship between the imported connectivity and the reference assert reference_connectivity_index.number_of_regions == imported_connectivity_index.number_of_regions assert not reference_connectivity_index.number_of_connections == imported_connectivity_index.number_of_connections reference_connectivity = h5.load_from_index( reference_connectivity_index) imported_connectivity = h5.load_from_index(imported_connectivity_index) assert not (reference_connectivity.weights == imported_connectivity.weights).all() assert not (reference_connectivity.tract_lengths == imported_connectivity.tract_lengths).all() assert (reference_connectivity.centres == imported_connectivity.centres ).all() assert (reference_connectivity.orientations == imported_connectivity.orientations).all() assert (reference_connectivity.region_labels == imported_connectivity.region_labels).all() def test_bad_reference(self): zip_path = path.join(path.dirname(tvb_data.__file__), 'connectivity', 'connectivity_66.zip') TestFactory.import_zip_connectivity(self.test_user, self.test_project, zip_path) field = FilterChain.datatype + '.subject' filters = FilterChain('', [field], [TEST_SUBJECT_A], ['!=']) bad_reference_connectivity = TestFactory.get_entity( self.test_project, ConnectivityIndex, filters) with pytest.raises(OperationException): self._import_csv_test_connectivity(bad_reference_connectivity.gid, 
TEST_SUBJECT_A)
class ImportService(object): """ Service for importing TVB entities into system. It supports TVB exported H5 files as input, but it should also handle H5 files generated outside of TVB, as long as they respect the same structure. """ def __init__(self): self.logger = get_logger(__name__) self.user_id = None self.storage_interface = StorageInterface() self.created_projects = [] self.view_model2adapter = self._populate_view_model2adapter() def _download_and_unpack_project_zip(self, uploaded, uq_file_name, temp_folder): if isinstance(uploaded, (FieldStorage, Part)): if not uploaded.file: raise ImportException( "Please select the archive which contains the project structure." ) with open(uq_file_name, 'wb') as file_obj: self.storage_interface.copy_file(uploaded.file, file_obj) else: shutil.copy2(uploaded, uq_file_name) try: self.storage_interface.unpack_zip(uq_file_name, temp_folder) except FileStructureException as excep: self.logger.exception(excep) raise ImportException( "Bad ZIP archive provided. A TVB exported project is expected!" ) @staticmethod def _compute_unpack_path(): """ :return: the name of the folder where to expand uploaded zip """ now = datetime.now() date_str = "%d-%d-%d_%d-%d-%d_%d" % (now.year, now.month, now.day, now.hour, now.minute, now.second, now.microsecond) uq_name = "%s-ImportProject" % date_str return os.path.join(TvbProfile.current.TVB_TEMP_FOLDER, uq_name) @transactional def import_project_structure(self, uploaded, user_id): """ Execute import operations: 1. check if ZIP or folder 2. find all project nodes 3. for each project node: - create project - create all operations and groups - import all images - create all dataTypes """ self.user_id = user_id self.created_projects = [] # Now compute the name of the folder where to explode uploaded ZIP file temp_folder = self._compute_unpack_path() uq_file_name = temp_folder + ".zip" try: self._download_and_unpack_project_zip(uploaded, uq_file_name, temp_folder) self._import_project_from_folder(temp_folder) except Exception as excep: self.logger.exception( "Error encountered during import. Deleting projects created during this operation." ) # Remove project folders created so far. # Note that using the project service to remove the projects will not work, # because we do not have support for nested transaction. # Removing from DB is not necessary because in transactional env a simple exception throw # will erase everything to be inserted. for project in self.created_projects: self.storage_interface.remove_project(project) raise ImportException(str(excep)) finally: # Now delete uploaded file and temporary folder where uploaded ZIP was exploded. self.storage_interface.remove_files([uq_file_name, temp_folder]) def _import_project_from_folder(self, temp_folder): """ Process each project from the uploaded pack, to extract names. """ temp_project_path = None for root, _, files in os.walk(temp_folder): if StorageInterface.TVB_PROJECT_FILE in files: temp_project_path = root break if temp_project_path is not None: update_manager = ProjectUpdateManager(temp_project_path) if update_manager.checked_version < 3: raise ImportException( 'Importing projects with versions older than 3 is not supported in TVB 2! 
' 'Please import the project in TVB 1.5.8 and then launch the current version of ' 'TVB in order to upgrade this project!') update_manager.run_all_updates() project = self.__populate_project(temp_project_path) # Populate the internal list of create projects so far, for cleaning up folders, in case of failure self.created_projects.append(project) # Ensure project final folder exists on disk project_path = self.storage_interface.get_project_folder( project.name) shutil.move( os.path.join(temp_project_path, StorageInterface.TVB_PROJECT_FILE), project_path) # Now import project operations with their results self.import_list_of_operations(project, temp_project_path) # Import images and move them from temp into target self._store_imported_images(project, temp_project_path, project.name) if StorageInterface.encryption_enabled(): self.storage_interface.remove_project(project, True) def _load_datatypes_from_operation_folder(self, src_op_path, operation_entity, datatype_group): """ Loads datatypes from operation folder :returns: Datatype entities as dict {original_path: Dt instance} """ all_datatypes = {} for file_name in os.listdir(src_op_path): if self.storage_interface.ends_with_tvb_storage_file_extension( file_name): h5_file = os.path.join(src_op_path, file_name) try: file_update_manager = FilesUpdateManager() file_update_manager.upgrade_file(h5_file) datatype = self.load_datatype_from_file( h5_file, operation_entity.id, datatype_group, operation_entity.fk_launched_in) all_datatypes[h5_file] = datatype except IncompatibleFileManagerException: os.remove(h5_file) self.logger.warning( "Incompatible H5 file will be ignored: %s" % h5_file) self.logger.exception("Incompatibility details ...") return all_datatypes @staticmethod def check_import_references(file_path, datatype): h5_class = H5File.h5_class_from_file(file_path) reference_list = h5_class(file_path).gather_references() for _, reference_gid in reference_list: if not reference_gid: continue ref_index = load.load_entity_by_gid(reference_gid) if ref_index is None: os.remove(file_path) dao.remove_entity(datatype.__class__, datatype.id) raise MissingReferenceException( 'Imported file depends on datatypes that do not exist. 
Please upload ' 'those first!') def _store_or_link_burst_config(self, burst_config, bc_path, project_id): bc_already_in_tvb = dao.get_generic_entity(BurstConfiguration, burst_config.gid, 'gid') if len(bc_already_in_tvb) == 0: self.store_datatype(burst_config, bc_path) return 1 return 0 def store_or_link_datatype(self, datatype, dt_path, project_id): self.check_import_references(dt_path, datatype) stored_dt_count = 0 datatype_already_in_tvb = load.load_entity_by_gid(datatype.gid) if not datatype_already_in_tvb: self.store_datatype(datatype, dt_path) stored_dt_count = 1 elif datatype_already_in_tvb.parent_operation.project.id != project_id: AlgorithmService.create_link(datatype_already_in_tvb.id, project_id) if datatype_already_in_tvb.fk_datatype_group: AlgorithmService.create_link( datatype_already_in_tvb.fk_datatype_group, project_id) return stored_dt_count def _store_imported_datatypes_in_db(self, project, all_datatypes): # type: (Project, dict) -> int sorted_dts = sorted( all_datatypes.items(), key=lambda dt_item: dt_item[1].create_date or datetime.now()) count = 0 for dt_path, datatype in sorted_dts: count += self.store_or_link_datatype(datatype, dt_path, project.id) return count def _store_imported_images(self, project, temp_project_path, project_name): """ Import all images from project """ images_root = os.path.join(temp_project_path, StorageInterface.IMAGES_FOLDER) target_images_path = self.storage_interface.get_images_folder( project_name) for root, _, files in os.walk(images_root): for metadata_file in files: if self.storage_interface.ends_with_tvb_file_extension( metadata_file): self._import_image(root, metadata_file, project.id, target_images_path) @staticmethod def _populate_view_model2adapter(): if len(VIEW_MODEL2ADAPTER) > 0: return VIEW_MODEL2ADAPTER view_model2adapter = {} algos = dao.get_all_algorithms() for algo in algos: adapter = ABCAdapter.build_adapter(algo) view_model_class = adapter.get_view_model_class() view_model2adapter[view_model_class] = algo return view_model2adapter def _retrieve_operations_in_order(self, project, import_path, importer_operation_id=None): # type: (Project, str, int) -> list[Operation2ImportData] retrieved_operations = [] for root, _, files in os.walk(import_path): if OPERATION_XML in files: # Previous Operation format for uploading previous versions of projects operation_file_path = os.path.join(root, OPERATION_XML) operation, operation_xml_parameters, _ = self.build_operation_from_file( project, operation_file_path) operation.import_file = operation_file_path self.logger.debug("Found operation in old XML format: " + str(operation)) retrieved_operations.append( Operation2ImportData( operation, root, info_from_xml=operation_xml_parameters)) else: # We strive for the new format with ViewModelH5 main_view_model = None dt_paths = [] all_view_model_files = [] for file in files: if self.storage_interface.ends_with_tvb_storage_file_extension( file): h5_file = os.path.join(root, file) try: h5_class = H5File.h5_class_from_file(h5_file) if h5_class is ViewModelH5: all_view_model_files.append(h5_file) if not main_view_model: view_model = h5.load_view_model_from_file( h5_file) if type( view_model ) in self.view_model2adapter.keys(): main_view_model = view_model else: file_update_manager = FilesUpdateManager() file_update_manager.upgrade_file(h5_file) dt_paths.append(h5_file) except Exception: self.logger.warning( "Unreadable H5 file will be ignored: %s" % h5_file) if main_view_model is not None: alg = self.view_model2adapter[type(main_view_model)] 
op_group_id = None if main_view_model.operation_group_gid: op_group = dao.get_operationgroup_by_gid( main_view_model.operation_group_gid.hex) if not op_group: op_group = OperationGroup( project.id, ranges=json.loads(main_view_model.ranges), gid=main_view_model.operation_group_gid.hex) op_group = dao.store_entity(op_group) op_group_id = op_group.id operation = Operation( main_view_model.gid.hex, project.fk_admin, project.id, alg.id, status=STATUS_FINISHED, user_group=main_view_model.generic_attributes. operation_tag, start_date=datetime.now(), completion_date=datetime.now(), op_group_id=op_group_id, range_values=main_view_model.range_values) operation.create_date = main_view_model.create_date operation.visible = main_view_model.generic_attributes.visible self.logger.debug( "Found main ViewModel to create operation for it: " + str(operation)) retrieved_operations.append( Operation2ImportData(operation, root, main_view_model, dt_paths, all_view_model_files)) elif len(dt_paths) > 0: alg = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS) default_adapter = ABCAdapter.build_adapter(alg) view_model = default_adapter.get_view_model_class()() view_model.data_file = dt_paths[0] vm_path = h5.store_view_model(view_model, root) all_view_model_files.append(vm_path) operation = Operation(view_model.gid.hex, project.fk_admin, project.id, alg.id, status=STATUS_FINISHED, start_date=datetime.now(), completion_date=datetime.now()) operation.create_date = datetime.min self.logger.debug( "Found no ViewModel in folder, so we default to " + str(operation)) if importer_operation_id: operation.id = importer_operation_id retrieved_operations.append( Operation2ImportData(operation, root, view_model, dt_paths, all_view_model_files, True)) return sorted(retrieved_operations, key=lambda op_data: op_data.order_field) def create_view_model(self, operation_entity, operation_data, new_op_folder, generic_attributes=None, add_params=None): view_model = self._get_new_form_view_model( operation_entity, operation_data.info_from_xml) if add_params is not None: for element in add_params: key_attr = getattr(view_model, element[0]) setattr(key_attr, element[1], element[2]) view_model.range_values = operation_entity.range_values op_group = dao.get_operationgroup_by_id( operation_entity.fk_operation_group) if op_group: view_model.operation_group_gid = uuid.UUID(op_group.gid) view_model.ranges = json.dumps(op_group.range_references) view_model.is_metric_operation = 'DatatypeMeasure' in op_group.name if generic_attributes is not None: view_model.generic_attributes = generic_attributes view_model.generic_attributes.operation_tag = operation_entity.user_group h5.store_view_model(view_model, new_op_folder) view_model_disk_size = StorageInterface.compute_recursive_h5_disk_usage( new_op_folder) operation_entity.view_model_disk_size = view_model_disk_size operation_entity.view_model_gid = view_model.gid.hex dao.store_entity(operation_entity) return view_model def import_list_of_operations(self, project, import_path, is_group=False, importer_operation_id=None): """ This method scans provided folder and identify all operations that needs to be imported """ all_dts_count = 0 all_stored_dts_count = 0 imported_operations = [] ordered_operations = self._retrieve_operations_in_order( project, import_path, None if is_group else importer_operation_id) if is_group and len(ordered_operations) > 0: first_op = dao.get_operation_by_id(importer_operation_id) vm_path = h5.determine_filepath(first_op.view_model_gid, 
os.path.dirname(import_path)) os.remove(vm_path) ordered_operations[0].operation.id = importer_operation_id for operation_data in ordered_operations: if operation_data.is_old_form: operation_entity, datatype_group = self.import_operation( operation_data.operation) new_op_folder = self.storage_interface.get_project_folder( project.name, str(operation_entity.id)) try: operation_datatypes = self._load_datatypes_from_operation_folder( operation_data.operation_folder, operation_entity, datatype_group) # Create and store view_model from operation self.create_view_model(operation_entity, operation_data, new_op_folder) self._store_imported_datatypes_in_db( project, operation_datatypes) imported_operations.append(operation_entity) except MissingReferenceException: operation_entity.status = STATUS_ERROR dao.store_entity(operation_entity) elif operation_data.main_view_model is not None: operation_data.operation.create_date = datetime.now() operation_data.operation.start_date = datetime.now() operation_data.operation.completion_date = datetime.now() do_merge = False if importer_operation_id: do_merge = True operation_entity = dao.store_entity(operation_data.operation, merge=do_merge) dt_group = None op_group = dao.get_operationgroup_by_id( operation_entity.fk_operation_group) if op_group: dt_group = dao.get_datatypegroup_by_op_group_id( op_group.id) if not dt_group: first_op = dao.get_operations_in_group( op_group.id, only_first_operation=True) dt_group = DataTypeGroup( op_group, operation_id=first_op.id, state=DEFAULTDATASTATE_INTERMEDIATE) dt_group = dao.store_entity(dt_group) # Store the DataTypes in db dts = {} all_dts_count += len(operation_data.dt_paths) for dt_path in operation_data.dt_paths: dt = self.load_datatype_from_file(dt_path, operation_entity.id, dt_group, project.id) if isinstance(dt, BurstConfiguration): if op_group: dt.fk_operation_group = op_group.id all_stored_dts_count += self._store_or_link_burst_config( dt, dt_path, project.id) else: dts[dt_path] = dt if op_group: op_group.fill_operationgroup_name(dt.type) dao.store_entity(op_group) try: stored_dts_count = self._store_imported_datatypes_in_db( project, dts) all_stored_dts_count += stored_dts_count if operation_data.main_view_model.is_metric_operation: self._update_burst_metric(operation_entity) imported_operations.append(operation_entity) new_op_folder = self.storage_interface.get_project_folder( project.name, str(operation_entity.id)) view_model_disk_size = 0 for h5_file in operation_data.all_view_model_files: view_model_disk_size += StorageInterface.compute_size_on_disk( h5_file) shutil.move(h5_file, new_op_folder) operation_entity.view_model_disk_size = view_model_disk_size dao.store_entity(operation_entity) except MissingReferenceException as excep: self.storage_interface.remove_operation_data( project.name, operation_entity.id) operation_entity.fk_operation_group = None dao.store_entity(operation_entity) dao.remove_entity(DataTypeGroup, dt_group.id) raise excep else: self.logger.warning( "Folder %s will be ignored, as we could not find a serialized " "operation or DTs inside!" 
% operation_data.operation_folder) # We want importer_operation_id to be kept just for the first operation (the first iteration) if is_group: importer_operation_id = None self._update_dt_groups(project.id) self._update_burst_configurations(project.id) return imported_operations, all_dts_count, all_stored_dts_count @staticmethod def _get_new_form_view_model(operation, xml_parameters): # type (Operation) -> ViewModel algo = dao.get_algorithm_by_id(operation.fk_from_algo) ad = ABCAdapter.build_adapter(algo) view_model = ad.get_view_model_class()() if xml_parameters: declarative_attrs = type(view_model).declarative_attrs if isinstance(xml_parameters, str): xml_parameters = json.loads(xml_parameters) for param in xml_parameters: new_param_name = param if param != '' and param[0] == "_": new_param_name = param[1:] new_param_name = new_param_name.lower() if new_param_name in declarative_attrs: try: setattr(view_model, new_param_name, xml_parameters[param]) except (TraitTypeError, TraitAttributeError): pass return view_model def _import_image(self, src_folder, metadata_file, project_id, target_images_path): """ Create and store a image entity. """ figure_dict = StorageInterface().read_metadata_from_xml( os.path.join(src_folder, metadata_file)) actual_figure = os.path.join( src_folder, os.path.split(figure_dict['file_path'])[1]) if not os.path.exists(actual_figure): self.logger.warning("Expected to find image path %s .Skipping" % actual_figure) return figure_dict['fk_user_id'] = self.user_id figure_dict['fk_project_id'] = project_id figure_entity = manager_of_class(ResultFigure).new_instance() figure_entity = figure_entity.from_dict(figure_dict) stored_entity = dao.store_entity(figure_entity) # Update image meta-data with the new details after import figure = dao.load_figure(stored_entity.id) shutil.move(actual_figure, target_images_path) self.logger.debug("Store imported figure") _, meta_data = figure.to_dict() self.storage_interface.write_image_metadata(figure, meta_data) def load_datatype_from_file(self, current_file, op_id, datatype_group=None, current_project_id=None): # type: (str, int, DataTypeGroup, int) -> HasTraitsIndex """ Creates an instance of datatype from storage / H5 file :returns: DatatypeIndex """ self.logger.debug("Loading DataType from file: %s" % current_file) h5_class = H5File.h5_class_from_file(current_file) if h5_class is BurstConfigurationH5: if current_project_id is None: op_entity = dao.get_operationgroup_by_id(op_id) current_project_id = op_entity.fk_launched_in h5_file = BurstConfigurationH5(current_file) burst = BurstConfiguration(current_project_id) burst.fk_simulation = op_id h5_file.load_into(burst) result = burst else: datatype, generic_attributes = h5.load_with_links(current_file) already_existing_datatype = h5.load_entity_by_gid(datatype.gid) if datatype_group is not None and already_existing_datatype is not None: raise DatatypeGroupImportException( "The datatype group that you are trying to import" " already exists!") index_class = h5.REGISTRY.get_index_for_datatype( datatype.__class__) datatype_index = index_class() datatype_index.fill_from_has_traits(datatype) datatype_index.fill_from_generic_attributes(generic_attributes) if datatype_group is not None and hasattr(datatype_index, 'fk_source_gid') and \ datatype_index.fk_source_gid is not None: ts = h5.load_entity_by_gid(datatype_index.fk_source_gid) if ts is None: op = dao.get_operations_in_group( datatype_group.fk_operation_group, only_first_operation=True) op.fk_operation_group = None dao.store_entity(op) 
dao.remove_entity(OperationGroup, datatype_group.fk_operation_group) dao.remove_entity(DataTypeGroup, datatype_group.id) raise DatatypeGroupImportException( "Please import the time series group before importing the" " datatype measure group!") # Add all the required attributes if datatype_group: datatype_index.fk_datatype_group = datatype_group.id if len(datatype_group.subject) == 0: datatype_group.subject = datatype_index.subject dao.store_entity(datatype_group) datatype_index.fk_from_operation = op_id associated_file = h5.path_for_stored_index(datatype_index) if os.path.exists(associated_file): datatype_index.disk_size = StorageInterface.compute_size_on_disk( associated_file) result = datatype_index return result def store_datatype(self, datatype, current_file=None): """This method stores data type into DB""" try: self.logger.debug("Store datatype: %s with Gid: %s" % (datatype.__class__.__name__, datatype.gid)) # Now move storage file into correct folder if necessary if current_file is not None: final_path = h5.path_for_stored_index(datatype) if final_path != current_file: shutil.move(current_file, final_path) stored_entry = load.load_entity_by_gid(datatype.gid) if not stored_entry: stored_entry = dao.store_entity(datatype) return stored_entry except MissingDataSetException as e: self.logger.exception(e) error_msg = "Datatype %s has missing data and could not be imported properly." % ( datatype, ) raise ImportException(error_msg) except IntegrityError as excep: self.logger.exception(excep) error_msg = "Could not import data with gid: %s. There is already a one with " \ "the same name or gid." % datatype.gid raise ImportException(error_msg) def __populate_project(self, project_path): """ Create and store a Project entity. """ self.logger.debug("Creating project from path: %s" % project_path) project_dict = self.storage_interface.read_project_metadata( project_path) project_entity = manager_of_class(Project).new_instance() project_entity = project_entity.from_dict(project_dict, self.user_id) try: self.logger.debug("Storing imported project") return dao.store_entity(project_entity) except IntegrityError as excep: self.logger.exception(excep) error_msg = ( "Could not import project: %s with gid: %s. There is already a " "project with the same name or gid.") % (project_entity.name, project_entity.gid) raise ImportException(error_msg) def build_operation_from_file(self, project, operation_file): """ Create Operation entity from metadata file. """ operation_dict = StorageInterface().read_metadata_from_xml( operation_file) operation_entity = manager_of_class(Operation).new_instance() return operation_entity.from_dict(operation_dict, dao, self.user_id, project.gid) @staticmethod def import_operation(operation_entity, migration=False): """ Store a Operation entity. """ do_merge = False if operation_entity.id: do_merge = True operation_entity = dao.store_entity(operation_entity, merge=do_merge) operation_group_id = operation_entity.fk_operation_group datatype_group = None if operation_group_id is not None: datatype_group = dao.get_datatypegroup_by_op_group_id( operation_group_id) if datatype_group is None and migration is False: # If no dataType group present for current op. group, create it. 
operation_group = dao.get_operationgroup_by_id( operation_group_id) datatype_group = DataTypeGroup( operation_group, operation_id=operation_entity.id) datatype_group.state = UploadAlgorithmCategoryConfig.defaultdatastate datatype_group = dao.store_entity(datatype_group) return operation_entity, datatype_group def import_simulator_configuration_zip(self, zip_file): # Now compute the name of the folder where to explode uploaded ZIP file temp_folder = self._compute_unpack_path() uq_file_name = temp_folder + ".zip" if isinstance(zip_file, (FieldStorage, Part)): if not zip_file.file: raise ServicesBaseException( "Could not process the given ZIP file...") with open(uq_file_name, 'wb') as file_obj: self.storage_interface.copy_file(zip_file.file, file_obj) else: shutil.copy2(zip_file, uq_file_name) try: self.storage_interface.unpack_zip(uq_file_name, temp_folder) return temp_folder except FileStructureException as excep: raise ServicesBaseException( "Could not process the given ZIP file..." + str(excep)) @staticmethod def _update_burst_metric(operation_entity): burst_config = dao.get_burst_for_operation_id(operation_entity.id) if burst_config and burst_config.ranges: if burst_config.fk_metric_operation_group is None: burst_config.fk_metric_operation_group = operation_entity.fk_operation_group dao.store_entity(burst_config) @staticmethod def _update_dt_groups(project_id): dt_groups = dao.get_datatypegroup_for_project(project_id) for dt_group in dt_groups: dt_group.count_results = dao.count_datatypes_in_group(dt_group.id) dts_in_group = dao.get_datatypes_from_datatype_group(dt_group.id) if dts_in_group: dt_group.fk_parent_burst = dts_in_group[0].fk_parent_burst dao.store_entity(dt_group) @staticmethod def _update_burst_configurations(project_id): burst_configs = dao.get_bursts_for_project(project_id) for burst_config in burst_configs: burst_config.datatypes_number = dao.count_datatypes_in_burst( burst_config.gid) dao.store_entity(burst_config)
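# Usage sketch (illustrative only; the archive path and user id are assumptions):
# imports a previously exported project archive. On failure, project folders created
# so far are removed and an ImportException is raised.
def _demo_import_project(zip_path="/tmp/exported_project.zip", user_id=1):
    service = ImportService()
    service.import_project_structure(zip_path, user_id)
    return service.created_projects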
class ExportManager(object):
    """
    This class provides basic methods for exporting data types of projects in different formats.
    """
    all_exporters = {}  # Dictionary containing all available exporters
    logger = get_logger(__name__)

    def __init__(self):
        # Here we register all available data type exporters
        # If new exporters are supported, they should be added here
        self._register_exporter(TVBExporter())
        self._register_exporter(TVBLinkedExporter())
        self.storage_interface = StorageInterface()

    def _register_exporter(self, exporter):
        """
        Register an exporter in the internal dictionary of available exporters.
        :param exporter: Instance of a data type exporter (extends ABCExporter)
        """
        if exporter is not None:
            self.all_exporters[exporter.__class__.__name__] = exporter

    def get_exporters_for_data(self, data):
        """
        Get available exporters for the given data type.
        :returns: a dictionary of the form {exporter_id: label}
        """
        if data is None:
            raise InvalidExportDataException("Could not detect exporters for null data")

        self.logger.debug("Trying to determine exporters valid for %s" % data.type)
        results = {}
        for exporter_id in self.all_exporters.keys():
            exporter = self.all_exporters[exporter_id]
            if exporter.accepts(data):
                results[exporter_id] = exporter.get_label()
        return results

    def export_data(self, data, exporter_id, project, user_public_key=None):
        """
        Export provided data using given exporter
        :param data: data type to be exported
        :param exporter_id: identifier of the exporter to be used
        :param project: project that contains data to be exported
        :param user_public_key: public key file used for encrypting data before exporting
        :returns: a tuple with the following elements:
            1. name of the file to be shown to user
            2. full path of the export file (available for download)
            3. boolean which specifies if file can be deleted after download
        """
        if data is None:
            raise InvalidExportDataException("Could not export null data. Please select data to be exported")
        if exporter_id is None:
            raise ExportException("Please select the exporter to be used for this operation")

        if exporter_id not in self.all_exporters:
            raise ExportException("Provided exporter identifier is not a valid one")

        exporter = self.all_exporters[exporter_id]

        if project is None:
            raise ExportException("Please provide the project where data files are stored")

        if user_public_key is not None:
            public_key_path, encryption_password = self.storage_interface.prepare_encryption(project.name)
            if isinstance(user_public_key, (FieldStorage, Part)):
                with open(public_key_path, 'wb') as file_obj:
                    self.storage_interface.copy_file(user_public_key.file, file_obj)
            else:
                shutil.copy2(user_public_key, public_key_path)
        else:
            public_key_path, encryption_password = None, None

        # Now we start the real export
        if not exporter.accepts(data):
            raise InvalidExportDataException("Current data cannot be exported by specified exporter")

        export_data = None
        try:
            self.logger.debug("Start export of data: %s" % data.type)
            export_data = exporter.export(data, project, public_key_path, encryption_password)
        except Exception:
            # Do not let a failed export pass silently; the caller still receives None
            self.logger.exception("Export failed for data: %s" % data.type)

        return export_data

    @staticmethod
    def _get_paths_of_linked_datatypes(project):
        linked_paths = ProjectService().get_linked_datatypes_storage_path(project)

        if not linked_paths:
            # do not export an empty operation
            return None, None

        # Make an import operation which will contain links to other projects
        algo = dao.get_algorithm_by_module(TVB_IMPORTER_MODULE, TVB_IMPORTER_CLASS)
        op = model_operation.Operation(None, None, project.id, algo.id)
        op.project = project
        op.algorithm = algo
        op.id = 'links-to-external-projects'
        op.start_now()
        op.mark_complete(model_operation.STATUS_FINISHED)
        return linked_paths, op

    def export_project(self, project):
        """
        Given a project root and the TVB storage_path, create a ZIP ready for export.
        :param project: project object which identifies project to be exported
        """
        if project is None:
            raise ExportException("Please provide project to be exported")

        folders_to_exclude = self._get_op_with_errors(project.id)
        linked_paths, op = self._get_paths_of_linked_datatypes(project)

        result_path = self.storage_interface.export_project(project, folders_to_exclude, linked_paths, op)
        return result_path

    @staticmethod
    def _get_op_with_errors(project_id):
        """
        Get the operation folders with error base name as list.
        """
        operations = dao.get_operations_with_error_in_project(project_id)
        op_with_errors = []
        for op in operations:
            op_with_errors.append(op.id)
        return op_with_errors

    def export_simulator_configuration(self, burst_id):
        burst = dao.get_burst_by_id(burst_id)
        if burst is None:
            raise InvalidExportDataException("Could not find burst with ID " + str(burst_id))

        op_folder = self.storage_interface.get_project_folder(burst.project.name, str(burst.fk_simulation))
        all_view_model_paths, all_datatype_paths = h5.gather_references_of_view_model(burst.simulator_gid, op_folder)

        burst_path = h5.determine_filepath(burst.gid, op_folder)
        all_view_model_paths.append(burst_path)

        zip_filename = ABCExporter.get_export_file_name(burst, self.storage_interface.TVB_ZIP_FILE_EXTENSION)
        result_path = self.storage_interface.export_simulator_configuration(burst, all_view_model_paths,
                                                                            all_datatype_paths, zip_filename)
        return result_path
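# Usage sketch (illustrative only): export with the encryption-aware ExportManager
# variant above. `dt_index`, `project` and the public key path are assumptions; when a
# public key is provided, the payload is encrypted before being offered for download.
def _demo_encrypted_export(dt_index, project, public_key="/tmp/user_rsa.pub"):
    manager = ExportManager()
    return manager.export_data(dt_index, "TVBLinkedExporter", project, user_public_key=public_key)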