def test_migrations(migration_data, tmp_path):
    """Test each migration method from the `aiida.tools.importexport.archive.migrations` module."""
    version_old, (version_new, migration_method) = migration_data

    # Load the reference archive that is already at the target version.
    archive_new = get_archive_file(f'export_v{version_new}_simple.aiida', filepath='export/migrate')
    metadata_new = read_json_files(archive_new, names=['metadata.json'])[0]
    verify_metadata_version(metadata_new, version=version_new)
    data_new = read_json_files(archive_new, names=['data.json'])[0]

    # Extract the old archive into a scratch folder and migrate it in place.
    archive_old = get_archive_file(f'export_v{version_old}_simple.aiida', filepath='export/migrate')
    out_path = tmp_path / 'out.aiida'
    with zipfile.ZipFile(archive_old, 'r', allowZip64=True) as archive:
        archive.extractall(out_path)

    folder = CacheFolder(out_path)
    migration_method(folder)

    _, metadata_old = folder.load_json('metadata.json')
    _, data_old = folder.load_json('data.json')
    verify_metadata_version(metadata_old, version=version_new)

    # Remove AiiDA version, since this may change regardless of the migration function
    metadata_old.pop('aiida_version')
    metadata_new.pop('aiida_version')

    # Assert conversion message in `metadata.json` is correct and then remove it for later assertions
    metadata_new.pop('conversion_info')
    message = f'Converted from version {version_old} to {version_new} with AiiDA v{get_version()}'
    assert metadata_old.pop('conversion_info')[-1] == message, 'Conversion message after migration is wrong'

    assert metadata_old == metadata_new
    assert data_old == data_new
def migration_trajectory_symbols_to_attribute(data: dict, folder: CacheFolder):
    """Apply migrations: 0026 - REV. 1.0.26 and 0027 - REV. 1.0.27

    Create the symbols attribute from the repository array for all `TrajectoryData` nodes.
    """
    from aiida.tools.importexport.common.config import NODES_EXPORT_SUBFOLDER

    base_path = folder.get_path(flush=False)

    for pk, fields in data['export_data'].get('Node', {}).items():
        if fields.get('type', '') != 'node.data.array.trajectory.TrajectoryData.':
            continue

        uuid = fields['uuid']
        # Node repository files are sharded by the first two pairs of UUID characters.
        npy_file = base_path.joinpath(NODES_EXPORT_SUBFOLDER, uuid[0:2], uuid[2:4], uuid[4:], 'path', 'symbols.npy')
        symbols = np.load(os.path.abspath(npy_file)).tolist()
        npy_file.unlink()

        # Move the symbols from the repository array into 'node_attributes'
        data['node_attributes'][pk].pop('array|symbols', None)
        data['node_attributes'][pk]['symbols'] = symbols

        # Mirror the change in 'node_attributes_conversion'
        data['node_attributes_conversion'][pk].pop('array|symbols', None)
        data['node_attributes_conversion'][pk]['symbols'] = [None] * len(symbols)
def migrate_v9_to_v10(folder: CacheFolder):
    """Migration of archive files from v0.9 to v0.10."""
    old_version = '0.9'
    new_version = '0.10'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    # Register the attribute/extra fields with a 'jsonb' conversion type.
    all_fields_info = metadata['all_fields_info']
    all_fields_info['Node']['attributes'] = {'convert_type': 'jsonb'}
    all_fields_info['Node']['extras'] = {'convert_type': 'jsonb'}
    all_fields_info['Group']['extras'] = {'convert_type': 'jsonb'}

    folder.write_json('metadata.json', metadata)
def _perform_migration(self, work_dir: Path, pathway: List[str], out_compression: str,
                       out_path: Optional[Union[str, Path]]) -> Path:
    """Perform the migration(s) in the work directory, compress (if necessary),
    then move to the out_path (if not None).

    :param work_dir: temporary working directory into which the archive is extracted
    :param pathway: ordered list of source versions to migrate through
    :param out_compression: one of 'zip', 'zip-uncompressed', 'tar.gz' or 'none'
    :param out_path: if not None, the final location the migrated archive is moved to
    :return: the path of the migrated archive
    :raises ArchiveMigrationError: if a migration step detects a dangling link
    """
    MIGRATE_LOGGER.info('Extracting archive to work directory')

    extracted = Path(work_dir) / 'extracted'
    extracted.mkdir(parents=True)

    with get_progress_reporter()(total=1) as progress:
        callback = create_callback(progress)
        self._extract_archive(extracted, callback)

    with CacheFolder(extracted) as folder:
        with get_progress_reporter()(total=len(pathway), desc='Performing migrations: ') as progress:
            for from_version in pathway:
                to_version = MIGRATE_FUNCTIONS[from_version][0]
                progress.set_description_str(f'Performing migrations: {from_version} -> {to_version}', refresh=True)
                try:
                    MIGRATE_FUNCTIONS[from_version][1](folder)
                except DanglingLinkError as exc:
                    # FIX: chain the original exception so the offending link is preserved
                    # in the traceback instead of being silently discarded.
                    raise ArchiveMigrationError(
                        'Archive file is invalid because it contains dangling links'
                    ) from exc
                progress.update()
        MIGRATE_LOGGER.debug('Flushing cache')

    # re-compress archive
    if out_compression != 'none':
        MIGRATE_LOGGER.info(f"Re-compressing archive as '{out_compression}'")
        migrated = work_dir / 'compressed'
    else:
        migrated = extracted

    if out_compression == 'zip':
        self._compress_archive_zip(extracted, migrated, zipfile.ZIP_DEFLATED)
    elif out_compression == 'zip-uncompressed':
        self._compress_archive_zip(extracted, migrated, zipfile.ZIP_STORED)
    elif out_compression == 'tar.gz':
        self._compress_archive_tar(extracted, migrated)

    if out_path is not None:
        # move to final location
        MIGRATE_LOGGER.info('Moving archive to: %s', out_path)
        self._move_file(migrated, Path(out_path))

    return Path(out_path) if out_path else migrated
def migrate_v7_to_v8(folder: CacheFolder):
    """Migration of archive files from v0.7 to v0.8."""
    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.7')
    update_metadata(metadata, '0.8')

    _, data = folder.load_json('data.json')

    # Apply the data migration for this version step
    migration_default_link_label(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v8_to_v9(folder: CacheFolder):
    """Migration of archive files from v0.8 to v0.9."""
    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.8')
    update_metadata(metadata, '0.9')

    _, data = folder.load_json('data.json')

    # Apply the data migration for this version step
    migration_dbgroup_type_string(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v6_to_v7(folder: CacheFolder):
    """Migration of archive files from v0.6 to v0.7"""
    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.6')
    update_metadata(metadata, '0.7')

    _, data = folder.load_json('data.json')

    # Apply the migrations for this version step: one touches the data,
    # the other the metadata.
    data_migration_legacy_process_attributes(data)
    remove_attribute_link_metadata(metadata)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v5_to_v6(folder: CacheFolder):
    """Migration of archive files from v0.5 to v0.6"""
    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.5')
    update_metadata(metadata, '0.6')

    _, data = folder.load_json('data.json')

    # Apply the data migrations for this version step, in order
    migration_serialize_datetime_objects(data)
    migration_migrate_legacy_job_calculation_data(data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v4_to_v5(folder: CacheFolder):
    """
    Migration of archive files from v0.4 to v0.5

    This is from migration 0034 (drop_node_columns_nodeversion_public) and onwards
    """
    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.4')
    update_metadata(metadata, '0.5')

    _, data = folder.load_json('data.json')

    # Apply the migrations for this version step, in order
    migration_drop_node_columns_nodeversion_public(metadata, data)
    migration_drop_computer_transport_params(metadata, data)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v1_to_v2(folder: CacheFolder):
    """
    Migration of archive files from v0.1 to v0.2, which means generalizing the
    field names with respect to the database backend

    :param metadata: the content of an export archive metadata.json file
    :param data: the content of an export archive data.json file
    """
    old_start = 'aiida.djsite'
    new_start = 'aiida.backends.djsite'

    _, metadata = folder.load_json('metadata.json')
    verify_metadata_version(metadata, '0.1')
    update_metadata(metadata, '0.2')

    _, data = folder.load_json('data.json')

    def _rename_prefixed_keys(mapping):
        # Rename, in place, every key that carries the old backend prefix.
        for key in list(mapping):
            if key.startswith(old_start):
                mapping[get_new_string(key, old_start, new_start)] = mapping.pop(key)

    _rename_prefixed_keys(data['export_data'])
    _rename_prefixed_keys(metadata['unique_identifiers'])
    _rename_prefixed_keys(metadata['all_fields_info'])

    # Also rewrite the prefixed 'requires' values nested inside 'all_fields_info'
    metadata['all_fields_info'] = replace_requires(metadata['all_fields_info'], old_start, new_start)

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v3_to_v4(folder: CacheFolder):
    """
    Migration of archive files from v0.3 to v0.4

    Note concerning migration 0032 - REV. 1.0.32:
    Remove legacy workflow tables: DbWorkflow, DbWorkflowData, DbWorkflowStep
    These were (according to Antimo Marrazzo) never exported.

    :param folder: the cache folder holding the archive's metadata.json and data.json
    """
    old_version = '0.3'
    new_version = '0.4'

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Apply migrations in correct sequential order
    migration_base_data_plugin_type_string(data)
    migration_process_type(metadata, data)
    migration_code_sub_class_of_data(data)
    migration_add_node_uuid_unique_constraint(data)
    migration_migrate_builtin_calculations(data)
    migration_provenance_redesign(data)
    migration_dbgroup_name_to_label_type_to_type_string(metadata, data)
    migration_dbgroup_type_string_change_content(data)
    migration_calc_job_option_attribute_keys(data)
    migration_move_data_within_node_module(data)
    # This step also takes `folder`, since it reads/deletes repository files
    migration_trajectory_symbols_to_attribute(data, folder)
    migration_remove_node_prefix(data)
    migration_rename_parameter_data_to_dict(data)
    migration_dbnode_type_to_dbnode_node_type(metadata, data)
    migration_remove_dbcomputer_enabled(metadata, data)
    migration_replace_text_field_with_json_field(data)

    # Add Node Extras
    add_extras(data)

    # Update metadata.json with the new Log and Comment entities
    new_entities = {
        'Log': {
            'uuid': {},
            'time': {
                'convert_type': 'date'
            },
            'loggername': {},
            'levelname': {},
            'message': {},
            'metadata': {},
            'dbnode': {
                'related_name': 'dblogs',
                'requires': 'Node'
            }
        },
        'Comment': {
            'uuid': {},
            'ctime': {
                'convert_type': 'date'
            },
            'mtime': {
                'convert_type': 'date'
            },
            'content': {},
            'dbnode': {
                'related_name': 'dbcomments',
                'requires': 'Node'
            },
            'user': {
                'related_name': 'dbcomments',
                'requires': 'User'
            }
        }
    }
    metadata['all_fields_info'].update(new_entities)
    metadata['unique_identifiers'].update({'Log': 'uuid', 'Comment': 'uuid'})

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)
def migrate_v2_to_v3(folder: CacheFolder):
    """
    Migration of archive files from v0.2 to v0.3, which means adding the link
    types to the link entries and making the entity key names backend agnostic
    by effectively removing the prefix 'aiida.backends.djsite.db.models'

    :param data: the content of an export archive data.json file
    :param metadata: the content of an export archive metadata.json file
    :raises DanglingLinkError: if a link references a node UUID not present in the archive
    """
    old_version = '0.2'
    new_version = '0.3'

    class LinkType(enum.Enum):
        """This was the state of the `aiida.common.links.LinkType` enum before aiida-core v1.0.0a5"""

        UNSPECIFIED = 'unspecified'
        CREATE = 'createlink'
        RETURN = 'returnlink'
        INPUT = 'inputlink'
        CALL = 'calllink'

    class NodeType(enum.Enum):
        """A simple enum of relevant node types"""

        NONE = 'none'
        CALC = 'calculation'
        CODE = 'code'
        DATA = 'data'
        WORK = 'work'

    entity_map = {
        'aiida.backends.djsite.db.models.DbNode': 'Node',
        'aiida.backends.djsite.db.models.DbLink': 'Link',
        'aiida.backends.djsite.db.models.DbGroup': 'Group',
        'aiida.backends.djsite.db.models.DbComputer': 'Computer',
        # FIX: this value was corrupted ('******'); DbUser maps to the 'User'
        # entity, consistent with the `'requires': 'User'` references rewritten below.
        'aiida.backends.djsite.db.models.DbUser': 'User',
        'aiida.backends.djsite.db.models.DbAttribute': 'Attribute'
    }

    _, metadata = folder.load_json('metadata.json')

    verify_metadata_version(metadata, old_version)
    update_metadata(metadata, new_version)

    _, data = folder.load_json('data.json')

    # Create a mapping from node uuid to node type
    mapping = {}
    for nodes in data['export_data'].values():
        for node in nodes.values():
            try:
                node_uuid = node['uuid']
                node_type_string = node['type']
            except KeyError:
                # Entries without a uuid/type are not nodes; skip them.
                continue

            if node_type_string.startswith('calculation.job.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('calculation.inline.'):
                node_type = NodeType.CALC
            elif node_type_string.startswith('code.Code'):
                node_type = NodeType.CODE
            elif node_type_string.startswith('data.'):
                node_type = NodeType.DATA
            elif node_type_string.startswith('calculation.work.'):
                node_type = NodeType.WORK
            else:
                node_type = NodeType.NONE

            mapping[node_uuid] = node_type

    # For each link, deduce the link type and insert it in place
    for link in data['links_uuid']:
        try:
            input_type = NodeType(mapping[link['input']])
            output_type = NodeType(mapping[link['output']])
        except KeyError as exc:
            # FIX: chain the lookup failure so the traceback shows which side was missing.
            raise DanglingLinkError(f"Unknown node UUID {link['input']} or {link['output']}") from exc

        # The following table demonstrates the logic for inferring the link type
        # (CODE, DATA) -> (WORK, CALC) : INPUT
        # (CALC) -> (DATA) : CREATE
        # (WORK) -> (DATA) : RETURN
        # (WORK) -> (CALC, WORK) : CALL
        if input_type in [NodeType.CODE, NodeType.DATA] and output_type in [NodeType.CALC, NodeType.WORK]:
            link['type'] = LinkType.INPUT.value
        elif input_type == NodeType.CALC and output_type == NodeType.DATA:
            link['type'] = LinkType.CREATE.value
        elif input_type == NodeType.WORK and output_type == NodeType.DATA:
            link['type'] = LinkType.RETURN.value
        elif input_type == NodeType.WORK and output_type in [NodeType.CALC, NodeType.WORK]:
            link['type'] = LinkType.CALL.value
        else:
            link['type'] = LinkType.UNSPECIFIED.value

    # Now we migrate the entity key names i.e. removing the 'aiida.backends.djsite.db.models' prefix
    for field in ['unique_identifiers', 'all_fields_info']:
        for old_key, new_key in entity_map.items():
            if old_key in metadata[field]:
                metadata[field][new_key] = metadata[field][old_key]
                del metadata[field][old_key]

    # Replace the 'requires' keys in the nested dictionaries in 'all_fields_info'
    for entity in metadata['all_fields_info'].values():
        for prop in entity.values():
            for key, value in prop.items():
                if key == 'requires' and value in entity_map:
                    prop[key] = entity_map[value]

    # Replace any present keys in the data.json
    for field in ['export_data']:
        for old_key, new_key in entity_map.items():
            if old_key in data[field]:
                data[field][new_key] = data[field][old_key]
                del data[field][old_key]

    folder.write_json('metadata.json', metadata)
    folder.write_json('data.json', data)