def migrate_infer_calculation_entry_point(apps, schema_editor):
    """Set the process type for calculation nodes by inferring it from their type string."""
    from aiida.manage.database.integrity import write_database_integrity_violation
    from aiida.manage.database.integrity.plugins import infer_calculation_entry_point
    from aiida.plugins.entry_point import ENTRY_POINT_STRING_SEPARATOR

    DbNode = apps.get_model('db', 'DbNode')
    type_strings = DbNode.objects.filter(type__startswith='calculation.').values_list('type', flat=True)
    node_type_to_entry_point = infer_calculation_entry_point(type_strings=type_strings)

    unmapped_rows = []

    for node_type, entry_point in node_type_to_entry_point.items():
        # Querysets are lazy, so this single filter serves both the fallback
        # collection below and the bulk update at the end of the iteration.
        matching_nodes = DbNode.objects.filter(type=node_type)

        # If the entry point string does not contain the entry point string separator, the mapping function was not
        # able to map the type string onto a known entry point string and fell back to the modified type string
        # itself. All affected entries are collected so they can be logged to file for the user to consult.
        if ENTRY_POINT_STRING_SEPARATOR not in entry_point:
            unmapped_rows.extend(
                [str(row[0]), node_type, entry_point] for row in matching_nodes.values_list('uuid')
            )

        matching_nodes.update(process_type=entry_point)

    if unmapped_rows:
        headers = ['UUID', 'type (old)', 'process_type (fallback)']
        warning_message = 'found calculation nodes with a type string that could not be mapped onto a known entry point'
        action_message = 'inferred `process_type` for all calculation nodes, using fallback for unknown entry points'
        write_database_integrity_violation(unmapped_rows, headers, warning_message, action_message)
def delete_wrong_links(node_uuids, link_type, headers, warning_message, action_message):
    """delete links that are matching link_type and are going from nodes listed in node_uuids"""

    def is_illegal(link):
        # A link is illegal when it carries the offending type AND originates
        # from one of the given source nodes.
        return link['type'] == link_type and link['input'] in node_uuids

    violations = [
        [link['input'], link['output'], link['type'], link['label']]
        for link in data['links_uuid']
        if is_illegal(link)
    ]

    # Keep only the legal links; the reassignment happens after both
    # comprehensions have fully consumed the original list.
    data['links_uuid'] = [link for link in data['links_uuid'] if not is_illegal(link)]

    if violations:
        write_database_integrity_violation(violations, headers, warning_message, action_message)
def detect_unexpected_links(connection):
    """Scan the database for any links that are unexpected.

    The checks will verify that there are no outgoing `call` or `return` links from calculation nodes and that if
    a workflow node has a `create` link, it has at least an accompanying return link to the same data node, or it
    has a `call` link to a calculation node that takes the created data node as input.
    """
    from aiida.backends.general.migrations.provenance_redesign import INVALID_LINK_SELECT_STATEMENTS
    from aiida.manage.database.integrity import write_database_integrity_violation

    # The header row is identical for every check, so build it once.
    headers = ['UUID source', 'UUID target', 'link type', 'link label']

    for statement, warning_message in INVALID_LINK_SELECT_STATEMENTS:
        violations = list(connection.execute(text(statement)))

        if not violations:
            continue

        write_database_integrity_violation(violations, headers, warning_message)
def migrate_infer_calculation_entry_point(connection):
    """Set the process type for calculation nodes by inferring it from their type string.

    :param connection: a connection on which raw SQLAlchemy Core statements are executed
    """
    from aiida.manage.database.integrity import write_database_integrity_violation
    from aiida.manage.database.integrity.plugins import infer_calculation_entry_point
    from aiida.plugins.entry_point import ENTRY_POINT_STRING_SEPARATOR

    # Minimal SQLAlchemy Core table construct: only the columns this migration touches.
    DbNode = table('db_dbnode', column('id', Integer), column('uuid', UUID), column('type', String),
                   column('process_type', String))

    # Collect the distinct set of `type` strings of all calculation nodes.
    query_set = connection.execute(select([DbNode.c.type]).where(DbNode.c.type.like('calculation.%'))).fetchall()
    type_strings = set(entry[0] for entry in query_set)
    mapping_node_type_to_entry_point = infer_calculation_entry_point(type_strings=type_strings)

    fallback_cases = []

    for type_string, entry_point_string in mapping_node_type_to_entry_point.items():
        # If the entry point string does not contain the entry point string separator, the mapping function was not able
        # to map the type string onto a known entry point string. As a fallback it uses the modified type string itself.
        # All affected entries should be logged to file that the user can consult.
        if ENTRY_POINT_STRING_SEPARATOR not in entry_point_string:
            query_set = connection.execute(
                select([DbNode.c.uuid]).where(DbNode.c.type == op.inline_literal(type_string))).fetchall()

            uuids = [str(entry.uuid) for entry in query_set]
            for uuid in uuids:
                fallback_cases.append([uuid, type_string, entry_point_string])

        # NOTE(review): `op.inline_literal` renders the values directly into the emitted SQL —
        # presumably required so the Alembic migration also works in offline (--sql) mode; confirm.
        connection.execute(DbNode.update().where(DbNode.c.type == op.inline_literal(type_string)).values(
            process_type=op.inline_literal(entry_point_string)))

    if fallback_cases:
        headers = ['UUID', 'type (old)', 'process_type (fallback)']
        warning_message = 'found calculation nodes with a type string that could not be mapped onto a known entry point'
        action_message = 'inferred `process_type` for all calculation nodes, using fallback for unknown entry points'
        write_database_integrity_violation(fallback_cases, headers, warning_message, action_message)
def data_migration_legacy_process_attributes(data):
    """Apply migration 0040 - REV. 1.0.40

    Data migration for some legacy process attributes.

    Attribute keys that are renamed:
    * `_sealed` -> `sealed`

    Attribute keys that are removed entirely:
    * `_finished`
    * `_failed`
    * `_aborted`
    * `_do_abort`

    Finally, after these first migrations, any remaining process nodes are screened for the existence of the
    `process_state` attribute. If they have it, it is checked whether the state is active or not, if not, the `sealed`
    attribute is created and set to `True`.

    :param data: the deserialized export archive data, mutated in place

    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if a Node, found to have attributes,
        cannot be found in the list of exported entities.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the 'sealed' attribute does not exist and
        the ProcessNode is in an active state, i.e. `process_state` is one of ('created', 'running', 'waiting').
        A log-file, listing all illegal ProcessNodes, will be produced in the current directory.
    """
    from aiida.tools.importexport.common.exceptions import CorruptArchive
    from aiida.manage.database.integrity import write_database_integrity_violation

    attrs_to_remove = ['_sealed', '_finished', '_failed', '_aborted', '_do_abort']
    active_states = {'created', 'running', 'waiting'}
    illegal_cases = []

    for node_pk, content in data['node_attributes'].items():
        try:
            if data['export_data']['Node'][node_pk]['node_type'].startswith('process.'):
                # Check if the ProcessNode has a 'process_state' attribute, and if it's non-active.
                # Raise if the ProcessNode is in an active state, otherwise set `'sealed' = True`
                process_state = content.get('process_state', '')
                if process_state in active_states:
                    # The ProcessNode is in an active state, and should therefore never have been allowed
                    # to be exported. The Node will be added to a log that is saved in the working directory,
                    # then a CorruptArchive will be raised, since the archive needs to be migrated manually.
                    uuid_pk = data['export_data']['Node'][node_pk].get('uuid', node_pk)
                    illegal_cases.append([uuid_pk, process_state])
                    continue  # No reason to do more now

                # Either the ProcessNode is in a non-active state or its 'process_state' hasn't been set.
                # In both cases we claim the ProcessNode 'sealed' and make it importable.
                content['sealed'] = True

                # Remove attributes
                for attr in attrs_to_remove:
                    content.pop(attr, None)
        except KeyError as exc:
            # Fix: chain the original KeyError (`from exc`) so the traceback preserves
            # which key was missing instead of reporting only the formatted message.
            raise CorruptArchive(f'Your export archive is corrupt! Org. exception: {exc}') from exc

    if illegal_cases:
        headers = ['UUID/PK', 'process_state']
        warning_message = 'Found ProcessNodes with active process states ' \
                          'that should never have been allowed to be exported.'
        write_database_integrity_violation(illegal_cases, headers, warning_message)

        raise CorruptArchive(
            'Your export archive is corrupt! '
            'Please see the log-file in your current directory for more details.'
        )
def migration_provenance_redesign(data):  # pylint: disable=too-many-locals,too-many-branches,too-many-statements
    """Apply migration: 0020 - REV. 1.0.20

    Provenance redesign

    Performed in three steps, all mutating `data` in place:
    1. infer a `process_type` for all `calculation.job.` nodes from their `type` string;
    2. detect and delete links that are illegal under the new provenance rules;
    3. rewrite legacy node `type` strings and legacy link type names to the new scheme.

    :param data: the deserialized export archive data, mutated in place
    """
    from aiida.manage.database.integrity.plugins import infer_calculation_entry_point
    from aiida.manage.database.integrity import write_database_integrity_violation
    from aiida.plugins.entry_point import ENTRY_POINT_STRING_SEPARATOR

    fallback_cases = []
    calcjobs_to_migrate = {}

    for key, value in data['export_data'].get('Node', {}).items():
        if value.get('type', '').startswith('calculation.job.'):
            calcjobs_to_migrate[key] = value

    if calcjobs_to_migrate:
        # step1: rename the type column of process nodes
        mapping_node_entry = infer_calculation_entry_point(
            type_strings=[e['type'] for e in calcjobs_to_migrate.values()])
        for uuid, content in calcjobs_to_migrate.items():
            type_string = content['type']
            entry_point_string = mapping_node_entry[type_string]

            # If the entry point string does not contain the entry point string separator,
            # the mapping function was not able to map the type string onto a known entry point string.
            # As a fallback it uses the modified type string itself.
            # All affected entries should be logged to file that the user can consult.
            if ENTRY_POINT_STRING_SEPARATOR not in entry_point_string:
                fallback_cases.append([uuid, type_string, entry_point_string])

            content['process_type'] = entry_point_string

        if fallback_cases:
            headers = ['UUID', 'type (old)', 'process_type (fallback)']
            warning_message = 'found calculation nodes with a type string ' \
                              'that could not be mapped onto a known entry point'
            action_message = 'inferred `process_type` for all calculation nodes, ' \
                             'using fallback for unknown entry points'
            write_database_integrity_violation(fallback_cases, headers, warning_message, action_message)

    # step2: detect and delete unexpected links
    action_message = 'the link was deleted'
    headers = ['UUID source', 'UUID target', 'link type', 'link label']

    def delete_wrong_links(node_uuids, link_type, headers, warning_message, action_message):
        """delete links that are matching link_type and are going from nodes listed in node_uuids"""
        violations = []
        new_links_list = []
        for link in data['links_uuid']:
            if link['input'] in node_uuids and link['type'] == link_type:
                violations.append([link['input'], link['output'], link['type'], link['label']])
            else:
                new_links_list.append(link)
        data['links_uuid'] = new_links_list
        if violations:
            write_database_integrity_violation(violations, headers, warning_message, action_message)

    # calculations with outgoing CALL links
    calculation_uuids = {
        value['uuid']
        for value in data['export_data'].get('Node', {}).values()
        if (value.get('type', '').startswith('calculation.job.')
            or value.get('type', '').startswith('calculation.inline.'))
    }
    warning_message = 'detected calculation nodes with outgoing `call` links.'
    delete_wrong_links(calculation_uuids, 'calllink', headers, warning_message, action_message)

    # calculations with outgoing RETURN links
    warning_message = 'detected calculation nodes with outgoing `return` links.'
    delete_wrong_links(calculation_uuids, 'returnlink', headers, warning_message, action_message)

    # outgoing CREATE links from FunctionCalculation and WorkCalculation nodes
    warning_message = 'detected outgoing `create` links from FunctionCalculation and/or WorkCalculation nodes.'
    work_uuids = {
        value['uuid']
        for value in data['export_data'].get('Node', {}).values()
        if (value.get('type', '').startswith('calculation.function')
            or value.get('type', '').startswith('calculation.work'))
    }
    delete_wrong_links(work_uuids, 'createlink', headers, warning_message, action_message)

    # step3: rewrite legacy node type strings to the new `node.process.*` scheme
    for node_id, node in data['export_data'].get('Node', {}).items():
        # migrate very old `ProcessCalculation` to `WorkCalculation`
        if node.get('type', '') == 'calculation.process.ProcessCalculation.':
            node['type'] = 'calculation.work.WorkCalculation.'

        # WorkCalculations that have a `function_name` attribute are FunctionCalculations
        if node.get('type', '') == 'calculation.work.WorkCalculation.':
            if ('function_name' in data['node_attributes'][node_id]
                    and data['node_attributes'][node_id]['function_name'] is not None):
                # for some reason for the workchains the 'function_name' attribute is present but has None value
                node['type'] = 'node.process.workflow.workfunction.WorkFunctionNode.'
            else:
                node['type'] = 'node.process.workflow.workchain.WorkChainNode.'

        # update type for JobCalculation nodes
        if node.get('type', '').startswith('calculation.job.'):
            node['type'] = 'node.process.calculation.calcjob.CalcJobNode.'

        # update type for InlineCalculation nodes
        if node.get('type', '') == 'calculation.inline.InlineCalculation.':
            node['type'] = 'node.process.calculation.calcfunction.CalcFunctionNode.'

        # update type for FunctionCalculation nodes
        if node.get('type', '') == 'calculation.function.FunctionCalculation.':
            node['type'] = 'node.process.workflow.workfunction.WorkFunctionNode.'

    # Map each node uuid to its (already migrated) type string for the link renames below.
    uuid_node_type_mapping = {
        node['uuid']: node['type']
        for node in data['export_data'].get('Node', {}).values() if 'type' in node
    }
    for link in data['links_uuid']:
        # NOTE(review): despite the name, `inp_uuid` holds the link's *output* (target) node uuid.
        inp_uuid = link['output']
        # rename `createlink` to `create`
        if link['type'] == 'createlink':
            link['type'] = 'create'
        # rename `returnlink` to `return`
        elif link['type'] == 'returnlink':
            link['type'] = 'return'
        elif link['type'] == 'inputlink':
            # rename `inputlink` to `input_calc` if the target node is a calculation type node
            if uuid_node_type_mapping[inp_uuid].startswith('node.process.calculation'):
                link['type'] = 'input_calc'
            # rename `inputlink` to `input_work` if the target node is a workflow type node
            elif uuid_node_type_mapping[inp_uuid].startswith('node.process.workflow'):
                link['type'] = 'input_work'
        elif link['type'] == 'calllink':
            # rename `calllink` to `call_calc` if the target node is a calculation type node
            if uuid_node_type_mapping[inp_uuid].startswith('node.process.calculation'):
                link['type'] = 'call_calc'
            # rename `calllink` to `call_work` if the target node is a workflow type node
            elif uuid_node_type_mapping[inp_uuid].startswith('node.process.workflow'):
                link['type'] = 'call_work'