Exemplo n.º 1
0
    def get_potcar_groups(cls, filter_elements=None, filter_symbols=None):
        """
        List all groups of type PotcarFamily, possibly with some filters.

        :param filter_elements: list of strings.
               If present, returns only the groups that contain at least one
               POTCAR for every element present in the list. Default=None,
               meaning that all families are returned. A single element can
               be passed as a string.
        :param filter_symbols: list of strings with symbols to filter for.
               Only groups containing at least one POTCAR for every listed
               symbol are kept. A single symbol can be passed as a string.
        :return: list of the group objects projected by the family query that
               pass all requested filters.
        """

        def _as_list(values):
            """Normalise a filter argument to something safe to iterate over."""
            # A bare string would otherwise be iterated character by character
            # ('Si' -> 'S', 'i'), contradicting the documented behaviour.
            if isinstance(values, str):
                return [values]
            return values or []

        def _has_potcar(group, attribute, value):
            """Return True if `group` holds a POTCAR whose `attribute` equals `value`."""
            group_filters = {'label': {'==': group.label}, 'type_string': {'==': cls.potcar_family_type_string}}
            potcar_filters = {'attributes.' + attribute: {'==': value}}
            query = QueryBuilder()
            query.append(Group, tag='family', filters=group_filters)
            query.append(cls, tag='potcar', with_group='family', filters=potcar_filters)
            return query.count() > 0

        # NOTE(review): `with_node` points at the tag 'potcar_data', which is
        # also the tag assigned to this very vertex -- confirm whether a POTCAR
        # node vertex was meant to be appended before the group.
        group_query = QueryBuilder()
        group_query.append(Group,
                           with_node='potcar_data',
                           tag='potcar_data',
                           filters={'type_string': {
                               '==': cls.potcar_family_type_string
                           }},
                           project='*')

        groups = [group_list[0] for group_list in group_query.all()]

        # Narrow the candidate list one requested value at a time, so groups
        # already rejected are not queried again for later values.
        requested = (
            ('element', _as_list(filter_elements)),
            ('symbol', _as_list(filter_symbols)),
        )
        for attribute, values in requested:
            for value in values:
                groups = [group for group in groups if _has_potcar(group, attribute, value)]

        return groups
Exemplo n.º 2
0
def link_triple_exists(source, target, link_type, link_label):
    """Tell whether ``source`` and ``target`` are already connected by a link with this type and label.

    The incoming-link cache of ``target`` is consulted first; only when both nodes have a pk is the
    database queried.

    :param source: node from which the link is outgoing
    :param target: node to which the link is incoming
    :param link_type: the link type
    :param link_label: the link label
    :return: boolean, True if the link triple exists, False otherwise
    """
    from aiida.orm import Node, QueryBuilder

    # A matching triple in the target's cache settles the question immediately
    if target._incoming_cache and LinkTriple(source, link_type, link_label) in target._incoming_cache:  # pylint: disable=protected-access
        return True

    # A node without a pk is not stored, hence no link to or from it can be in the database
    if any(node.pk is None for node in (source, target)):
        return False

    # Both nodes are stored: look for the link in the database. One hit is enough, hence the limit.
    query = QueryBuilder()
    query.append(Node, filters={'id': source.id}, project=['id'])
    query.append(
        Node,
        filters={'id': target.id},
        edge_filters={'type': link_type.value, 'label': link_label},
    )
    query.limit(1)

    return query.count() > 0
def get_data_aiida(cif_uuid, plot_info):
    """Query the AiiDA database for a CIF node and a ``Dict`` descending from it.

    :param cif_uuid: UUID that the ``CifData`` node must match exactly
    :param plot_info: object whose ``text`` attribute receives a message when nothing is found
    :return: the single result row of the query (``CifData`` and ``Dict`` projections),
        or None when the query matches nothing
    """
    from figure.aiida import load_profile
    from aiida.orm import QueryBuilder, Dict, CifData

    load_profile()

    query = QueryBuilder()
    query.append(CifData, tag='cifs', filters={'uuid': {'==': cif_uuid}}, project='*')
    query.append(Dict, descendant_of='cifs', project='*')

    if query.count() != 0:
        # `one()` is expected to return exactly one row
        return query.one()

    plot_info.text = 'No matching COF found.'
    return None
Exemplo n.º 4
0
def reuse_kpoints_grid(grid, lowest_pk=False):
    """
    Retrieve a previously stored kpoints mesh data node.

    If no ``KpointsData`` with the requested mesh is found, a new node is
    created with that mesh and returned; this function does not store it.
    When several nodes match, the one with the highest pk is returned unless
    ``lowest_pk`` is set.

    :param grid: Grid to be retrieved; its first three entries are matched
        against ``attributes.mesh``
    :param bool lowest_pk: If set to True will return the node with lowest pk

    :returns: A KpointsData node representing the grid requested
    """
    from aiida.orm import QueryBuilder
    from aiida.orm import KpointsData

    query = QueryBuilder()
    query.append(KpointsData,
                 tag="kpoints",
                 filters={
                     "attributes.mesh.0": grid[0],
                     "attributes.mesh.1": grid[1],
                     "attributes.mesh.2": grid[2]
                 })
    order = "asc" if lowest_pk else "desc"
    query.order_by({"kpoints": [{"id": {"order": order}}]})

    # `first()` yields None when nothing matches, so a separate `count()`
    # round-trip to the database is not needed.
    match = query.first()
    if match is not None:
        return match[0]

    kpoints = KpointsData()
    kpoints.set_kpoints_mesh(grid)
    return kpoints
Exemplo n.º 5
0
def merge_comment(incoming_comment, comment_mode):
    """Merge an incoming Comment with the existing one of the same UUID, according to ``comment_mode``.

    :param incoming_comment: mapping providing the keys 'uuid', 'mtime' and 'content'
    :param comment_mode: 'newest' keeps whichever Comment has the later 'mtime';
        'overwrite' always replaces the existing content and mtime
    :return: New UUID if new Comment should be created, else None.
    :raises ImportValidationError: for an unknown ``comment_mode``, or in 'newest' mode when the
        number of Comments found for the UUID is not exactly one
    """
    incoming_uuid = str(incoming_comment['uuid'])
    incoming_mtime = incoming_comment['mtime']
    incoming_content = incoming_comment['content']

    def _replace_existing():
        """Write the incoming content and mtime onto the stored Comment."""
        stored = Comment.objects.get(uuid=incoming_uuid)
        stored.set_content(incoming_content)
        stored.set_mtime(incoming_mtime)

    if comment_mode == 'newest':
        # Fetch 'mtime' and 'content' of the Comment already in the database
        query = QueryBuilder().append(Comment,
                                      filters={'uuid': incoming_uuid},
                                      project=['mtime', 'content'])
        if query.count() != 1:
            raise exceptions.ImportValidationError(
                'Multiple Comments with the same UUID: {}'.format(
                    incoming_uuid))
        rows = query.all()
        existing_mtime = rows[0][0]
        existing_content = rows[0][1]

        # Stored Comment is more recent: leave it untouched
        if existing_mtime > incoming_mtime:
            return None

        # Stored Comment is older: replace it with the incoming one
        if existing_mtime < incoming_mtime:
            _replace_existing()
            return None

        # Same modification time: identical content is ignored,
        # differing content becomes a brand-new Comment
        if existing_mtime == incoming_mtime:
            return None if existing_content == incoming_content else get_new_uuid()

        return None

    if comment_mode == 'overwrite':
        _replace_existing()
        return None

    # Neither of the supported modes
    raise exceptions.ImportValidationError(
        'Unknown comment_mode value: {}. Should be '
        "either 'newest' or 'overwrite'".format(comment_mode))
Exemplo n.º 6
0
def listfamilies(element, with_description):
    """List available OtfgData families.

    Prints one line per family with its label and the number of nodes it
    contains, or a notice when no family is found.

    :param element: optional collection of element names; when truthy, only
        nodes whose ``element`` attribute is in the collection or equals
        "LIBRARY" are used to find families
    :param with_description: when truthy, append each group's description
    """
    from aiida.orm import QueryBuilder, Node
    from aiida_castep.data.otfg import OTFGGroup

    qbd = QueryBuilder()
    qbd.append(Node, tag="otfgdata")
    if element:
        qbd.add_filter("otfgdata", {
            "attributes.element": {
                "or": [{
                    'in': element
                }, {
                    '==': "LIBRARY"
                }]
            }
        })
    qbd.append(OTFGGroup,
               tag='group',
               with_node='otfgdata',
               project=['label', 'description'])
    qbd.distinct()

    # Fetch the rows once instead of issuing a count query followed by the
    # same query again.
    families = qbd.dict()
    if not families:
        click.echo("No valid pseudopotential family found.")
        return

    for res in families:
        group_label = res.get("group").get("label")
        group_desc = res.get("group").get("description")

        # Count the number of pseudos in this group. The label is matched
        # exactly: with 'like', any '_' or '%' in the label would act as a
        # wildcard and nodes of other groups could be counted as well.
        count_query = QueryBuilder()
        count_query.append(OTFGGroup,
                           tag='thisgroup',
                           filters={"label": {
                               '==': group_label
                           }})
        count_query.append(Node, project=["id"], with_group='thisgroup')

        description_string = ": {}".format(group_desc) if with_description else ""

        click.echo("* {} [{} pseudos]{}".format(group_label,
                                                count_query.count(),
                                                description_string))
Exemplo n.º 7
0
 def _get_unique_node_property(self, project: str) -> Union[Node, Any]:
     """Return the projection ``project`` of the entity whose id equals ``self._pk``.

     :param project: projection passed to the query for ``self.AIIDA_ENTITY``
     :return: the first (only) column of the matching row
     :raises AiidaEntityNotFound: if the query does not count exactly one match
     """
     lookup = QueryBuilder(limit=1)
     lookup.append(self.AIIDA_ENTITY,
                   project=project,
                   filters={"id": self._pk})
     if lookup.count() == 1:
         return lookup.first()[0]
     raise AiidaEntityNotFound(
         f"Could not find {self.AIIDA_ENTITY} with PK {self._pk}.")
Exemplo n.º 8
0
def get_data_aiida(projections, sliders_dict, quantities, plot_info):
    """Query the AiiDA database for the ``Dict`` nodes to plot.

    :param projections: attribute names to project; together with the two
        fixed columns ('uuid', 'extras.cif_uuid') the rows are unpacked into
        exactly six sequences, so four names are required here
    :param sliders_dict: mapping of quantity name to an object with a
        ``value`` attribute holding (lower, upper) bounds
    :param quantities: mapping of quantity name to a dict with a 'range' entry
    :param plot_info: object whose ``text`` attribute receives status messages
    :return: ``data_empty`` when nothing matches, otherwise a dict with the
        keys 'x', 'y', 'uuid', 'color' and 'name'
    """
    from figure.aiida import load_profile
    from aiida.orm import QueryBuilder, Dict

    load_profile()

    # Filtering is costly, so a quantity is only constrained when its slider
    # no longer spans the full range.
    # The explicit 'and' form is a workaround, see
    # https://github.com/aiidateam/aiida_core/issues/1389
    filters = {}
    for quantity, slider in sliders_dict.items():
        if slider.value == quantities[quantity]['range']:
            continue
        bounds = slider.value
        filters['attributes.' + quantity] = {
            'and': [{'>=': bounds[0]}, {'<': bounds[1]}]
        }

    projected = ['attributes.' + name for name in projections]
    projected += ['uuid', 'extras.cif_uuid']

    query = QueryBuilder()
    query.append(Dict, filters=filters, project=projected)

    nresults = query.count()
    if nresults == 0:
        plot_info.text = 'No matching COFs found.'
        return data_empty

    plot_info.text = '{} COFs found. Plotting...'.format(nresults)

    # Transpose the rows into one sequence per projected column
    x, y, clrs, uuids, names, cif_uuids = list(zip(*query.all()))
    plot_info.text = '{} COFs queried'.format(nresults)

    x = [float(value) for value in x]
    y = [float(value) for value in y]
    cif_uuids = [str(value) for value in cif_uuids]
    # Converted but not returned: the 'uuid' entry below carries the CIF UUIDs
    uuids = [str(value) for value in uuids]

    # The third projection is categorical for 'bond_type', numeric otherwise
    convert_colour = str if projections[2] == 'bond_type' else float
    clrs = [convert_colour(value) for value in clrs]

    return {'x': x, 'y': y, 'uuid': cif_uuids, 'color': clrs, 'name': names}
Exemplo n.º 9
0
    def is_loop_finished(self):
        """Decide whether another iteration should run.

        If exactly one group labelled with ``self.uuid`` exists, the manual
        termination is reported and False is returned. Otherwise the iteration
        counter in the context is incremented and the result says whether it
        is still within ``inputs.max_iteration``.

        NOTE(review): despite the name, a True result reads as "keep
        iterating" -- confirm against the caller.
        """
        stop_marker = QueryBuilder()
        stop_marker.append(Group, filters={'label': {'==': self.uuid}})

        if stop_marker.count() != 1:
            self.ctx.iteration += 1
            return self.ctx.iteration <= self.inputs.max_iteration.value

        self.report("Iteration loop is manually terminated at step %d."
                    % self.ctx.iteration)
        return False
Exemplo n.º 10
0
def database_summary(verbose):
    """Summarise the entities in the database.

    Counts users, computers, nodes, groups, comments and logs and echoes the
    result as YAML.

    :param verbose: when truthy, also list the distinct user emails, computer
        names, node types, process types and group type strings
    """
    from aiida.orm import QueryBuilder, Node, Group, Computer, Comment, Log, User

    def _summarise(entity, column, values_key):
        """Count ``entity`` rows and, in verbose mode, collect the distinct ``column`` values."""
        query = QueryBuilder().append(entity, project=[column])
        entry = {'count': query.count()}
        if verbose:
            entry[values_key] = query.distinct().all(flat=True)
        return entry

    def _distinct_node_values(column):
        """Return the distinct values of ``column`` over all nodes."""
        return QueryBuilder().append(Node, project=[column]).distinct().all(flat=True)

    summary = {}

    summary['Users'] = _summarise(User, 'email', 'emails')
    summary['Computers'] = _summarise(Computer, 'name', 'names')

    # Nodes report two independent distinct listings
    nodes = {'count': QueryBuilder().append(Node).count()}
    if verbose:
        nodes['node_types'] = _distinct_node_values('node_type')
        # Empty process types are dropped from the listing
        nodes['process_types'] = list(filter(None, _distinct_node_values('process_type')))
    summary['Nodes'] = nodes

    summary['Groups'] = _summarise(Group, 'type_string', 'type_strings')

    # Comments and logs are only counted
    summary['Comments'] = {'count': QueryBuilder().append(Comment).count()}
    summary['Logs'] = {'count': QueryBuilder().append(Log).count()}

    echo.echo_dictionary(summary, sort_keys=False, fmt='yaml')
Exemplo n.º 11
0
def import_data_sqla(in_path,
                     group=None,
                     ignore_unknown_nodes=False,
                     extras_mode_existing='kcl',
                     extras_mode_new='import',
                     comment_mode='newest',
                     silent=False,
                     **kwargs):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the SQLAlchemy backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either:
        'k' (keep it) or
        'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either:
        'c' (create it) or
        'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either:
        'l' (leave the old value),
        'u' (update with a new value),
        'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either:
        'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress progress bar and summary.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is
        corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
    """
    from aiida.backends.sqlalchemy.models.node import DbNode, DbLink
    from aiida.backends.sqlalchemy.utils import flag_modified

    # This is the export version expected by this function
    expected_export_version = StrictVersion(EXPORT_VERSION)

    # The returned dictionary with new and existing nodes and links
    ret_dict = {}

    # Initial check(s)
    if group:
        if not isinstance(group, Group):
            raise exceptions.ImportValidationError(
                'group must be a Group entity')
        elif not group.is_stored:
            group.store()

    if silent:
        logging.disable(level=logging.CRITICAL)

    ################
    # EXTRACT DATA #
    ################
    # The sandbox has to remain open until the end
    with SandboxFolder() as folder:
        if os.path.isdir(in_path):
            extract_tree(in_path, folder)
        else:
            if tarfile.is_tarfile(in_path):
                extract_tar(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER,
                            **kwargs)
            elif zipfile.is_zipfile(in_path):
                extract_zip(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER,
                            **kwargs)
            else:
                raise exceptions.ImportValidationError(
                    'Unable to detect the input file format, it is neither a '
                    'tar file, nor a (possibly compressed) zip file.')

        if not folder.get_content_list():
            raise exceptions.CorruptArchive(
                'The provided file/folder ({}) is empty'.format(in_path))
        try:
            IMPORT_LOGGER.debug('CACHING metadata.json')
            with open(folder.get_abs_path('metadata.json'),
                      encoding='utf8') as fhandle:
                metadata = json.load(fhandle)

            IMPORT_LOGGER.debug('CACHING data.json')
            with open(folder.get_abs_path('data.json'),
                      encoding='utf8') as fhandle:
                data = json.load(fhandle)
        except IOError as error:
            raise exceptions.CorruptArchive(
                'Unable to find the file {} in the import file or folder'.
                format(error.filename))

        ######################
        # PRELIMINARY CHECKS #
        ######################
        export_version = StrictVersion(str(metadata['export_version']))
        if export_version != expected_export_version:
            msg = 'Export file version is {}, can import only version {}'\
                    .format(metadata['export_version'], expected_export_version)
            if export_version < expected_export_version:
                msg += "\nUse 'verdi export migrate' to update this export file."
            else:
                msg += '\nUpdate your AiiDA version in order to import this file.'

            raise exceptions.IncompatibleArchiveVersionError(msg)

        start_summary(in_path, comment_mode, extras_mode_new,
                      extras_mode_existing)

        ###################################################################
        #           CREATE UUID REVERSE TABLES AND CHECK IF               #
        #              I HAVE ALL NODES FOR THE LINKS                     #
        ###################################################################
        IMPORT_LOGGER.debug(
            'CHECKING IF NODES FROM LINKS ARE IN DB OR ARCHIVE...')

        linked_nodes = set(
            chain.from_iterable(
                (l['input'], l['output']) for l in data['links_uuid']))
        group_nodes = set(chain.from_iterable(data['groups_uuid'].values()))

        # Check that UUIDs are valid
        linked_nodes = set(x for x in linked_nodes if validate_uuid(x))
        group_nodes = set(x for x in group_nodes if validate_uuid(x))

        import_nodes_uuid = set()
        for value in data['export_data'].get(NODE_ENTITY_NAME, {}).values():
            import_nodes_uuid.add(value['uuid'])

        unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid

        if unknown_nodes and not ignore_unknown_nodes:
            raise exceptions.DanglingLinkError(
                'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first '
                'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n'
                ''.format(len(unknown_nodes)) +
                '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes))

        ###################################
        # DOUBLE-CHECK MODEL DEPENDENCIES #
        ###################################
        # The entity import order. It is defined by the database model relationships.
        entity_order = [
            USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME,
            GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME
        ]

        #  I make a new list that contains the entity names:
        # eg: ['User', 'Computer', 'Node', 'Group']
        for import_field_name in metadata['all_fields_info']:
            if import_field_name not in entity_order:
                raise exceptions.ImportValidationError(
                    "You are trying to import an unknown model '{}'!".format(
                        import_field_name))

        for idx, entity_name in enumerate(entity_order):
            dependencies = []
            # for every field, I checked the dependencies given as value for key requires
            for field in metadata['all_fields_info'][entity_name].values():
                try:
                    dependencies.append(field['requires'])
                except KeyError:
                    # (No ForeignKey)
                    pass
            for dependency in dependencies:
                if dependency not in entity_order[:idx]:
                    raise exceptions.ArchiveImportError(
                        'Entity {} requires {} but would be loaded first; stopping...'
                        .format(entity_name, dependency))

        ###################################################
        # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS #
        ###################################################
        # This is nested dictionary of entity_name:{id:uuid}
        # to map one id (the pk) to a different one.
        # One of the things to remove for v0.4
        # {
        # 'Node': {2362: '82a897b5-fb3a-47d7-8b22-c5fe1b4f2c14',
        #           2363: 'ef04aa5d-99e7-4bfd-95ef-fe412a6a3524', 2364: '1dc59576-af21-4d71-81c2-bac1fc82a84a'},
        # 'User': {1: 'aiida@localhost'}
        # }
        IMPORT_LOGGER.debug('CREATING PK-2-UUID/EMAIL MAPPING...')
        import_unique_ids_mappings = {}
        # Export data since v0.3 contains the keys entity_name
        for entity_name, import_data in data['export_data'].items():
            # Again I need the entity_name since that's what's being stored since 0.3
            if entity_name in metadata['unique_identifiers']:
                # I have to reconvert the pk to integer
                import_unique_ids_mappings[entity_name] = {
                    int(k): v[metadata['unique_identifiers'][entity_name]]
                    for k, v in import_data.items()
                }
        ###############
        # IMPORT DATA #
        ###############
        # DO ALL WITH A TRANSACTION
        import aiida.backends.sqlalchemy

        session = aiida.backends.sqlalchemy.get_scoped_session()

        try:
            foreign_ids_reverse_mappings = {}
            new_entries = {}
            existing_entries = {}

            IMPORT_LOGGER.debug('GENERATING LIST OF DATA...')

            # Instantiate progress bar
            progress_bar = get_progress_bar(total=1,
                                            leave=False,
                                            disable=silent)
            pbar_base_str = 'Generating list of data - '

            # Get total entities from data.json
            # To be used with progress bar
            number_of_entities = 0

            # I first generate the list of data
            for entity_name in entity_order:
                entity = entity_names_to_entities[entity_name]
                # I get the unique identifier, since v0.3 stored under entity_name
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, None)

                # so, new_entries. Also, since v0.3 it makes more sense to use the entity_name
                new_entries[entity_name] = {}
                existing_entries[entity_name] = {}
                foreign_ids_reverse_mappings[entity_name] = {}

                # Not necessarily all models are exported
                if entity_name in data['export_data']:

                    IMPORT_LOGGER.debug('  %s...', entity_name)

                    progress_bar.set_description_str(pbar_base_str +
                                                     entity_name,
                                                     refresh=False)
                    number_of_entities += len(data['export_data'][entity_name])

                    if unique_identifier is not None:
                        import_unique_ids = set(
                            v[unique_identifier]
                            for v in data['export_data'][entity_name].values())

                        relevant_db_entries = {}
                        if import_unique_ids:
                            builder = QueryBuilder()
                            builder.append(entity,
                                           filters={
                                               unique_identifier: {
                                                   'in': import_unique_ids
                                               }
                                           },
                                           project='*')

                            if builder.count():
                                progress_bar = get_progress_bar(
                                    total=builder.count(), disable=silent)
                                for object_ in builder.iterall():
                                    progress_bar.update()

                                    relevant_db_entries.update({
                                        getattr(object_[0], unique_identifier):
                                        object_[0]
                                    })

                            foreign_ids_reverse_mappings[entity_name] = {
                                k: v.pk
                                for k, v in relevant_db_entries.items()
                            }

                        IMPORT_LOGGER.debug('    GOING THROUGH ARCHIVE...')

                        imported_comp_names = set()
                        for key, value in data['export_data'][
                                entity_name].items():
                            if entity_name == GROUP_ENTITY_NAME:
                                # Check if there is already a group with the same name,
                                # and if so, recreate the name
                                orig_label = value['label']
                                dupl_counter = 0
                                while QueryBuilder().append(
                                        entity,
                                        filters={
                                            'label': {
                                                '==': value['label']
                                            }
                                        }).count():
                                    # Rename the new group
                                    value[
                                        'label'] = orig_label + DUPL_SUFFIX.format(
                                            dupl_counter)
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A group of that label ( {} ) already exists and I could not create a new '
                                            'one'.format(orig_label))

                            elif entity_name == COMPUTER_ENTITY_NAME:
                                # The following is done for compatibility
                                # reasons in case the export file was generated
                                # with the Django export method. In Django the
                                # metadata and the transport parameters are
                                # stored as (unicode) strings of the serialized
                                # JSON objects and not as simple serialized
                                # JSON objects.
                                if isinstance(value['metadata'], (str, bytes)):
                                    value['metadata'] = json.loads(
                                        value['metadata'])

                                # Check if there is already a computer with the
                                # same name in the database
                                builder = QueryBuilder()
                                builder.append(
                                    entity,
                                    filters={'name': {
                                        '==': value['name']
                                    }},
                                    project=['*'],
                                    tag='res')
                                dupl = builder.count(
                                ) or value['name'] in imported_comp_names
                                dupl_counter = 0
                                orig_name = value['name']
                                while dupl:
                                    # Rename the new computer
                                    value[
                                        'name'] = orig_name + DUPL_SUFFIX.format(
                                            dupl_counter)
                                    builder = QueryBuilder()
                                    builder.append(entity,
                                                   filters={
                                                       'name': {
                                                           '==': value['name']
                                                       }
                                                   },
                                                   project=['*'],
                                                   tag='res')
                                    dupl = builder.count(
                                    ) or value['name'] in imported_comp_names
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A computer of that name ( {} ) already exists and I could not create a '
                                            'new one'.format(orig_name))

                                imported_comp_names.add(value['name'])

                            if value[unique_identifier] in relevant_db_entries:
                                # Already in DB
                                # again, switched to entity_name in v0.3
                                existing_entries[entity_name][key] = value
                            else:
                                # To be added
                                new_entries[entity_name][key] = value
                    else:
                        new_entries[entity_name] = data['export_data'][
                            entity_name]

            # Progress bar - reset for import
            progress_bar = get_progress_bar(total=number_of_entities,
                                            disable=silent)
            reset_progress_bar = {}

            # I import data from the given model
            for entity_name in entity_order:
                entity = entity_names_to_entities[entity_name]
                fields_info = metadata['all_fields_info'].get(entity_name, {})
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, '')

                # Progress bar initialization - Model
                if reset_progress_bar:
                    progress_bar = get_progress_bar(
                        total=reset_progress_bar['total'], disable=silent)
                    progress_bar.n = reset_progress_bar['n']
                    reset_progress_bar = {}
                pbar_base_str = '{}s - '.format(entity_name)
                progress_bar.set_description_str(pbar_base_str +
                                                 'Initializing',
                                                 refresh=True)

                # EXISTING ENTRIES
                if existing_entries[entity_name]:
                    # Progress bar update - Model
                    progress_bar.set_description_str(
                        pbar_base_str + '{} existing entries'.format(
                            len(existing_entries[entity_name])),
                        refresh=True)

                for import_entry_pk, entry_data in existing_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    existing_entry_pk = foreign_ids_reverse_mappings[
                        entity_name][unique_id]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())
                    # TODO COMPARE, AND COMPARE ATTRIBUTES

                    if entity_name == COMMENT_ENTITY_NAME:
                        new_entry_uuid = merge_comment(import_data,
                                                       comment_mode)
                        if new_entry_uuid is not None:
                            entry_data[unique_identifier] = new_entry_uuid
                            new_entries[entity_name][
                                import_entry_pk] = entry_data

                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['existing'].append(
                        (import_entry_pk, existing_entry_pk))
                    IMPORT_LOGGER.debug('Existing %s: %s (%s->%s)',
                                        entity_name, unique_id,
                                        import_entry_pk, existing_entry_pk)

                # Store all objects for this model in a list, and store them
                # all at once at the end.
                objects_to_create = list()
                # In the following list we add the objects to be updated
                objects_to_update = list()
                # This is needed later to associate the import entry with the new pk
                import_new_entry_pks = dict()

                # NEW ENTRIES
                if new_entries[entity_name]:
                    # Progress bar update - Model
                    progress_bar.set_description_str(
                        pbar_base_str +
                        '{} new entries'.format(len(new_entries[entity_name])),
                        refresh=True)

                for import_entry_pk, entry_data in new_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())

                    # We convert the Django fields to SQLA. Note that some of
                    # the Django fields were converted to SQLA compatible
                    # fields by the deserialize_field method. This was done
                    # for optimization reasons in Django but makes them
                    # compatible with the SQLA schema and they don't need any
                    # further conversion.
                    if entity_name in file_fields_to_model_fields:
                        for file_fkey in file_fields_to_model_fields[
                                entity_name]:

                            # This is an exception because the DbLog model defines the `_metadata` column instead of the
                            # `metadata` column used in the Django model. This is because the SqlAlchemy model base
                            # class already has a metadata attribute that cannot be overridden. For consistency, the
                            # `DbLog` class however expects the `metadata` keyword in its constructor, so we should
                            # ignore the mapping here
                            if entity_name == LOG_ENTITY_NAME and file_fkey == 'metadata':
                                continue

                            model_fkey = file_fields_to_model_fields[
                                entity_name][file_fkey]
                            if model_fkey in import_data:
                                continue
                            import_data[model_fkey] = import_data[file_fkey]
                            import_data.pop(file_fkey, None)

                    db_entity = get_object_from_string(
                        entity_names_to_sqla_schema[entity_name])

                    objects_to_create.append(db_entity(**import_data))
                    import_new_entry_pks[unique_id] = import_entry_pk

                if entity_name == NODE_ENTITY_NAME:
                    IMPORT_LOGGER.debug(
                        'STORING NEW NODE REPOSITORY FILES & ATTRIBUTES...')

                    # NEW NODES
                    for object_ in objects_to_create:
                        import_entry_uuid = object_.uuid
                        import_entry_pk = import_new_entry_pks[
                            import_entry_uuid]

                        # Progress bar initialization - Node
                        progress_bar.update()
                        pbar_node_base_str = pbar_base_str + 'UUID={} - '.format(
                            import_entry_uuid.split('-')[0])

                        # Before storing entries in the DB, I store the files (if these are nodes).
                        # Note: only for new entries!
                        subfolder = folder.get_subfolder(
                            os.path.join(NODES_EXPORT_SUBFOLDER,
                                         export_shard_uuid(import_entry_uuid)))
                        if not subfolder.exists():
                            raise exceptions.CorruptArchive(
                                'Unable to find the repository folder for Node with UUID={} in the exported '
                                'file'.format(import_entry_uuid))
                        destdir = RepositoryFolder(
                            section=Repository._section_name,
                            uuid=import_entry_uuid)
                        # Replace the folder, possibly destroying existing previous folders, and move the files
                        # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Repository',
                                                         refresh=True)
                        destdir.replace_with_folder(subfolder.abspath,
                                                    move=True,
                                                    overwrite=True)

                        # For Nodes, we also have to store Attributes!
                        IMPORT_LOGGER.debug('STORING NEW NODE ATTRIBUTES...')
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Attributes',
                                                         refresh=True)

                        # Get attributes from import file
                        try:
                            object_.attributes = data['node_attributes'][str(
                                import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find attribute info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # For DbNodes, we also have to store extras
                        if extras_mode_new == 'import':
                            IMPORT_LOGGER.debug('STORING NEW NODE EXTRAS...')
                            progress_bar.set_description_str(
                                pbar_node_base_str + 'Extras', refresh=True)

                            # Get extras from import file
                            try:
                                extras = data['node_extras'][str(
                                    import_entry_pk)]
                            except KeyError:
                                raise exceptions.CorruptArchive(
                                    'Unable to find extra info for Node with UUID={}'
                                    .format(import_entry_uuid))
                            # TODO: remove when aiida extras will be moved somewhere else
                            # from here
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key.startswith('_aiida_')
                            }
                            if object_.node_type.endswith('code.Code.'):
                                extras = {
                                    key: value
                                    for key, value in extras.items()
                                    if not key == 'hidden'
                                }
                            # till here
                            object_.extras = extras
                        elif extras_mode_new == 'none':
                            IMPORT_LOGGER.debug('SKIPPING NEW NODE EXTRAS...')
                        else:
                            raise exceptions.ImportValidationError(
                                "Unknown extras_mode_new value: {}, should be either 'import' or 'none'"
                                ''.format(extras_mode_new))

                    # EXISTING NODES (Extras)
                    IMPORT_LOGGER.debug('UPDATING EXISTING NODE EXTRAS...')

                    import_existing_entry_pks = {
                        entry_data[unique_identifier]: import_entry_pk
                        for import_entry_pk, entry_data in
                        existing_entries[entity_name].items()
                    }
                    for node in session.query(DbNode).filter(
                            DbNode.uuid.in_(import_existing_entry_pks)).all():
                        import_entry_uuid = str(node.uuid)
                        import_entry_pk = import_existing_entry_pks[
                            import_entry_uuid]

                        # Progress bar initialization - Node
                        pbar_node_base_str = pbar_base_str + 'UUID={} - '.format(
                            import_entry_uuid.split('-')[0])
                        progress_bar.set_description_str(pbar_node_base_str +
                                                         'Extras',
                                                         refresh=False)
                        progress_bar.update()

                        # Get extras from import file
                        try:
                            extras = data['node_extras'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find extra info for Node with UUID={}'
                                .format(import_entry_uuid))

                        old_extras = node.extras.copy()
                        # TODO: remove when aiida extras will be moved somewhere else
                        # from here
                        extras = {
                            key: value
                            for key, value in extras.items()
                            if not key.startswith('_aiida_')
                        }
                        if node.node_type.endswith('code.Code.'):
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key == 'hidden'
                            }
                        # till here
                        new_extras = merge_extras(node.extras, extras,
                                                  extras_mode_existing)
                        if new_extras != old_extras:
                            node.extras = new_extras
                            flag_modified(node, 'extras')
                            objects_to_update.append(node)

                else:
                    # Update progress bar with new non-Node entries
                    progress_bar.update(n=len(existing_entries[entity_name]) +
                                        len(new_entries[entity_name]))

                progress_bar.set_description_str(pbar_base_str + 'Storing',
                                                 refresh=True)

                # Store them all at once; however, the PKs are not set this way...
                if objects_to_create:
                    session.add_all(objects_to_create)
                if objects_to_update:
                    session.add_all(objects_to_update)

                session.flush()

                just_saved = {}
                if import_new_entry_pks.keys():
                    reset_progress_bar = {
                        'total': progress_bar.total,
                        'n': progress_bar.n
                    }
                    progress_bar = get_progress_bar(
                        total=len(import_new_entry_pks), disable=silent)

                    builder = QueryBuilder()
                    builder.append(entity,
                                   filters={
                                       unique_identifier: {
                                           'in':
                                           list(import_new_entry_pks.keys())
                                       }
                                   },
                                   project=[unique_identifier, 'id'])

                    for entry in builder.iterall():
                        progress_bar.update()

                        just_saved.update({entry[0]: entry[1]})

                progress_bar.set_description_str(pbar_base_str + 'Done!',
                                                 refresh=True)

                # Now I have the PKs, print the info
                # Moreover, add newly created Nodes to foreign_ids_reverse_mappings
                for unique_id, new_pk in just_saved.items():
                    from uuid import UUID
                    if isinstance(unique_id, UUID):
                        unique_id = str(unique_id)
                    import_entry_pk = import_new_entry_pks[unique_id]
                    foreign_ids_reverse_mappings[entity_name][
                        unique_id] = new_pk
                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['new'].append(
                        (import_entry_pk, new_pk))

                    IMPORT_LOGGER.debug('N %s: %s (%s->%s)', entity_name,
                                        unique_id, import_entry_pk, new_pk)

            IMPORT_LOGGER.debug('STORING NODE LINKS...')

            import_links = data['links_uuid']

            if import_links:
                progress_bar = get_progress_bar(total=len(import_links),
                                                disable=silent)
                pbar_base_str = 'Links - '

            for link in import_links:
                # Check for dangling Links within the supposedly self-consistent archive
                progress_bar.set_description_str(
                    pbar_base_str + 'label={}'.format(link['label']),
                    refresh=False)
                progress_bar.update()

                try:
                    in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['input']]
                    out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['output']]
                except KeyError:
                    if ignore_unknown_nodes:
                        continue
                    raise exceptions.ImportValidationError(
                        'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, '
                        'label={}, type={})'.format(link['input'],
                                                    link['output'],
                                                    link['label'],
                                                    link['type']))

                # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them.
                source = QueryBuilder().append(Node,
                                               filters={
                                                   'id': in_id
                                               },
                                               project='*').first()[0]
                target = QueryBuilder().append(Node,
                                               filters={
                                                   'id': out_id
                                               },
                                               project='*').first()[0]
                link_type = LinkType(link['type'])

                # Check for existence of a triple link, i.e. unique triple.
                # If it exists, then the link already exists, continue to next link, otherwise, validate link.
                if link_triple_exists(source, target, link_type,
                                      link['label']):
                    continue

                try:
                    validate_link(source, target, link_type, link['label'])
                except ValueError as why:
                    raise exceptions.ImportValidationError(
                        'Error occurred during Link validation: {}'.format(
                            why))

                # New link
                session.add(
                    DbLink(input_id=in_id,
                           output_id=out_id,
                           label=link['label'],
                           type=link['type']))
                if 'Link' not in ret_dict:
                    ret_dict['Link'] = {'new': []}
                ret_dict['Link']['new'].append((in_id, out_id))

            IMPORT_LOGGER.debug('   (%d new links...)',
                                len(ret_dict.get('Link', {}).get('new', [])))

            IMPORT_LOGGER.debug('STORING GROUP ELEMENTS...')

            import_groups = data['groups_uuid']

            if import_groups:
                progress_bar = get_progress_bar(total=len(import_groups),
                                                disable=silent)
                pbar_base_str = 'Groups - '

            for groupuuid, groupnodes in import_groups.items():
                # TODO: cache these to avoid too many queries
                qb_group = QueryBuilder().append(
                    Group, filters={'uuid': {
                        '==': groupuuid
                    }})
                group_ = qb_group.first()[0]

                progress_bar.set_description_str(
                    pbar_base_str + 'label={}'.format(group_.label),
                    refresh=False)
                progress_bar.update()

                nodes_ids_to_add = [
                    foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid]
                    for node_uuid in groupnodes
                ]
                qb_nodes = QueryBuilder().append(
                    Node, filters={'id': {
                        'in': nodes_ids_to_add
                    }})
                # Adding nodes to group avoiding the SQLA ORM to increase speed
                nodes_to_add = [n[0].backend_entity for n in qb_nodes.all()]
                group_.backend_entity.add_nodes(nodes_to_add, skip_orm=True)

            ######################################################
            # Put everything in a specific group
            ######################################################
            existing = existing_entries.get(NODE_ENTITY_NAME, {})
            existing_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in existing.values()
            ]
            new = new_entries.get(NODE_ENTITY_NAME, {})
            new_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in new.values()
            ]

            pks_for_group = existing_pk + new_pk

            # So that we do not create empty groups
            if pks_for_group:
                # If the user did not specify a group, create a new, uniquely labelled import group
                if not group:
                    from aiida.backends.sqlalchemy.models.group import DbGroup

                    # Get an unique name for the import group, based on the current (local) time
                    basename = timezone.localtime(
                        timezone.now()).strftime('%Y%m%d-%H%M%S')
                    counter = 0
                    group_label = basename
                    while session.query(DbGroup).filter(
                            DbGroup.label == group_label).count() > 0:
                        counter += 1
                        group_label = '{}_{}'.format(basename, counter)

                        if counter == 100:
                            raise exceptions.ImportUniquenessError(
                                "Overflow of import groups (more than 100 import groups exists with basename '{}')"
                                ''.format(basename))
                    group = ImportGroup(label=group_label)
                    session.add(group.backend_entity._dbmodel)

                # Adding nodes to group avoiding the SQLA ORM to increase speed
                builder = QueryBuilder().append(
                    Node, filters={'id': {
                        'in': pks_for_group
                    }})

                progress_bar = get_progress_bar(total=len(pks_for_group),
                                                disable=silent)
                progress_bar.set_description_str(
                    'Creating import Group - Preprocessing', refresh=True)
                first = True

                nodes = []
                for entry in builder.iterall():
                    if first:
                        progress_bar.set_description_str(
                            'Creating import Group', refresh=False)
                        first = False
                    progress_bar.update()
                    nodes.append(entry[0].backend_entity)
                group.backend_entity.add_nodes(nodes, skip_orm=True)
                progress_bar.set_description_str('Done (cleaning up)',
                                                 refresh=True)
            else:
                IMPORT_LOGGER.debug(
                    'No Nodes to import, so no Group created, if it did not already exist'
                )

            IMPORT_LOGGER.debug('COMMITTING EVERYTHING...')
            session.commit()

            # Finalize Progress bar
            close_progress_bar(leave=False)

            # Summarize import
            result_summary(ret_dict, getattr(group, 'label', None))

        except:
            # Finalize Progress bar
            close_progress_bar(leave=False)

            result_summary({}, None)

            IMPORT_LOGGER.debug('Rolling back')
            session.rollback()
            raise

    # Reset logging level
    if silent:
        logging.disable(level=logging.NOTSET)

    return ret_dict
Exemplo n.º 12
0
def _field_value_in_use(entity, field: str, value) -> bool:
    """Return whether the database holds an ``entity`` row whose ``field`` equals ``value``."""
    return bool(QueryBuilder().append(entity,
                                      filters={
                                          field: {
                                              '==': value
                                          }
                                      }).count())


def _select_entity_data(*, entity_name: str, reader: ArchiveReaderAbstract,
                        new_entries: Dict[str, Dict[str, dict]],
                        existing_entries: Dict[str, Dict[str, dict]],
                        foreign_ids_reverse_mappings: Dict[str, Dict[str,
                                                                     int]],
                        extras_mode_new: str):
    """Select the data to import by comparing the AiiDA database to the archive contents.

    The mapping arguments are filled in place; nothing is returned. If
    ``entity_name`` is not present in the archive, none of them is touched.

    :param entity_name: name of the entity type to process; it is used as key
        into ``entity_names_to_entities`` and into the archive ``reader``.
    :param reader: archive reader providing entity names, counts, fields and
        the ``unique_identifiers`` metadata.
    :param new_entries: receives ``{entity_name: {str(pk): fields}}`` for the
        archive entries whose unique identifier was not found in the database
        (or for all entries, if the entity type has no unique identifier).
    :param existing_entries: receives ``{entity_name: {str(pk): fields}}`` for
        the archive entries whose unique identifier was found in the database.
    :param foreign_ids_reverse_mappings: ``[entity_name]`` is set to a mapping
        from unique identifier (as string) to the pk of the matching database
        entry. Not set when the entity type has no unique identifier.
    :param extras_mode_new: unless equal to ``'import'``, the ``extras`` field
        of new nodes is dropped.
    :raises exceptions.ImportUniquenessError: if the duplicate counter reaches
        ``MAX_GROUPS`` / ``MAX_COMPUTERS`` while looking for a free group label
        or computer name.
    """
    entity = entity_names_to_entities[entity_name]
    unique_identifier = reader.metadata.unique_identifiers.get(entity_name)

    # Not necessarily all models are present in the archive
    if entity_name not in reader.entity_names:
        return

    existing_entries.setdefault(entity_name, {})
    new_entries.setdefault(entity_name, {})

    if unique_identifier is None:
        # Nothing to match against the database: every archive entry is new
        new_entries[entity_name] = {
            str(pk): fields
            for pk, fields in reader.iter_entity_fields(entity_name)
        }
        return

    # Collect the unique identifiers of all archive entries, so that the
    # ones already present in the DB can be looked up with a single query
    import_unique_ids = {
        fields[unique_identifier]
        for _, fields in reader.iter_entity_fields(
            entity_name, fields=(unique_identifier, ))
    }

    relevant_db_entries = {}
    if import_unique_ids:
        builder = QueryBuilder()
        builder.append(entity,
                       filters={unique_identifier: {
                           'in': import_unique_ids
                       }},
                       project='*')

        # Count only once: every call to ``count`` is a database query
        existing_count = builder.count()
        if existing_count:
            with get_progress_reporter()(
                    desc=f'Finding existing entities - {entity_name}',
                    total=existing_count) as progress:
                for row in builder.iterall():
                    progress.update()
                    db_entry = row[0]
                    # Note: UUIDs need to be converted to strings
                    key = str(getattr(db_entry, unique_identifier))
                    relevant_db_entries[key] = db_entry

    foreign_ids_reverse_mappings[entity_name] = {
        key: db_entry.pk
        for key, db_entry in relevant_db_entries.items()
    }

    entity_count = reader.entity_count(entity_name)
    if not entity_count:
        return

    with get_progress_reporter()(
            desc=f'Reading archived entities - {entity_name}',
            total=entity_count) as progress:
        # Computer names handed out during this call; they are not in the
        # database yet, so they have to be tracked separately
        imported_comp_names = set()
        for pk, fields in reader.iter_entity_fields(entity_name):
            # Advance the reporter once per archive entry
            progress.update()

            if entity_name == GROUP_ENTITY_NAME:
                # Check if there is already a group with the same name,
                # and if so, recreate the name
                orig_label = fields['label']
                dupl_counter = 0
                while _field_value_in_use(entity, 'label', fields['label']):
                    # Rename the new group
                    fields['label'] = orig_label + DUPL_SUFFIX.format(
                        dupl_counter)
                    dupl_counter += 1
                    if dupl_counter == MAX_GROUPS:
                        raise exceptions.ImportUniquenessError(
                            f'A group of that label ( {orig_label} ) already exists and I could not create a new one'
                        )

            elif entity_name == COMPUTER_ENTITY_NAME:
                # The following is done for compatibility
                # reasons in case the archive file was generated
                # with the Django export method. In Django the
                # metadata and the transport parameters are
                # stored as (unicode) strings of the serialized
                # JSON objects and not as simple serialized
                # JSON objects.
                if isinstance(fields['metadata'], (str, bytes)):
                    fields['metadata'] = json.loads(fields['metadata'])

                # Check if there is already a computer with the same name,
                # either in the database or among those selected so far
                orig_name = fields['name']
                dupl_counter = 0
                while (fields['name'] in imported_comp_names
                       or _field_value_in_use(entity, 'name',
                                              fields['name'])):
                    # Rename the new computer
                    fields['name'] = orig_name + DUPL_SUFFIX.format(
                        dupl_counter)
                    dupl_counter += 1
                    if dupl_counter == MAX_COMPUTERS:
                        raise exceptions.ImportUniquenessError(
                            f'A computer of that name ( {orig_name} ) already exists and I could not create a new one'
                        )

                imported_comp_names.add(fields['name'])

            if fields[unique_identifier] in relevant_db_entries:
                # Already in DB
                existing_entries[entity_name][str(pk)] = fields
            else:
                # To be added
                if entity_name == NODE_ENTITY_NAME:
                    # format extras
                    fields = _sanitize_extras(fields)
                    if extras_mode_new != 'import':
                        fields.pop('extras', None)
                new_entries[entity_name][str(pk)] = fields