Example #1
def cmd_import(ctx, archives, webpages, group, extras_mode_existing,
               extras_mode_new, comment_mode, migration, non_interactive):
    """Import data from an AiiDA archive file.

    The archive can be specified by its relative or absolute file path, or its HTTP URL.
    """
    from six.moves import urllib

    from aiida.common.folders import SandboxFolder
    from aiida.tools.importexport.common.utils import get_valid_import_links

    archives_url = []
    archives_file = []

    # Build list of archives to be imported
    for archive in archives:
        if archive.startswith('http://') or archive.startswith('https://'):
            archives_url.append(archive)
        else:
            archives_file.append(archive)

    # Discover and retrieve *.aiida files at URL(s)
    if webpages is not None:
        for webpage in webpages:
            try:
                echo.echo_info(
                    'retrieving archive URLs from {}'.format(webpage))
                urls = get_valid_import_links(webpage)
            except Exception:
                echo.echo_error(
                    'an exception occurred while trying to discover archives at URL {}'
                    .format(webpage))
                echo.echo(traceback.format_exc())
                if not non_interactive:
                    click.confirm('do you want to continue?', abort=True)
            else:
                echo.echo_success(
                    '{} archive URLs discovered and added'.format(len(urls)))
                archives_url += urls

    # Preliminary sanity check
    if not archives_url + archives_file:
        echo.echo_critical('no valid exported archives were found')

    # Import initialization
    import_opts = {
        'file_to_import': '',
        'archive': '',
        'group': group,
        'migration': migration,
        'extras_mode_existing': ExtrasImportCode[extras_mode_existing].value,
        'extras_mode_new': extras_mode_new,
        'comment_mode': comment_mode,
        'non_interactive': non_interactive
    }

    # Import local archives
    for archive in archives_file:

        echo.echo_info('importing archive {}'.format(archive))

        # Initialization
        import_opts['archive'] = archive
        import_opts['file_to_import'] = import_opts['archive']

        # First attempt to import archive
        migrate_archive = _try_import(migration_performed=False, **import_opts)

        # Migrate archive if needed and desired
        if migrate_archive:
            with SandboxFolder() as temp_folder:
                import_opts['file_to_import'] = _migrate_archive(
                    ctx, temp_folder, **import_opts)
                _try_import(migration_performed=True, **import_opts)

    # Import web-archives
    for archive in archives_url:

        # Initialization
        import_opts['archive'] = archive

        echo.echo_info('downloading archive {}'.format(archive))

        try:
            response = urllib.request.urlopen(archive)
        except Exception as exception:
            echo.echo_warning('downloading archive {} failed: {}'.format(
                archive, exception))
            continue

        with SandboxFolder() as temp_folder:
            temp_file = 'importfile.tar.gz'

            # Download archive to temporary file
            temp_folder.create_file_from_filelike(response, temp_file)
            echo.echo_success('archive downloaded, proceeding with import')

            # First attempt to import archive
            import_opts['file_to_import'] = temp_folder.get_abs_path(temp_file)
            migrate_archive = _try_import(migration_performed=False,
                                          **import_opts)

            # Migrate archive if needed and desired
            if migrate_archive:
                import_opts['file_to_import'] = _migrate_archive(
                    ctx, temp_folder, **import_opts)
                _try_import(migration_performed=True, **import_opts)
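For reference, a minimal standalone sketch of the download-into-a-temporary-folder pattern used for web archives above, using only the standard library (``tempfile`` stands in for ``SandboxFolder``; the function name is illustrative):

import os
import tempfile
import urllib.request


def download_archive_to_tempdir(url, filename='importfile.tar.gz'):
    """Download ``url`` into a fresh temporary directory and return the local file path."""
    tempdir = tempfile.mkdtemp()
    local_path = os.path.join(tempdir, filename)
    with urllib.request.urlopen(url) as response, open(local_path, 'wb') as handle:
        handle.write(response.read())
    return local_path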
Example #2
def populate_builder(remote_data, code=None, metadata=None):
    """create ``crystal17.main`` input nodes from an existing run

    NB: none of the nodes are stored, also
    existing basis will be retrieved if availiable

    Parameters
    ----------
    folder: aiida.common.folders.Folder or str
        folder containing the input and output files
    remote_data: aiida.orm.RemoteData
        containing the input and output files required for parsing
    code: str or aiida.orm.nodes.data.code.Code or None
    metadata: dict or None
        calculation metadata

    Returns
    -------
    aiida.engine.processes.ProcessBuilder

    """
    calc_cls = CalculationFactory("crystal17.main")
    basis_cls = DataFactory("crystal17.basisset")
    struct_cls = DataFactory("structure")
    symmetry_cls = DataFactory("crystal17.symmetry")
    kind_cls = DataFactory("crystal17.kinds")

    # get files
    in_file_name = calc_cls.spec_options.get("input_file_name").default
    out_file_name = calc_cls.spec_options.get("output_main_file_name").default
    if metadata and "options" in metadata:
        in_file_name = metadata["options"].get("input_file_name", in_file_name)
        out_file_name = metadata["options"].get("output_main_file_name",
                                                out_file_name)

    remote_files = remote_data.listdir()

    if in_file_name not in remote_files:
        raise IOError(
            "The input file '{}' is not contained in the remote_data folder. "
            "If it has a different name, change "
            "metadata['options']['input_file_name']".format(in_file_name))
    if out_file_name not in remote_files:
        raise IOError(
            "The output file '{}' is not contained in the remote_data folder. "
            "If it has a different name, change "
            "metadata['options']['output_main_file_name']".format(
                out_file_name))

    with SandboxFolder() as folder:
        remote_data.getfile(in_file_name,
                            os.path.join(folder.abspath, in_file_name))

        with folder.open(in_file_name, mode="r") as handle:
            param_dict, basis_sets, atom_props = extract_data(handle.read())

        remote_data.getfile(out_file_name,
                            os.path.join(folder.abspath, out_file_name))

        with folder.open(out_file_name, mode="r") as handle:
            try:
                data = crystal_stdout.read_crystal_stdout(handle.read())
            except IOError as err:
                raise OutputParsingError(
                    "Error in CRYSTAL 17 run output: {}".format(err))

    # we retrieve the initial primitive geometry and symmetry
    atoms = _create_atoms(data, "initial_geometry")

    # convert fragment (i.e. unfixed) to fixed
    if "fragment" in atom_props:
        frag = atom_props.pop("fragment")
        atom_props["fixed"] = [
            i + 1 for i in range(atoms.get_number_of_atoms())
            if i + 1 not in frag
        ]

    atoms.set_tags(_create_tags(atom_props, atoms))

    structure = struct_cls(ase=atoms)

    if atom_props:
        kind_names = structure.get_kind_names()
        kinds_dict = {"kind_names": kind_names}
        for key, atom_indexes in atom_props.items():
            kv_map = {
                kn: i + 1 in atom_indexes
                for i, kn in enumerate(structure.get_site_kindnames())
            }
            kinds_dict[key] = [kv_map[kn] for kn in kind_names]
        kinds = kind_cls(data=kinds_dict)
    else:
        kinds = None

    symmetry = symmetry_cls(
        data={
            "operations": data["initial_geometry"]["primitive_symmops"],
            "basis": "fractional",
            "hall_number": None,
        })

    bases = {}
    for bset in basis_sets:

        bfile = tempfile.NamedTemporaryFile(delete=False)
        try:
            with open(bfile.name, "w") as f:
                f.write(bset)
            bdata, _ = basis_cls.get_or_create(bfile.name,
                                               use_first=False,
                                               store_basis=False)
            # TODO report if bases created or retrieved
        finally:
            os.remove(bfile.name)

        bases[bdata.element] = bdata

    builder = calc_cls.create_builder(
        param_dict,
        structure,
        bases,
        symmetry=symmetry,
        kinds=kinds,
        code=code,
        metadata=metadata,
    )

    return builder
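A hypothetical usage of the builder factory above (the node PK, code label and resources are placeholders; nothing is stored until the builder is submitted):

from aiida.engine import submit
from aiida.orm import load_node

remote = load_node(1234)  # placeholder PK of a RemoteData node from a previous run
builder = populate_builder(
    remote,
    code="crystal17.main@localhost",  # placeholder code label
    metadata={"options": {"resources": {"num_machines": 1}}},
)
submit(builder)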
Example #3
    def test_control_of_licenses(self):
        """Test control of licenses."""
        from aiida.common.folders import SandboxFolder
        from aiida.tools.importexport.dbexport import export_tree

        struct = orm.StructureData()
        struct.source = {'license': 'GPL'}
        struct.store()

        folder = SandboxFolder()
        export_tree([struct],
                    folder=folder,
                    silent=True,
                    allowed_licenses=['GPL'])
        # Folder should contain two files of metadata + nodes/
        self.assertEqual(len(folder.get_content_list()), 3)

        folder = SandboxFolder()
        export_tree([struct],
                    folder=folder,
                    silent=True,
                    forbidden_licenses=['Academic'])
        # Folder should contain two files of metadata + nodes/
        self.assertEqual(len(folder.get_content_list()), 3)

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        allowed_licenses=['CC0'])

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        forbidden_licenses=['GPL'])

        def cc_filter(license_):
            return license_.startswith('CC')

        def gpl_filter(license_):
            return license_ == 'GPL'

        def crashing_filter():
            raise NotImplementedError('not implemented yet')

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        allowed_licenses=cc_filter)

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        forbidden_licenses=gpl_filter)

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        allowed_licenses=crashing_filter)

        folder = SandboxFolder()
        with self.assertRaises(LicensingException):
            export_tree([struct],
                        folder=folder,
                        silent=True,
                        forbidden_licenses=crashing_filter)
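A minimal sketch of the license-filter semantics exercised by the test above (this is not the actual AiiDA implementation): a node's license must match ``allowed_licenses`` (list or callable), if given, and must not match ``forbidden_licenses``.

def license_is_acceptable(license_, allowed_licenses=None, forbidden_licenses=None):
    """Return True if ``license_`` passes both filters (lists or callables)."""
    if allowed_licenses is not None:
        allowed = (allowed_licenses(license_) if callable(allowed_licenses)
                   else license_ in allowed_licenses)
        if not allowed:
            return False
    if forbidden_licenses is not None:
        forbidden = (forbidden_licenses(license_) if callable(forbidden_licenses)
                     else license_ in forbidden_licenses)
        if forbidden:
            return False
    return True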
Example #4
def import_data_sqla(in_path,
                     group=None,
                     ignore_unknown_nodes=False,
                     extras_mode_existing='kcl',
                     extras_mode_new='import',
                     comment_mode='newest',
                     silent=False):
    """Import exported AiiDA archive to the AiiDA database and repository.

    Specific for the SQLAlchemy backend.
    If ``in_path`` is a folder, calls extract_tree; otherwise, tries to detect the compression format
    (zip, tar.gz, tar.bz2, ...) and calls the correct function.

    :param in_path: the path to a file or folder that can be imported in AiiDA.
    :type in_path: str

    :param group: Group wherein all imported Nodes will be placed.
    :type group: :py:class:`~aiida.orm.groups.Group`

    :param extras_mode_existing: 3 letter code that will identify what to do with the extras import.
        The first letter acts on extras that are present in the original node and not present in the imported node.
        Can be either:
        'k' (keep it) or
        'n' (do not keep it).
        The second letter acts on the imported extras that are not present in the original node.
        Can be either:
        'c' (create it) or
        'n' (do not create it).
        The third letter defines what to do in case of a name collision.
        Can be either:
        'l' (leave the old value),
        'u' (update with a new value),
        'd' (delete the extra), or
        'a' (ask what to do if the content is different).
    :type extras_mode_existing: str

    :param extras_mode_new: 'import' to import extras of new nodes or 'none' to ignore them.
    :type extras_mode_new: str

    :param comment_mode: Comment import modes (when same UUIDs are found).
        Can be either:
        'newest' (will keep the Comment with the most recent modification time (mtime)) or
        'overwrite' (will overwrite existing Comments with the ones from the import file).
    :type comment_mode: str

    :param silent: suppress prints.
    :type silent: bool

    :return: New and existing Nodes and Links.
    :rtype: dict

    :raises `~aiida.tools.importexport.common.exceptions.ImportValidationError`: if parameters or the contents of
        `metadata.json` or `data.json` can not be validated.
    :raises `~aiida.tools.importexport.common.exceptions.CorruptArchive`: if the provided archive at ``in_path`` is
        corrupted.
    :raises `~aiida.tools.importexport.common.exceptions.IncompatibleArchiveVersionError`: if the provided archive's
        export version is not equal to the export version of AiiDA at the moment of import.
    :raises `~aiida.tools.importexport.common.exceptions.ArchiveImportError`: if there are any internal errors when
        importing.
    :raises `~aiida.tools.importexport.common.exceptions.ImportUniquenessError`: if a new unique entity can not be
        created.
    """
    from aiida.backends.sqlalchemy.models.node import DbNode, DbLink
    from aiida.backends.sqlalchemy.utils import flag_modified

    # This is the export version expected by this function
    expected_export_version = StrictVersion(EXPORT_VERSION)

    # The returned dictionary with new and existing nodes and links
    ret_dict = {}

    # Initial check(s)
    if group:
        if not isinstance(group, Group):
            raise exceptions.ImportValidationError(
                'group must be a Group entity')
        elif not group.is_stored:
            group.store()

    ################
    # EXTRACT DATA #
    ################
    # The sandbox has to remain open until the end
    with SandboxFolder() as folder:
        if os.path.isdir(in_path):
            extract_tree(in_path, folder)
        else:
            if tarfile.is_tarfile(in_path):
                extract_tar(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
            elif zipfile.is_zipfile(in_path):
                extract_zip(in_path,
                            folder,
                            silent=silent,
                            nodes_export_subfolder=NODES_EXPORT_SUBFOLDER)
            else:
                raise exceptions.ImportValidationError(
                    'Unable to detect the input file format, it is neither a '
                    '(possibly compressed) tar file, nor a zip file.')

        if not folder.get_content_list():
            raise exceptions.CorruptArchive(
                'The provided file/folder ({}) is empty'.format(in_path))
        try:
            with open(folder.get_abs_path('metadata.json'),
                      encoding='utf8') as fhandle:
                metadata = json.load(fhandle)

            with open(folder.get_abs_path('data.json'),
                      encoding='utf8') as fhandle:
                data = json.load(fhandle)
        except IOError as error:
            raise exceptions.CorruptArchive(
                'Unable to find the file {} in the import file or folder'.
                format(error.filename))

        ######################
        # PRELIMINARY CHECKS #
        ######################
        export_version = StrictVersion(str(metadata['export_version']))
        if export_version != expected_export_version:
            msg = 'Export file version is {}, can import only version {}'\
                    .format(metadata['export_version'], expected_export_version)
            if export_version < expected_export_version:
                msg += "\nUse 'verdi export migrate' to update this export file."
            else:
                msg += '\nUpdate your AiiDA version in order to import this file.'

            raise exceptions.IncompatibleArchiveVersionError(msg)

        ###################################################################
        #           CREATE UUID REVERSE TABLES AND CHECK IF               #
        #              I HAVE ALL NODES FOR THE LINKS                     #
        ###################################################################
        linked_nodes = set(
            chain.from_iterable(
                (l['input'], l['output']) for l in data['links_uuid']))
        group_nodes = set(chain.from_iterable(data['groups_uuid'].values()))

        # Check that UUIDs are valid
        linked_nodes = set(x for x in linked_nodes if validate_uuid(x))
        group_nodes = set(x for x in group_nodes if validate_uuid(x))

        import_nodes_uuid = set()
        for value in data['export_data'].get(NODE_ENTITY_NAME, {}).values():
            import_nodes_uuid.add(value['uuid'])

        unknown_nodes = linked_nodes.union(group_nodes) - import_nodes_uuid

        if unknown_nodes and not ignore_unknown_nodes:
            raise exceptions.DanglingLinkError(
                'The import file refers to {} nodes with unknown UUID, therefore it cannot be imported. Either first '
                'import the unknown nodes, or export also the parents when exporting. The unknown UUIDs are:\n'
                ''.format(len(unknown_nodes)) +
                '\n'.join('* {}'.format(uuid) for uuid in unknown_nodes))

        ###################################
        # DOUBLE-CHECK MODEL DEPENDENCIES #
        ###################################
        # The entity import order. It is defined by the database model relationships.
        entity_sig_order = [
            entity_names_to_signatures[m]
            for m in (USER_ENTITY_NAME, COMPUTER_ENTITY_NAME, NODE_ENTITY_NAME,
                      GROUP_ENTITY_NAME, LOG_ENTITY_NAME, COMMENT_ENTITY_NAME)
        ]

        #  I make a new list that contains the entity names:
        # eg: ['User', 'Computer', 'Node', 'Group']
        all_entity_names = [
            signatures_to_entity_names[entity_sig]
            for entity_sig in entity_sig_order
        ]
        for import_field_name in metadata['all_fields_info']:
            if import_field_name not in all_entity_names:
                raise exceptions.ImportValidationError(
                    "You are trying to import an unknown model '{}'!".format(
                        import_field_name))

        for idx, entity_sig in enumerate(entity_sig_order):
            dependencies = []
            entity_name = signatures_to_entity_names[entity_sig]
            # For every field, check the dependencies given as the value of the 'requires' key
            for field in metadata['all_fields_info'][entity_name].values():
                try:
                    dependencies.append(field['requires'])
                except KeyError:
                    # (No ForeignKey)
                    pass
            for dependency in dependencies:
                if dependency not in all_entity_names[:idx]:
                    raise exceptions.ArchiveImportError(
                        'Entity {} requires {} but would be loaded first; stopping...'
                        .format(entity_sig, dependency))

        ###################################################
        # CREATE IMPORT DATA DIRECT UNIQUE_FIELD MAPPINGS #
        ###################################################
        # This is nested dictionary of entity_name:{id:uuid}
        # to map one id (the pk) to a different one.
        # One of the things to remove for v0.4
        # {
        # 'Node': {2362: '82a897b5-fb3a-47d7-8b22-c5fe1b4f2c14',
        #           2363: 'ef04aa5d-99e7-4bfd-95ef-fe412a6a3524', 2364: '1dc59576-af21-4d71-81c2-bac1fc82a84a'},
        # 'User': {1: 'aiida@localhost'}
        # }
        import_unique_ids_mappings = {}
        # Since export version 0.3, the export data is keyed by entity name
        for entity_name, import_data in data['export_data'].items():
            # Again I need the entity_name since that's what's being stored since 0.3
            if entity_name in metadata['unique_identifiers']:
                # I have to reconvert the pk to integer
                import_unique_ids_mappings[entity_name] = {
                    int(k): v[metadata['unique_identifiers'][entity_name]]
                    for k, v in import_data.items()
                }
        ###############
        # IMPORT DATA #
        ###############
        # DO ALL WITH A TRANSACTION
        import aiida.backends.sqlalchemy

        session = aiida.backends.sqlalchemy.get_scoped_session()

        try:
            foreign_ids_reverse_mappings = {}
            new_entries = {}
            existing_entries = {}

            # I first generate the list of data
            for entity_sig in entity_sig_order:
                entity_name = signatures_to_entity_names[entity_sig]
                entity = entity_names_to_entities[entity_name]
                # Get the unique identifier which, since v0.3, is stored under the entity name
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, None)

                # Initialize the per-entity dictionaries; since v0.3 they are keyed by entity_name
                new_entries[entity_name] = {}
                existing_entries[entity_name] = {}
                foreign_ids_reverse_mappings[entity_name] = {}

                # Not necessarily all models are exported
                if entity_name in data['export_data']:

                    if unique_identifier is not None:
                        import_unique_ids = set(
                            v[unique_identifier]
                            for v in data['export_data'][entity_name].values())

                        relevant_db_entries = dict()
                        if import_unique_ids:
                            builder = QueryBuilder()
                            builder.append(entity,
                                           filters={
                                               unique_identifier: {
                                                   'in': import_unique_ids
                                               }
                                           },
                                           project=['*'],
                                           tag='res')
                            relevant_db_entries = {
                                # str() to convert UUID() to string
                                str(getattr(v[0], unique_identifier)): v[0]
                                for v in builder.all()
                            }

                            foreign_ids_reverse_mappings[entity_name] = {
                                k: v.pk
                                for k, v in relevant_db_entries.items()
                            }

                        imported_comp_names = set()
                        for key, value in data['export_data'][
                                entity_name].items():
                            if entity_name == GROUP_ENTITY_NAME:
                                # Check if there is already a group with the same label
                                # and, if so, rename the imported group
                                orig_label = value['label']
                                dupl_counter = 0
                                while QueryBuilder().append(
                                        entity,
                                        filters={
                                            'label': {
                                                '==': value['label']
                                            }
                                        }).count():
                                    # Rename the new group
                                    value[
                                        'label'] = orig_label + DUPL_SUFFIX.format(
                                            dupl_counter)
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A group of that label ( {} ) already exists and I could not create a new '
                                            'one'.format(orig_label))

                            elif entity_name == COMPUTER_ENTITY_NAME:
                                # The following is done for compatibility
                                # reasons in case the export file was generated
                                # with the Django export method. In Django the
                                # metadata and the transport parameters are
                                # stored as (unicode) strings of the serialized
                                # JSON objects and not as simple serialized
                                # JSON objects.
                                if isinstance(value['metadata'], (str, bytes)):
                                    value['metadata'] = json.loads(
                                        value['metadata'])

                                # Check if there is already a computer with the
                                # same name in the database
                                builder = QueryBuilder()
                                builder.append(
                                    entity,
                                    filters={'name': {
                                        '==': value['name']
                                    }},
                                    project=['*'],
                                    tag='res')
                                dupl = (builder.count() or value['name']
                                        in imported_comp_names)
                                dupl_counter = 0
                                orig_name = value['name']
                                while dupl:
                                    # Rename the new computer
                                    value['name'] = (
                                        orig_name +
                                        DUPL_SUFFIX.format(dupl_counter))
                                    builder = QueryBuilder()
                                    builder.append(entity,
                                                   filters={
                                                       'name': {
                                                           '==': value['name']
                                                       }
                                                   },
                                                   project=['*'],
                                                   tag='res')
                                    dupl = (builder.count() or value['name']
                                            in imported_comp_names)
                                    dupl_counter += 1
                                    if dupl_counter == 100:
                                        raise exceptions.ImportUniquenessError(
                                            'A computer of that name ( {} ) already exists and I could not create a '
                                            'new one'.format(orig_name))

                                imported_comp_names.add(value['name'])

                            if value[unique_identifier] in relevant_db_entries:
                                # Already in DB
                                # again, switched to entity_name in v0.3
                                existing_entries[entity_name][key] = value
                            else:
                                # To be added
                                new_entries[entity_name][key] = value
                    else:
                        # No unique identifier for this entity: treat all entries as new
                        # (copy so the original archive data is left unmodified)
                        new_entries[entity_name] = data['export_data'][
                            entity_name].copy()

            # Show Comment mode if not silent
            if not silent:
                print('Comment mode: {}'.format(comment_mode))

            # I import data from the given model
            for entity_sig in entity_sig_order:
                entity_name = signatures_to_entity_names[entity_sig]
                entity = entity_names_to_entities[entity_name]
                fields_info = metadata['all_fields_info'].get(entity_name, {})
                unique_identifier = metadata['unique_identifiers'].get(
                    entity_name, '')

                # EXISTING ENTRIES
                for import_entry_pk, entry_data in existing_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    existing_entry_pk = foreign_ids_reverse_mappings[
                        entity_name][unique_id]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())
                    # TODO COMPARE, AND COMPARE ATTRIBUTES

                    if entity_sig is entity_names_to_signatures[
                            COMMENT_ENTITY_NAME]:
                        new_entry_uuid = merge_comment(import_data,
                                                       comment_mode)
                        if new_entry_uuid is not None:
                            entry_data[unique_identifier] = new_entry_uuid
                            new_entries[entity_name][
                                import_entry_pk] = entry_data

                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['existing'].append(
                        (import_entry_pk, existing_entry_pk))
                    if not silent:
                        print('existing %s: %s (%s->%s)' %
                              (entity_sig, unique_id, import_entry_pk,
                               existing_entry_pk))

                # Store all objects for this model in a list, and store them
                # all at once at the end.
                objects_to_create = list()
                # In the following list we add the objects to be updated
                objects_to_update = list()
                # This is needed later to associate the import entry with the new pk
                import_new_entry_pks = dict()

                # NEW ENTRIES
                for import_entry_pk, entry_data in new_entries[
                        entity_name].items():
                    unique_id = entry_data[unique_identifier]
                    import_data = dict(
                        deserialize_field(k,
                                          v,
                                          fields_info=fields_info,
                                          import_unique_ids_mappings=
                                          import_unique_ids_mappings,
                                          foreign_ids_reverse_mappings=
                                          foreign_ids_reverse_mappings)
                        for k, v in entry_data.items())

                    # We convert the Django fields to SQLA. Note that some of
                    # the Django fields were converted to SQLA compatible
                    # fields by the deserialize_field method. This was done
                    # for optimization reasons in Django but makes them
                    # compatible with the SQLA schema and they don't need any
                    # further conversion.
                    if entity_name in file_fields_to_model_fields:
                        for file_fkey in file_fields_to_model_fields[
                                entity_name]:

                            # This is an exception because the DbLog model defines the `_metadata` column instead of the
                            # `metadata` column used in the Django model. This is because the SqlAlchemy model base
                            # class already has a metadata attribute that cannot be overridden. For consistency, the
                            # `DbLog` class however expects the `metadata` keyword in its constructor, so we should
                            # ignore the mapping here
                            if entity_name == LOG_ENTITY_NAME and file_fkey == 'metadata':
                                continue

                            model_fkey = file_fields_to_model_fields[
                                entity_name][file_fkey]
                            if model_fkey in import_data:
                                continue
                            import_data[model_fkey] = import_data[file_fkey]
                            import_data.pop(file_fkey, None)

                    db_entity = get_object_from_string(
                        entity_names_to_sqla_schema[entity_name])

                    objects_to_create.append(db_entity(**import_data))
                    import_new_entry_pks[unique_id] = import_entry_pk

                if entity_sig == entity_names_to_signatures[NODE_ENTITY_NAME]:
                    if not silent:
                        print(
                            'STORING NEW NODE REPOSITORY FILES & ATTRIBUTES...'
                        )

                    # NEW NODES
                    for object_ in objects_to_create:
                        import_entry_uuid = object_.uuid
                        import_entry_pk = import_new_entry_pks[
                            import_entry_uuid]

                        # Before storing entries in the DB, I store the files (if these are nodes).
                        # Note: only for new entries!
                        subfolder = folder.get_subfolder(
                            os.path.join(NODES_EXPORT_SUBFOLDER,
                                         export_shard_uuid(import_entry_uuid)))
                        if not subfolder.exists():
                            raise exceptions.CorruptArchive(
                                'Unable to find the repository folder for Node with UUID={} in the exported '
                                'file'.format(import_entry_uuid))
                        destdir = RepositoryFolder(
                            section=Repository._section_name,
                            uuid=import_entry_uuid)
                        # Replace the folder, possibly destroying existing previous folders, and move the files
                        # (faster if we are on the same filesystem, and in any case the source is a SandboxFolder)
                        destdir.replace_with_folder(subfolder.abspath,
                                                    move=True,
                                                    overwrite=True)

                        # For Nodes, we also have to store Attributes!
                        # Get attributes from import file
                        try:
                            object_.attributes = data['node_attributes'][str(
                                import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find attribute info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # For DbNodes, we also have to store extras
                        # Get extras from import file
                        if extras_mode_new == 'import':
                            if not silent:
                                print('STORING NEW NODE EXTRAS...')
                            try:
                                extras = data['node_extras'][str(
                                    import_entry_pk)]
                            except KeyError:
                                raise exceptions.CorruptArchive(
                                    'Unable to find extra info for Node with UUID={}'
                                    .format(import_entry_uuid))
                            # TODO: remove when aiida extras will be moved somewhere else
                            # from here
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key.startswith('_aiida_')
                            }
                            if object_.node_type.endswith('code.Code.'):
                                extras = {
                                    key: value
                                    for key, value in extras.items()
                                    if not key == 'hidden'
                                }
                            # till here
                            object_.extras = extras
                        elif extras_mode_new == 'none':
                            if not silent:
                                print('SKIPPING NEW NODE EXTRAS...')
                        else:
                            raise exceptions.ImportValidationError(
                                "Unknown extras_mode_new value: {}, should be either 'import' or 'none'"
                                ''.format(extras_mode_new))

                    # EXISTING NODES (Extras)
                    if not silent:
                        print(
                            'UPDATING EXISTING NODE EXTRAS (mode: {})'.format(
                                extras_mode_existing))

                    import_existing_entry_pks = {
                        entry_data[unique_identifier]: import_entry_pk
                        for import_entry_pk, entry_data in
                        existing_entries[entity_name].items()
                    }
                    for node in session.query(DbNode).filter(
                            DbNode.uuid.in_(import_existing_entry_pks)).all():
                        import_entry_uuid = str(node.uuid)
                        import_entry_pk = import_existing_entry_pks[
                            import_entry_uuid]

                        # Get extras from import file
                        try:
                            extras = data['node_extras'][str(import_entry_pk)]
                        except KeyError:
                            raise exceptions.CorruptArchive(
                                'Unable to find extra info for Node with UUID={}'
                                .format(import_entry_uuid))

                        # TODO: remove when aiida extras will be moved somewhere else
                        # from here
                        extras = {
                            key: value
                            for key, value in extras.items()
                            if not key.startswith('_aiida_')
                        }
                        if node.node_type.endswith('code.Code.'):
                            extras = {
                                key: value
                                for key, value in extras.items()
                                if not key == 'hidden'
                            }
                        # till here
                        node.extras = merge_extras(node.extras, extras,
                                                   extras_mode_existing)
                        flag_modified(node, 'extras')
                        objects_to_update.append(node)

                # Store them all at once; however, the PKs are not set this way...
                if objects_to_create:
                    session.add_all(objects_to_create)
                if objects_to_update:
                    session.add_all(objects_to_update)

                session.flush()

                if import_new_entry_pks.keys():
                    builder = QueryBuilder()
                    builder.append(entity,
                                   filters={
                                       unique_identifier: {
                                           'in':
                                           list(import_new_entry_pks.keys())
                                       }
                                   },
                                   project=[unique_identifier, 'id'],
                                   tag='res')
                    just_saved = {v[0]: v[1] for v in builder.all()}
                else:
                    just_saved = dict()

                # Now that I have the PKs, print the info
                # Moreover, add newly created Nodes to foreign_ids_reverse_mappings
                for unique_id, new_pk in just_saved.items():
                    from uuid import UUID
                    if isinstance(unique_id, UUID):
                        unique_id = str(unique_id)
                    import_entry_pk = import_new_entry_pks[unique_id]
                    foreign_ids_reverse_mappings[entity_name][
                        unique_id] = new_pk
                    if entity_name not in ret_dict:
                        ret_dict[entity_name] = {'new': [], 'existing': []}
                    ret_dict[entity_name]['new'].append(
                        (import_entry_pk, new_pk))

                    if not silent:
                        print('NEW %s: %s (%s->%s)' %
                              (entity_sig, unique_id, import_entry_pk, new_pk))

            if not silent:
                print('STORING NODE LINKS...')

            import_links = data['links_uuid']

            for link in import_links:
                # Check for dangling Links within the supposedly self-consistent archive
                try:
                    in_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['input']]
                    out_id = foreign_ids_reverse_mappings[NODE_ENTITY_NAME][
                        link['output']]
                except KeyError:
                    if ignore_unknown_nodes:
                        continue
                    raise exceptions.ImportValidationError(
                        'Trying to create a link with one or both unknown nodes, stopping (in_uuid={}, out_uuid={}, '
                        'label={}, type={})'.format(link['input'],
                                                    link['output'],
                                                    link['label'],
                                                    link['type']))

                # Since backend specific Links (DbLink) are not validated upon creation, we will now validate them.
                source = QueryBuilder().append(Node,
                                               filters={
                                                   'id': in_id
                                               },
                                               project='*').first()[0]
                target = QueryBuilder().append(Node,
                                               filters={
                                                   'id': out_id
                                               },
                                               project='*').first()[0]
                link_type = LinkType(link['type'])

                # Check for existence of a triple link, i.e. unique triple.
                # If it exists, then the link already exists, continue to next link, otherwise, validate link.
                if link_triple_exists(source, target, link_type,
                                      link['label']):
                    continue

                try:
                    validate_link(source, target, link_type, link['label'])
                except ValueError as why:
                    raise exceptions.ImportValidationError(
                        'Error occurred during Link validation: {}'.format(
                            why))

                # New link
                session.add(
                    DbLink(input_id=in_id,
                           output_id=out_id,
                           label=link['label'],
                           type=link['type']))
                if 'Link' not in ret_dict:
                    ret_dict['Link'] = {'new': []}
                ret_dict['Link']['new'].append((in_id, out_id))

            if not silent:
                print('   ({} new links...)'.format(
                    len(ret_dict.get('Link', {}).get('new', []))))

            if not silent:
                print('STORING GROUP ELEMENTS...')
            import_groups = data['groups_uuid']
            for groupuuid, groupnodes in import_groups.items():
                # TODO: cache these to avoid too many queries
                qb_group = QueryBuilder().append(
                    Group, filters={'uuid': {
                        '==': groupuuid
                    }})
                group_ = qb_group.first()[0]
                nodes_ids_to_add = [
                    foreign_ids_reverse_mappings[NODE_ENTITY_NAME][node_uuid]
                    for node_uuid in groupnodes
                ]
                qb_nodes = QueryBuilder().append(
                    Node, filters={'id': {
                        'in': nodes_ids_to_add
                    }})
                # Add nodes to the group, bypassing the SQLA ORM to increase speed
                nodes_to_add = [n[0].backend_entity for n in qb_nodes.all()]
                group_.backend_entity.add_nodes(nodes_to_add, skip_orm=True)

            ######################################################
            # Put everything in a specific group
            ######################################################
            existing = existing_entries.get(NODE_ENTITY_NAME, {})
            existing_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in existing.values()
            ]
            new = new_entries.get(NODE_ENTITY_NAME, {})
            new_pk = [
                foreign_ids_reverse_mappings[NODE_ENTITY_NAME][v['uuid']]
                for v in new.values()
            ]

            pks_for_group = existing_pk + new_pk

            # So that we do not create empty groups
            if pks_for_group:
                # If the user did not specify a group, create a new, uniquely named import group
                if not group:
                    from aiida.backends.sqlalchemy.models.group import DbGroup

                    # Get a unique label for the import group, based on the current (local) time
                    basename = timezone.localtime(
                        timezone.now()).strftime('%Y%m%d-%H%M%S')
                    counter = 0
                    group_label = basename
                    while session.query(DbGroup).filter(
                            DbGroup.label == group_label).count() > 0:
                        counter += 1
                        group_label = '{}_{}'.format(basename, counter)

                        if counter == 100:
                            raise exceptions.ImportUniquenessError(
                                "Overflow of import groups (more than 100 import groups exist with basename '{}')"
                                ''.format(basename))
                    group = Group(label=group_label,
                                  type_string=IMPORTGROUP_TYPE)
                    session.add(group.backend_entity._dbmodel)

                # Add nodes to the group, bypassing the SQLA ORM to increase speed
                nodes = [
                    entry[0].backend_entity for entry in QueryBuilder().append(
                        Node, filters={
                            'id': {
                                'in': pks_for_group
                            }
                        }).all()
                ]
                group.backend_entity.add_nodes(nodes, skip_orm=True)
                if not silent:
                    print(
                        "IMPORTED NODES ARE GROUPED IN THE IMPORT GROUP LABELED '{}'"
                        .format(group.label))
            else:
                if not silent:
                    print(
                        'NO NODES TO IMPORT, SO NO GROUP CREATED, IF IT DID NOT ALREADY EXIST'
                    )

            if not silent:
                print('COMMITTING EVERYTHING...')
            session.commit()
        except:
            if not silent:
                print('Rolling back')
            session.rollback()
            raise

    if not silent:
        print('DONE.')

    return ret_dict
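A hypothetical call of the importer above (the archive path and group label are placeholders; the returned dict maps entity names to lists of ``(import_pk, db_pk)`` pairs):

from aiida.orm import Group

target_group = Group(label='my-imports').store()  # placeholder group
result = import_data_sqla(
    '/path/to/export.aiida',  # placeholder archive path
    group=target_group,
    extras_mode_existing='kcl',
    comment_mode='newest',
    silent=True,
)
print(result.get('Node', {}).get('new', []))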
Example #5
    def create_input_nodes(self,
                           open_transport,
                           input_file_name=None,
                           output_file_name=None,
                           remote_workdir=None):
        """Create calculation input nodes based on the job's files.

        :param open_transport: An open instance of the transport class of the
            calculation's computer. See the tutorial for more information.
        :type open_transport: aiida.transport.plugins.local.LocalTransport
            or aiida.transport.plugins.ssh.SshTransport


        This method parses the files in the job's remote working directory to
        create the input nodes that would exist if the calculation were
        submitted using AiiDA. These nodes are

            * a ``'parameters'`` Dict node, based on the namelists and
              their variable-value pairs;
            * a ``'kpoints'`` KpointsData node, based on the *K_POINTS* card;
            * a ``'structure'`` StructureData node, based on the
              *ATOMIC_POSITIONS* and *CELL_PARAMETERS* cards;
            * one ``'pseudo_X'`` UpfData node for the pseudopotential used for
              the atomic species with name ``X``, as specified in the
              *ATOMIC_SPECIES* card;
            * a ``'settings'`` Dict node, if there are any fixed
              coordinates, or if the gamma kpoint is used;

        and can be retrieved as a dictionary using the ``get_incoming()``
        method. *These input links are cached-links; nothing is stored by this
        method (including the calculation node itself).*

        .. note:: QE stores the calculation's pseudopotential files in the
            ``<outdir>/<prefix>.save/`` subfolder of the job's working
            directory, where ``outdir`` and ``prefix`` are QE *CONTROL*
            variables (see
            `pw input file description <http://www.quantum-espresso.org/wp-content/uploads/Doc/INPUT_PW.html>`_).
            This method uses these files to either retrieve the existing
            UpfData node for each pseudopotential, if one already exists, or
            create a new one.


        **Keyword arguments**

        .. note:: These keyword arguments can also be set when instantiating the
            class or using the ``set_`` methods (e.g. ``set_remote_workdir``).
            Accepting them here simply gives the user an additional place to
            set their values. *Only the values that have not yet been set need
            to be specified.*

        :param input_file_name: The file name of the job's input file.
        :type input_file_name: str

        :param output_file_name: The file name of the job's output file (i.e.
            the file containing the stdout of QE).
        :type output_file_name: str

        :param remote_workdir: Absolute path to the directory where the job
            was run. The transport of the computer you link as input to the
            calculation is the transport that will be used to retrieve the
            calculation's files. Therefore, ``remote_workdir`` should be the
            absolute path to the job's directory on that computer.
        :type remote_workdir: str

        :raises aiida.common.exceptions.InputValidationError: if
            ``open_transport`` is a different type of transport than the
            computer's.
        :raises aiida.common.exceptions.InvalidOperation: if
            ``open_transport`` is not open.
        :raises aiida.common.exceptions.InputValidationError: if
            ``remote_workdir``, ``input_file_name``, and/or ``output_file_name``
            are not set prior to or during the call of this method.
        :raises aiida.common.exceptions.FeatureNotAvailable: if the input file
            uses anything other than ``ibrav = 0``, which is not currently
            implemented in AiiDA.
        :raises aiida.common.exceptions.ParsingError: if there are issues
            parsing the input file.
        :raises IOError: if there are issues reading the input file.
        """
        import re
        # Make sure the remote workdir and input + output file names were
        # provided either before or during the call to this method. If they
        # were just provided during this method call, store the values.
        if remote_workdir is not None:
            self.set_remote_workdir(remote_workdir)
        elif self.get_attr('remote_workdir', None) is None:
            raise InputValidationError(
                'The remote working directory has not been specified.\n'
                'Please specify it using one of the following...\n '
                '(a) pass as a keyword argument to create_input_nodes\n'
                '    [create_input_nodes(remote_workdir=your_remote_workdir)]\n'
                '(b) pass as a keyword argument when instantiating\n '
                '    [calc = PwCalculationImport(remote_workdir='
                'your_remote_workdir)]\n'
                '(c) use the set_remote_workdir method\n'
                '    [calc.set_remote_workdir(your_remote_workdir)]')
        if input_file_name is not None:
            self._INPUT_FILE_NAME = input_file_name
        elif self._INPUT_FILE_NAME is None:
            raise InputValidationError(
                'The input file_name has not been specified.\n'
                'Please specify it using one of the following...\n '
                '(a) pass as a keyword argument to create_input_nodes\n'
                '    [create_input_nodes(input_file_name=your_file_name)]\n'
                '(b) pass as a keyword argument when instantiating\n '
                '    [calc = PwCalculationImport(input_file_name='
                'your_file_name)]\n'
                '(c) use the set_input_file_name method\n'
                '    [calc.set_input_file_name(your_file_name)]')
        if output_file_name is not None:
            self._OUTPUT_FILE_NAME = output_file_name
        elif self._OUTPUT_FILE_NAME is None:
            raise InputValidationError(
                'The output file_name has not been specified.\n'
                'Please specify it using one of the following...\n '
                '(a) pass as a keyword argument to create_input_nodes\n'
                '    [create_input_nodes(output_file_name=your_file_name)]\n'
                '(b) pass as a keyword argument when instantiating\n '
                '    [calc = PwCalculationImport(output_file_name='
                'your_file_name)]\n'
                '(c) use the set_output_file_name method\n'
                '    [calc.set_output_file_name(your_file_name)]')

        # Check that open_transport is the correct transport type.
        if type(open_transport) is not self.get_computer().get_transport_class(
        ):
            raise InputValidationError(
                'The transport passed as the `open_transport` parameter is '
                'not the same transport type linked to the computer. Please '
                'obtain the correct transport class using the '
                "`get_transport_class` method of the calculation's computer. "
                'See the tutorial for more information.')

        # Check that open_transport is actually open.
        if not open_transport._is_open:
            raise InvalidOperation(
                'The transport passed as the `open_transport` parameter is '
                "not open. Please execute the open the transport using it's "
                '`open` method, or execute the call to this method within a '
                '`with` statement context guard. See the tutorial for more '
                'information.')

        # Copy the input file and pseudo files to a temp folder for parsing.
        with SandboxFolder() as folder:

            # Copy the input file to the temp folder.
            remote_path = os.path.join(self._get_remote_workdir(),
                                       self._INPUT_FILE_NAME)
            open_transport.get(remote_path, folder.abspath)

            # Parse the input file.
            local_path = os.path.join(folder.abspath, self._INPUT_FILE_NAME)
            with open(local_path) as fin:
                pwinputfile = pwinputparser.PwInputFile(fin.read())

            # Determine PREFIX, if it hasn't already been set by the user.
            if self._PREFIX is None:
                control_dict = pwinputfile.namelists['CONTROL']
                # If prefix is not set in input file, use the default,
                # 'pwscf'.
                self._PREFIX = control_dict.get('prefix', 'pwscf')

            # Determine _OUTPUT_SUBFOLDER, if it hasn't already been set by
            # the user.
            # TODO: Prompt user before using the environment variable???
            if self._OUTPUT_SUBFOLDER is None:
                # See if it's specified in the CONTROL namelist.
                control_dict = pwinputfile.namelists['CONTROL']
                self._OUTPUT_SUBFOLDER = control_dict.get('outdir', None)
                if self._OUTPUT_SUBFOLDER is None:
                    # See if the $ESPRESSO_TMPDIR is set.
                    envar = open_transport.exec_command_wait(
                        'echo $ESPRESSO_TMPDIR')[1]
                    if len(envar.strip()) > 0:
                        self._OUTPUT_SUBFOLDER = envar.strip()
                    else:
                        # Use the default dir--the dir job was submitted in.
                        self._OUTPUT_SUBFOLDER = self._get_remote_workdir()

            # Copy the pseudo files to the temp folder.
            for fnm in pwinputfile.atomic_species['pseudo_file_names']:
                remote_path = os.path.join(self._get_remote_workdir(),
                                           self._OUTPUT_SUBFOLDER,
                                           '{}.save/'.format(self._PREFIX),
                                           fnm)
                open_transport.get(remote_path, folder.abspath)

            # Make sure that ibrav = 0, since aiida doesn't support anything
            # else.
            if pwinputfile.namelists['SYSTEM']['ibrav'] != 0:
                raise FeatureNotAvailable(
                    'Found ibrav != 0 while parsing the input file. '
                    'Currently, AiiDA only supports ibrav = 0.')

            # Create Dict node based on the namelist and link as input.

            # First, strip the namelist items that aiida doesn't allow or sets
            # later.
            # NOTE: ibrav = 0 is checked above.
            # NOTE: If any of the position or cell units are in alat or crystal
            # units, that will be taken care of by the input parsing tools, and
            # we are safe to fake that they were never there in the first place.
            parameters_dict = deepcopy(pwinputfile.namelists)
            for namelist, blocked_key in self._blocked_keywords:
                # Take a snapshot of the keys so entries can be popped while
                # iterating.
                keys = list(parameters_dict[namelist].keys())
                for this_key in keys:
                    # take into account that celldm and celldm(*) must be blocked
                    if re.sub('[(0-9)]', '', this_key) == blocked_key:
                        parameters_dict[namelist].pop(this_key, None)

            parameters = Dict(dict=parameters_dict)
            self.use_parameters(parameters)

            # Initialize the dictionary for settings parameter data for possible
            # use later for gamma kpoint and fixed coordinates.
            settings_dict = {}

            # Create a KpointsData node based on the K_POINTS card block
            # and link as input.
            kpointsdata = pwinputfile.get_kpointsdata()
            self.use_kpoints(kpointsdata)
            # If only the gamma kpoint is used, add to the settings dictionary.
            if pwinputfile.k_points['type'] == 'gamma':
                settings_dict['gamma_only'] = True

            # Create a StructureData node based on the ATOMIC_POSITIONS,
            # CELL_PARAMETERS, and ATOMIC_SPECIES card blocks, and link as
            # input.
            structuredata = pwinputfile.get_structuredata()
            self.use_structure(structuredata)

            # Get or create a UpfData node for the pseudopotentials used for
            # the calculation.
            names = pwinputfile.atomic_species['names']
            pseudo_file_names = pwinputfile.atomic_species['pseudo_file_names']
            for name, fnm in zip(names, pseudo_file_names):
                local_path = os.path.join(folder.abspath, fnm)
                pseudo, created = UpfData.get_or_create(local_path)
                self.use_pseudo(pseudo, kind=name)

        # If there are any fixed coordinates (i.e. force modifications)
        # present in the input file, create a Dict node for these
        # special settings.
        fixed_coords = pwinputfile.atomic_positions['fixed_coords']
        # NOTE: any() only works for 1-dimensional lists.
        if any((any(fc_xyz) for fc_xyz in fixed_coords)):
            settings_dict['FIXED_COORDS'] = fixed_coords

        # If the settings_dict has been filled in, create a Dict
        # node from it and link as input.
        if settings_dict:
            self.use_settings(Dict(dict=settings_dict))

        self.set_attribute('input_nodes_created', True)
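
The checks above assume the method is called with an already-open transport. A minimal usage sketch under that assumption (the computer, work directory, and file names below are placeholders, and obtaining a configured transport via the computer's `get_transport()` is an assumption, not part of the original example):

# Hypothetical usage; all names and paths are placeholders.
calc = PwCalculationImport(computer=computer)
with computer.get_transport() as open_transport:
    calc.create_input_nodes(
        open_transport,
        input_file_name='pw.in',
        output_file_name='pw.out',
        remote_workdir='/scratch/somebody/pw_run')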
Example #6
0
def __enter__(self):
    """Instantiate a SandboxFolder into which the archive can be lazily unpacked."""
    self._folder = SandboxFolder()
    return self
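
A SandboxFolder created in `__enter__` is usually cleaned up in the matching `__exit__`; a minimal sketch of such a counterpart (not part of the original example, assuming the `erase()` method of `aiida.common.folders.Folder` is the desired cleanup):

def __exit__(self, exc_type, exc_value, traceback):
    """Erase the temporary folder when leaving the context (illustrative cleanup)."""
    if self._folder is not None:
        self._folder.erase()
        self._folder = None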
Example #7
0
def fixture_sandbox():
    """Return a `SandboxFolder`."""
    from aiida.common.folders import SandboxFolder
    with SandboxFolder() as folder:
        yield folder
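
To be injected into tests, the generator above would normally be registered as a pytest fixture; a self-contained sketch of that registration plus an illustrative test (the decorator placement and the test itself are assumptions, not part of the original example):

import pytest

from aiida.common.folders import SandboxFolder


@pytest.fixture()
def fixture_sandbox():
    """Return a `SandboxFolder`."""
    with SandboxFolder() as folder:
        yield folder


def test_sandbox_starts_empty(fixture_sandbox):
    """A freshly created sandbox should contain no files (illustrative assertion)."""
    assert fixture_sandbox.get_content_list() == []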
Example #8
0
def migrate(file_input, file_output, force, silent):
    """
    An entry point to migrate existing AiiDA export archives between version numbers
    """
    import os, json, sys
    import tarfile, zipfile
    from aiida.common.folders import SandboxFolder
    from aiida.common.archive import extract_zip, extract_tar

    if os.path.exists(file_output) and not force:
        print('Error: the output file already exists', file=sys.stderr)
        sys.exit(2)

    with SandboxFolder(sandbox_in_repo=False) as folder:

        if zipfile.is_zipfile(file_input):
            archive_format = 'zip'
            extract_zip(file_input, folder, silent=silent)
        elif tarfile.is_tarfile(file_input):
            archive_format = 'tar.gz'
            extract_tar(file_input, folder, silent=silent)
        else:
            print('Error: invalid file format, expected either a zip archive or gzipped tarball', file=sys.stderr)
            sys.exit(2)

        try:
            with open(folder.get_abs_path('data.json')) as f:
                data = json.load(f)
            with open(folder.get_abs_path('metadata.json')) as f:
                metadata = json.load(f)
        except IOError as e:
            raise ValueError(
                'export archive does not contain the required file {}'.format(
                    e.filename))

        old_version = verify_metadata_version(metadata)

        try:
            if old_version == '0.1':
                migrate_v1_to_v2(metadata, data)
            elif old_version == '0.2':
                try:
                    migrate_v2_to_v3(metadata, data)
                except DanglingLinkError as e:
                    print "An exception occured!"
                    print e
                    raise RuntimeError(
                        "You're export file is broken because it contains dangling links"
                    )
            else:
                raise ValueError(
                    'cannot migrate from version {}'.format(old_version))
        except ValueError as exception:
            print('Error:', exception, file=sys.stderr)
            sys.exit(2)

        new_version = verify_metadata_version(metadata)

        with open(folder.get_abs_path('data.json'), 'w') as f:
            json.dump(data, f)

        with open(folder.get_abs_path('metadata.json'), 'w') as f:
            json.dump(metadata, f)

        if archive_format == 'zip':
            with zipfile.ZipFile(file_output,
                                 mode='w',
                                 compression=zipfile.ZIP_DEFLATED) as archive:
                src = folder.abspath
                for dirpath, dirnames, filenames in os.walk(src):
                    relpath = os.path.relpath(dirpath, src)
                    for fn in dirnames + filenames:
                        real_src = os.path.join(dirpath, fn)
                        real_dest = os.path.join(relpath, fn)
                        archive.write(real_src, real_dest)
        elif archive_format == 'tar.gz':
            with tarfile.open(file_output,
                              'w:gz',
                              format=tarfile.PAX_FORMAT,
                              dereference=True) as archive:
                archive.add(folder.abspath, arcname='')

        if not silent:
            print('Successfully migrated the archive from version {} to {}'.format(
                old_version, new_version))
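
A minimal call of the function above, with placeholder archive names (assuming the input file is a supported zip archive or gzipped tarball):

# Placeholder file names; `force=True` overwrites an existing output file.
migrate('export_v0.2.aiida', 'export_migrated.aiida', force=True, silent=False)
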
def retrieve_calculation(calculation, transport, retrieved_temporary_folder):
    """Retrieve all the files of a completed job calculation using the given transport.

    If the job defined anything in the `retrieve_temporary_list`, those entries will be stored in the
    `retrieved_temporary_folder`. The caller is responsible for creating and destroying this folder.

    :param calculation: the instance of CalcJobNode to update.
    :param transport: an already opened transport to use for the retrieval.
    :param retrieved_temporary_folder: the absolute path to a directory in which to store the files
        listed, if any, in the `retrieve_temporary_list` of the job's CalcInfo
    """
    logger_extra = get_dblogger_extra(calculation)
    workdir = calculation.get_remote_workdir()

    execlogger.debug('Retrieving calc {}'.format(calculation.pk), extra=logger_extra)
    execlogger.debug('[retrieval of calc {}] chdir {}'.format(calculation.pk, workdir), extra=logger_extra)

    # If the calculation already has a `retrieved` folder, simply return. The retrieval was apparently already completed
    # before, which can happen if the daemon is restarted and it shuts down after retrieving but before getting the
    # chance to perform the state transition. Upon reloading this calculation, it will re-attempt the retrieval.
    link_label = calculation.link_label_retrieved
    if calculation.get_outgoing(FolderData, link_label_filter=link_label).first():
        execlogger.warning('CalcJobNode<{}> already has a `{}` output folder: skipping retrieval'.format(
            calculation.pk, link_label))
        return

    # Create the FolderData node into which to store the files that are to be retrieved
    retrieved_files = FolderData()

    with transport:
        transport.chdir(workdir)

        # First, retrieve the files that go into the retrieved FolderData node
        retrieve_list = calculation.get_retrieve_list()
        retrieve_temporary_list = calculation.get_retrieve_temporary_list()
        retrieve_singlefile_list = calculation.get_retrieve_singlefile_list()

        with SandboxFolder() as folder:
            retrieve_files_from_list(calculation, transport, folder.abspath, retrieve_list)
            # Here I retrieved everything; now I store them inside the calculation
            retrieved_files.put_object_from_tree(folder.abspath)

        # Second, retrieve the singlefiles, if any files were specified in the 'retrieve_singlefile_list' key
        if retrieve_singlefile_list:
            with SandboxFolder() as folder:
                _retrieve_singlefiles(calculation, transport, folder, retrieve_singlefile_list, logger_extra)

        # Retrieve the temporary files in the retrieved_temporary_folder if any files were
        # specified in the 'retrieve_temporary_list' key
        if retrieve_temporary_list:
            retrieve_files_from_list(calculation, transport, retrieved_temporary_folder, retrieve_temporary_list)

            # Log the files that were retrieved in the temporary folder
            for filename in os.listdir(retrieved_temporary_folder):
                execlogger.debug("[retrieval of calc {}] Retrieved temporary file or folder '{}'".format(
                    calculation.pk, filename), extra=logger_extra)

        # Store everything
        execlogger.debug(
            '[retrieval of calc {}] '
            'Storing retrieved_files={}'.format(calculation.pk, retrieved_files.pk),
            extra=logger_extra)
        retrieved_files.store()

    # Make sure that attaching the `retrieved` folder with a link is the last thing we do. This gives the biggest chance
    # of making this method idempotent. That is to say, if a runner gets interrupted during this action, it will simply
    # retry the retrieval, unless we got here and managed to link it up, in which case we move to the next task.
    retrieved_files.add_incoming(calculation, link_type=LinkType.CREATE, link_label=calculation.link_label_retrieved)
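
A hedged sketch of how retrieve_calculation might be driven outside the daemon; the node PK, the use of the node's computer to obtain a configured transport, and the plain temporary directory are all assumptions for illustration:

import tempfile

from aiida.orm import load_node

# Hypothetical PK; the node is assumed to be a finished CalcJobNode.
calculation = load_node(1234)
transport = calculation.computer.get_transport()

# The caller owns the lifetime of the temporary folder, as the docstring notes.
with tempfile.TemporaryDirectory() as retrieved_temporary_folder:
    retrieve_calculation(calculation, transport, retrieved_temporary_folder)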