Exemplo n.º 1
0
 def _check_calc_compatibility(self, calc):
     """Verify that ``calc`` is a ``NwcpymatgenCalculation``.

     :param calc: the object to check.
     :raises aiida.common.exceptions.ParsingError: if ``calc`` is not an
         instance of ``NwcpymatgenCalculation``.
     """
     from aiida.common.exceptions import ParsingError
     # Guard clause: a compatible calculation needs no further handling.
     if isinstance(calc, NwcpymatgenCalculation):
         return
     raise ParsingError("Input calc must be a NwcpymatgenCalculation")
Exemplo n.º 2
0
    def parse_bases_from_directory(cls,
                                   dirpath,
                                   basis_type=None,
                                   deduplicate=True):
        """Parse the basis files in the given directory into a list of data nodes.

        .. note:: The directory pointed to by `dirpath` should only contain basis files. Optionally, it can
            contain just a single directory, that contains all the basis files. If any other files are stored
            in the basepath or the subdirectory, that cannot be successfully parsed as basis files the method
            will raise a ``ValueError``.

        :param dirpath: absolute path to a directory containing bases.
        :param basis_type: subclass of ``BasisData`` to be used for the parsed bases. If not specified and
            the basis set only defines a single supported basis type in ``_basis_types`` then that will be used,
            otherwise a ``ValueError`` is raised.
        :param deduplicate: if True, will scan database for existing bases of same type and with the same
            md5 checksum, and use that instead of the parsed one.
        :return: list of data nodes
        :raises ValueError: if ``dirpath`` is not a directory or contains anything other than files.
        :raises ValueError: if no bases could be parsed from ``dirpath``.
        :raises ValueError: if ``dirpath`` contains multiple bases for the same element.
        :raises ValueError: if ``basis_type`` is explicitly specified and is not supported by this basis set class.
        :raises ValueError: if ``basis_type`` is not specified and the class supports more than one basis type.
        :raises ParsingError: if the constructor of the basis type fails for one of the files in the ``dirpath``,
            or if the element is neither set by the constructor nor derivable from the filename.
        """
        from aiida.common.exceptions import ParsingError

        bases = []
        dirpath = cls._validate_dirpath(dirpath)
        basis_type = cls._validate_basis_type(basis_type)

        for filename in os.listdir(dirpath):
            filepath = os.path.join(dirpath, filename)

            if not os.path.isfile(filepath):
                raise ValueError(
                    f'dirpath `{dirpath}` contains at least one entry that is not a file'
                )

            with open(filepath, 'rb') as handle:
                try:
                    if deduplicate:
                        basis = basis_type.get_or_create(handle,
                                                         filename=filename)
                    else:
                        basis = basis_type(handle, filename=filename)
                except ParsingError as exception:
                    raise ParsingError(
                        f'failed to parse `{filepath}`: {exception}'
                    ) from exception

            if basis.element is None:
                # Fall back to the `ELEMENT.EXTENSION` filename convention.
                match = re.search(r'^([A-Za-z]{1,2})\.\w+', filename)
                if match is None:
                    # Every literal containing a placeholder needs its own `f` prefix: adjacent
                    # literals are concatenated, but only f-prefixed ones are interpolated.
                    raise ParsingError(
                        f'`{basis.__class__}` constructor did not define the element and could not parse a valid '
                        f'element symbol from the filename `{filename}` either. It should have the format '
                        '`ELEMENT.EXTENSION`')
                basis.element = match.group(1)
            bases.append(basis)

        if not bases:
            raise ValueError(f'no bases were parsed from `{dirpath}`')

        # Fewer distinct elements than bases means at least one element occurs twice.
        elements = {basis.element for basis in bases}

        if len(bases) != len(elements):
            raise ValueError(
                f'directory `{dirpath}` contains bases with duplicate elements'
            )

        return bases
Exemplo n.º 3
0
def parse_upf(fname, check_filename=True):
    """
    Extract the UPF version and the element symbol from a UPF file.

    The content is scanned with regular expressions instead of an XML parser.
    Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    :param fname: either an object exposing ``read()`` and ``name``, or a
        path that is opened as UTF-8 text.
    :param check_filename: if True, raise a ParsingError exception if the
        filename does not start with the element name (case-insensitive).
    :return: a dictionary with the keys ``version`` and ``element``.
    :raises aiida.common.exceptions.ParsingError: if the element cannot be
        found, is not a known symbol, or fails the filename check.
    """
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.common import AIIDA_LOGGER
    from aiida.orm.nodes.data.structure import _valid_symbols

    # Duck-typing: first treat `fname` as a file-like object, otherwise as a path.
    try:
        upf_contents = fname.read()
        fname = fname.name
    except AttributeError:
        with open(fname, encoding='utf8') as handle:
            upf_contents = handle.read()

    version_match = REGEX_UPF_VERSION.search(upf_contents)
    if version_match is None:
        AIIDA_LOGGER.debug(f'Assuming version 1 for file {fname}')
        version = '1'
    else:
        version = version_match.group('version')
        AIIDA_LOGGER.debug(f'Version found: {version} for file {fname}')

    # Only the part before the first dot decides which element regex is used.
    major_text = version.partition('.')[0]
    try:
        version_major = int(major_text)
    except ValueError:
        # The leading part of the version string is not an integer: treat
        # the file as version 1.
        AIIDA_LOGGER.debug(
            f'Falling back to version 1 for file {fname} version string {version} unrecognized'
        )
        version_major = 1

    # Version 1 has its own element pattern; all versions > 1 share the other.
    element_regex = REGEX_ELEMENT_V1 if version_major == 1 else REGEX_ELEMENT_V2
    element_match = element_regex.search(upf_contents)
    element = element_match.group('element_name') if element_match else None

    if element is None:
        raise ParsingError(f'Unable to find the element of UPF {fname}')

    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError(
            f'Unknown element symbol {element} for file {fname}')

    filename_ok = os.path.basename(fname).lower().startswith(element.lower()) if check_filename else True
    if not filename_ok:
        raise ParsingError('Filename {0} was recognized for element '
                           '{1}, but the filename does not start '
                           'with {1}'.format(fname, element))

    return {'version': version, 'element': element}
Exemplo n.º 4
0
def parse_atomic_species(txt):
    """
    Return a dictionary containing info from the ATOMIC_SPECIES card block
    in txt.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * names: list of the atom names (e.g. 'Si', 'Si0', 'Si_0') (case
              as-is)
            * masses: list of the masses of the atoms in 'names'
            * pseudo_file_names: list of the pseudopotential file names for the
              atoms in 'names' (case as-is)

        Example::

            {'names': ['Li', 'O', 'Al', 'Si'],
             'masses': [6.941,  15.9994, 26.98154, 28.0855],
             'pseudo_file_names': ['Li.pbe-sl-rrkjus_psl.1.0.0.UPF',
                                   'O.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Al.pbe-nl-rrkjus_psl.1.0.0.UPF',
                                   'Si.pbe-nl-rrkjus_psl.1.0.0.UPF']}

    :raises aiida.common.exceptions.ParsingError: if the ATOMIC_SPECIES card
        block is not found in ``txt``.
    """
    # Define re for atomic species card block.
    atomic_species_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_SPECIES [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the block.
    atomic_species_re = re.compile(
        r"""
        ^ [ \t]* (?P<name>\S+) [ \t]+ (?P<mass>\S+) [ \t]+ (?P<pseudo>\S+)
            [ \t]* $\n?
        """, RE_FLAGS)

    # `search` signals "no match" by returning None (it does not raise), so
    # the result has to be tested explicitly before calling `.group`.
    match = atomic_species_block_re.search(txt)
    if match is None:
        raise ParsingError('The ATOMIC_SPECIES card block was not found in\n' +
                           txt)
    # The `block` group is mandatory in the pattern (one or more lines), so a
    # successful match always carries the lines of the card.
    blockstr = match.group('block')

    def fortfloat(text):
        """Convert a Fortran-style float string (``d``/``D`` exponent) to float."""
        return float(text.replace('d', 'e').replace('D', 'E'))

    # Extract the name, mass, and pseudopotential file name from each line of
    # the card block.
    names, masses, pseudo_fnms = [], [], []
    for species in atomic_species_re.finditer(blockstr):
        names.append(species.group('name'))
        masses.append(fortfloat(species.group('mass')))
        pseudo_fnms.append(species.group('pseudo'))
    return dict(names=names, masses=masses, pseudo_file_names=pseudo_fnms)
Exemplo n.º 5
0
    def parse(self, retrieved, **kwargs):
        """Parse the retrieved folder of the calculation and attach the results as outputs.

        The scheduler stderr files, the yambo log/report files and the output
        files found in the retrieved folder are parsed. Arrays, bands and
        ``ndb.*`` data are registered with ``self.out`` and a summary
        dictionary is stored under ``self._parameter_linkname``.

        :param retrieved: not used directly; the folder is taken from ``self.retrieved``.
        :param kwargs: not used.
        :return: an exit code from ``self.exit_codes`` when the retrieved
            folder is missing, ``YamboFolder`` cannot be built, or the run is
            not recognised as successful; ``None`` otherwise.
        :raises aiida.common.exceptions.ParsingError: if the calculation has no
            ``parameters`` input and is not an initialisation run.
        """
        from aiida.common import exceptions

        # Assume at the start that the job is unsuccessful, unless proven otherwise.
        success = False

        # Check whether the yambo calc was an initialisation (p2y).
        try:
            settings_dict = self._calc.inputs.settings.get_dict()
            settings_dict = _uppercase_dict(settings_dict,
                                            dict_name='settings')
        except AttributeError:
            settings_dict = {}

        initialise = settings_dict.pop('INITIALISE', None)

        # Select the folder object.
        try:
            out_folder = self.retrieved
        except exceptions.NotExistent:
            return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

        try:
            input_params = self._calc.inputs.parameters.get_dict()
        except AttributeError:
            if not initialise:
                # Referenced through the locally imported module so the name
                # is guaranteed to be in scope here.
                raise exceptions.ParsingError("Input parameters not found!")
            input_params = {}

        # Retrieve the cell: if parent_calc is a YamboCalculation we must find
        # the original PwCalculation going back through the graph tree.
        parent_calc = find_pw_parent(self._calc)
        cell = parent_calc.inputs.structure.cell

        output_params = {
            'warnings': [],
            'errors': [],
            'yambo_wrote_dbs': False,
            'game_over': False,
            'p2y_completed': False,
            'last_time': 0,
            'requested_time': self._calc.attributes['max_wallclock_seconds'],
            'time_units': 'seconds',
            'memstats': [],
            'para_error': False,
            'memory_error': False,
            'timing': [],
            'time_error': False,
            'has_gpu': False,
            'yambo_version': '4.5',
            'Fermi(eV)': 0,
        }
        ndbqp = {}
        ndbhf = {}

        # Any failure while locating or reading the folder is reported as a
        # parser anomaly instead of crashing the parser.
        try:
            folder_path = out_folder._repository._repo_folder.abspath
            results = YamboFolder(folder_path)
        except Exception:
            return self.exit_codes.PARSER_ANOMALY

        for filename in os.listdir(folder_path):
            if 'stderr' in filename:
                # `os.listdir` yields bare names: join with the folder path so
                # the file is opened from the retrieved folder and not from
                # the current working directory.
                with open(os.path.join(folder_path, filename), 'r') as stderr:
                    parse_scheduler_stderr(stderr, output_params)

        for result in results.yambofiles:
            if result is None:
                continue

            # This should be automatic in yambopy...
            if result.type == 'log':
                parse_log(result, output_params)
            if result.type == 'report':
                parse_report(result, output_params)

            if 'eel' in result.filename:
                eels_array = self._aiida_array(result.data)
                self.out(self._eels_array_linkname, eels_array)
            elif 'eps' in result.filename:
                eps_array = self._aiida_array(result.data)
                self.out(self._eps_array_linkname, eps_array)
            elif 'alpha' in result.filename:
                alpha_array = self._aiida_array(result.data)
                self.out(self._alpha_array_linkname, alpha_array)

            elif 'ndb.QP' == result.filename:
                ndbqp = copy.deepcopy(result.data)

            elif 'ndb.HF_and_locXC' == result.filename:
                ndbhf = copy.deepcopy(result.data)

            elif 'gw0___' in input_params:
                # Build the node once and reuse it for the test and the output.
                arr = self._aiida_bands_data(result.data, cell,
                                             result.kpoints)
                if arr:
                    # Exact type comparison on purpose: ArrayData is not
                    # BandsData, but BandsData is ArrayData.
                    if type(arr) is BandsData:
                        self.out(self._quasiparticle_bands_linkname, arr)
                    if type(arr) is ArrayData:
                        self.out(self._qp_array_linkname, arr)

            elif 'life___' in input_params:
                arr = self._aiida_bands_data(result.data, cell,
                                             result.kpoints)
                if arr:
                    if type(arr) is BandsData:
                        self.out(self._alpha_array_linkname + '_bands', arr)
                    elif type(arr) is ArrayData:
                        self.out(self._alpha_array_linkname + '_arr', arr)

        yambo_wrote_dbs(output_params)

        # We store all the information from the ndb.* files in a single output
        # if possible, else we default to separate outputs.
        if ndbqp and ndbhf:
            self.out(self._ndb_linkname, self._sigma_c(ndbqp, ndbhf))
        else:
            if ndbqp:
                self.out(self._ndb_QP_linkname, self._aiida_ndb_qp(ndbqp))
            if ndbhf:
                self.out(self._ndb_HF_linkname, self._aiida_ndb_hf(ndbhf))

        if output_params['game_over']:
            success = True
        elif output_params['p2y_completed'] and initialise:
            success = True

        # Last check on time: fraction of the requested time left unused
        # according to the last parsed time.
        delta_time = (float(output_params['requested_time'])-float(output_params['last_time'])) \
                  / float(output_params['requested_time'])

        if not success and -2 < delta_time < 0.1:
            output_params['time_error'] = True

        params = Dict(dict=output_params)
        self.out(self._parameter_linkname, params)  # output_parameters

        if not success:
            if 'time_most_prob' in output_params['errors']:
                return self.exit_codes.WALLTIME_ERROR
            elif output_params['para_error']:
                return self.exit_codes.PARA_ERROR
            elif output_params[
                    'memory_error'] and 'X_par_allocation' in output_params[
                        'errors']:
                return self.exit_codes.X_par_MEMORY_ERROR
            elif output_params['memory_error']:
                return self.exit_codes.MEMORY_ERROR
            elif output_params['time_error']:
                return self.exit_codes.WALLTIME_ERROR
            else:
                return self.exit_codes.NO_SUCCESS
Exemplo n.º 6
0
def parse_atomic_positions(txt):
    """
    Return a dictionary containing info from the ATOMIC_POSITIONS card block
    in txt.

    .. note:: If the units are unspecified, they will be returned as None.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * units: the units of the positions (always lower-case) or None
            * names: list of the atom names (e.g. ``'Si'``, ``'Si0'``,
              ``'Si_0'``)
            * positions: list of the [x, y, z] positions
            * fixed_coords: list of [x, y, z] (bools) of the force modifications
              (**Note:** True <--> Fixed, as defined in the
              ``BasePwCpInputGenerator._if_pos`` method)

        For example::

            {'units': 'bohr',
             'names': ['C', 'O'],
             'positions': [[0.0, 0.0, 0.0],
                           [0.0, 0.0, 2.5]],
             'fixed_coords': [[False, False, False],
                              [True, True, True]]}


    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    def str01_to_bool(s):
        """
        Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs to
        be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        if s == '0':
            return True
        elif s == '1':
            return False
        else:
            raise ParsingError(
                'Unable to convert if_pos = "{}" to bool'.format(s))

    def fortfloat(s):
        """Convert a Fortran-style float string (``d``/``D`` exponent) to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Define re for the card block.
    # NOTE: This will match card block lines w/ or w/out force modifications.
    atomic_positions_block_re = re.compile(
        r"""
        ^ \s* ATOMIC_POSITIONS \s*                      # Atomic positions start with that string
        [{(]? \s* (?P<units>\S+?)? \s* [)}]? \s* $\n    # The units are after the string in optional brackets
        (?P<block>                                  # This is the block of positions
            (
                (
                    \s*                                 # White space in front of the element spec is ok
                    (
                        [A-Za-z]+[A-Za-z0-9]{0,2}       # Element spec
                        (
                            \s+                         # White space in front of the number
                            [-|+]?                      # Plus or minus in front of the number (optional)
                            (
                                (
                                    \d*                 # optional decimal in the beginning .0001 is ok, for example
                                    [\.]                # There has to be a dot followed by
                                    \d+                 # at least one decimal
                                )
                                |                       # OR
                                (
                                    \d+                 # at least one decimal, followed by
                                    [\.]?               # an optional dot ( both 1 and 1. are fine)
                                    \d*                 # And optional number of decimals (1.00001)
                                )                        # followed by optional decimals
                            )
                            ([E|e|d|D][+|-]?\d+)?       # optional exponents E+03, e-05
                        ){3}                            # I expect three float values
                        ((\s+[0-1]){3}\s*)?             # Followed by optional ifpos
                        \s*                             # Followed by optional white space
                        |
                        \#.*                            # If a line is commented out, that is also ok
                        |
                        \!.*                            # Comments also with excl. mark in fortran
                    )
                    |                                   # OR
                    \s*                                 # A line only containing white space
                 )
                [\n]                                    # line break at the end
            )+                                          # A positions block should be one or more lines
        )
        """, re.X | re.M)

    # Define re for a single position line, with optional force modifications.
    atomic_positions_w_constraints_re = re.compile(
        r"""
        ^                                       # Linestart
        [ \t]*                                  # Optional white space
        (?P<name>[A-Za-z]+[A-Za-z0-9]{0,2})\s+   # get the symbol, max 3 chars, starting with a char
        (?P<x>                                  # Get x
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<y>                                  # Get y
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<z>                                  # Get z
            [\-|\+]?(\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]*
        (?P<fx>[01]?)                           # Get fx
        [ \t]*
        (?P<fy>[01]?)                           # Get fx
        [ \t]*
        (?P<fz>[01]?)                           # Get fx
        """, re.X | re.M)

    # Find the card block and extract units and the lines of the block.
    match = atomic_positions_block_re.search(txt)
    if not match:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was not found in\n' + txt)
    # Get the units. If they are not found, match.group('units') will be None.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    blockstr = match.group('block')
    if blockstr is None:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was parsed as empty in\n' + txt)

    # Parse the lines of the card block in a single pass so the atoms keep the
    # order in which they appear. Lines that do not match (comments, blank
    # lines) are skipped, so no line-count check is made.
    names, positions, fixed_coords = [], [], []
    for atom in atomic_positions_w_constraints_re.finditer(blockstr):
        names.append(atom.group('name'))
        # Build real lists: on Python 3 `map` returns a one-shot iterator,
        # which would not match the documented [x, y, z] lists.
        positions.append([fortfloat(c) for c in atom.group('x', 'y', 'z')])
        # A missing flag is captured as '' and defaults to '1', which
        # str01_to_bool maps to False (not fixed).
        fixed_coords.append([
            str01_to_bool(flag or '1')
            for flag in atom.group('fx', 'fy', 'fz')
        ])

    return dict(units=units,
                names=names,
                positions=positions,
                fixed_coords=fixed_coords)
Exemplo n.º 7
0
def parse_cell_parameters(txt):
    """
    Return dict containing info from the CELL_PARAMETERS card block in txt.

    .. note:: This card is only needed if ibrav = 0. Therefore, if the card is
           not present, the function will return None and not raise an error.

    .. note:: If the units are unspecified, they will be returned as None. The
           units interpreted by QE depend on whether or not one of 'celldm(1)'
           or 'a' is set in &SYSTEM.

    :param txt: A single string containing the QE input text to be parsed.

    :returns:
        A dictionary (if CELL_PARAMETERS is present; else: None) with

            * units: the units of the lattice vectors (always lower-case) or
              None
            * cell: 3x3 list with lattice vectors as rows

        For example::

            {'units': 'angstrom',
             'cell': [[16.9, 0.0, 0.0],
                      [-2.6, 8.0, 0.0],
                      [-2.6, -3.5, 7.2]]}

    :raises aiida.common.exceptions.ParsingError: if there are issues
        parsing the input.
    """
    # Define re for the card block.
    cell_parameters_block_re = re.compile(
        r"""
        ^ [ \t]*
        CELL_PARAMETERS [ \t]*
        [{(]? \s* (?P<units>[a-z]*) \s* [)}]? \s* [\n]
        (?P<block>
        (
            (
                \s*             # White space in front of the element spec is ok
                (
                    # First number
                    (
                        [-|+]?   # Plus or minus in front of the number (optional)
                        (\d*     # optional decimal in the beginning .0001 is ok, for example
                        [\.]     # There has to be a dot followed by
                        \d+)     # at least one decimal
                        |        # OR
                        (\d+     # at least one decimal, followed by
                        [\.]?    # an optional dot
                        \d*)     # followed by optional decimals
                        ([E|e|d|D][+|-]?\d+)?  # optional exponents E+03, e-05, d0, D0

                        (
                            \s+      # White space between numbers
                            [-|+]?   # Plus or minus in front of the number (optional)
                            (\d*     # optional decimal in the beginning .0001 is ok, for example
                            [\.]     # There has to be a dot followed by
                            \d+)     # at least one decimal
                            |        # OR
                            (\d+     # at least one decimal, followed by
                            [\.]?    # an optional dot
                            \d*)     # followed by optional decimals
                            ([E|e|d|D][+|-]?\d+)?  # optional exponents E+03, e-05, d0, D0
                        ){2}         # I expect three float values
                    )
                    |
                    \#
                    |
                    !            # If a line is commented out, that is also ok
                )
                .*               # I do not care what is after the comment or the vector
                |                # OR
                \s*              # A line only containing white space
             )
            [\n]                 # line break at the end
        ){3}                     # I need exactly 3 vectors
    )
    """, RE_FLAGS)

    # Define re for a single lattice vector line.
    cell_vector_regex = re.compile(
        r"""
        ^                        # Linestart
        [ \t]*                   # Optional white space
        (?P<x>                   # Get x
            [\-|\+]? ( \d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<y>                   # Get y
            [\-|\+]? (\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        [ \t]+
        (?P<z>                   # Get z
            [\-|\+]? (\d*[\.]\d+ | \d+[\.]?\d*)
            ([E|e|d|D][+|-]?\d+)?
        )
        """, re.X | re.M)

    # Find the card block and extract units and the lines of the block.
    match = cell_parameters_block_re.search(txt)
    if not match:
        return None
    # NOTE(review): the `units` group is `[a-z]*`, which matches the empty
    # string, so an unspecified unit looks like it comes back as '' rather
    # than the documented None -- confirm what callers expect before changing.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # Get the string containing the lines of the block.
    blockstr = match.group('block')
    if blockstr is None:
        raise ParsingError(
            'The CELL_PARAMETER card block was parsed as empty in\n' + txt)

    def fortfloat(s):
        """Convert a Fortran-style float string (``d``/``D`` exponent) to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Extract the lattice vectors as real lists: on Python 3 `map` returns a
    # one-shot iterator, which would not match the documented 3x3 list.
    lattice_vectors = [
        [fortfloat(component) for component in vector.group('x', 'y', 'z')]
        for vector in cell_vector_regex.finditer(blockstr)
    ]
    return dict(units=units, cell=lattice_vectors)
Exemplo n.º 8
0
def parse_upf(fname, check_filename=True):
    """
    Extract the UPF version and the element symbol from a UPF file.

    The file is scanned line by line with regular expressions instead of an
    XML parser. Note that even UPF v.2 cannot be parsed with the XML minidom!
    (e.g. due to the & characters in the human-readable section).

    :param fname: path of the UPF file to read.
    :param check_filename: if True, raise a ParsingError exception if the
        filename does not start with the element name (case-insensitive).
    :return: a dictionary with the keys ``version`` and ``element``.
    :raises ParsingError: if the element cannot be found, is not a known
        symbol, or fails the filename check.
    """
    import os

    from aiida.common.exceptions import ParsingError
    from aiida.common import aiidalogger
    # TODO: move these data in a 'chemistry' module
    from aiida.orm.data.structure import _valid_symbols

    with open(fname) as upf_file:
        # The version, if declared, is looked up on the first line only.
        header = upf_file.readline().strip()
        version_match = _upfversion_regexp.match(header)
        if not version_match:
            aiidalogger.debug("Assuming version 1 for file {}".format(fname))
            version = "1"
        else:
            version = version_match.group('version')
            aiidalogger.debug("Version found: {} for file {}".format(
                version, fname))

        major_text = version.partition('.')[0]
        try:
            version_major = int(major_text)
        except ValueError:
            # The leading part of the version string is not an integer:
            # treat the file as version 1.
            aiidalogger.debug("Falling back to version 1 for file {}, "
                              "version string '{}' unrecognized".format(
                                  fname, version))
            version_major = 1

        # Version 1 has its own element pattern; all versions > 1 share the
        # other. The scan continues from the second line of the file.
        if version_major == 1:
            element_regexp = _element_v1_regexp
        else:
            element_regexp = _element_v2_regexp

        element = None
        for line in upf_file:
            element_match = element_regexp.match(line.strip())
            if element_match:
                element = element_match.group('element_name')
                break

    if element is None:
        raise ParsingError(
            "Unable to find the element of UPF {}".format(fname))

    element = element.capitalize()
    if element not in _valid_symbols:
        raise ParsingError("Unknown element symbol {} for file {}".format(
            element, fname))

    if check_filename and not os.path.basename(fname).lower().startswith(
            element.lower()):
        raise ParsingError("Filename {0} was recognized for element "
                           "{1}, but the filename does not start "
                           "with {1}".format(fname, element))

    return {'version': version, 'element': element}
Exemplo n.º 9
0
def parse_namelists(txt):
    """
    Collect every namelist block of a QE input text into a nested dictionary.

    A namelist is a line holding ``&NAME``, followed by ``key = value`` pairs
    (separated by newlines or commas) and closed by a line whose only
    non-whitespace character is ``/``.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A nested dictionary mapping each namelist name (upper-cased) to a
        dictionary of its parameters (keys lower-cased, values converted by
        ``str2val``). Namelists without any ``key = value`` pair are left out.
        The result is passed through ``_uppercase_dict`` before being returned.

        For example::

            {"CONTROL": {"calculation": "bands",
                         "prefix": "al",
                         "pseudo_dir": "./pseudo",
                         "outdir": "./out"},
             "ELECTRONS": {"diagonalization": "cg"},
             "SYSTEM": {"nbnd": 8,
                        "ecutwfc": 15.0,
                        "celldm(1)": 7.5,
                        "ibrav": 2,
                        "nat": 1,
                        "ntyp": 1}
            }

    :raises aiida.common.exceptions.ParsingError: if no namelist with at
        least one key-value pair could be extracted from ``txt``.
    """
    # TODO: Incorporate support for algebraic expressions?
    # Pattern for one complete namelist: captures its name and its raw body.
    namelist_re = re.compile(
        r"""
        ^ [ \t]* &(\S+) [ \t]* $\n  # match line w/ nmlst tag; save nmlst name
        (
         [\S\s]*?                # match any line non-greedily
        )                        # save the group of text between nmlst
        ^ [ \t]* / [ \t]* $\n    # match line w/ "/" as only non-whitespace char
        """, re.M | re.X)
    # Pattern for a single "key = value" pair inside a namelist body.
    key_value_re = re.compile(
        r"""
        [ \t]* (\S+?) [ \t]*  # match and store key
        =               # equals sign separates key and value
        [ \t]* (\S+?) [ \t]*  # match and store value
        [\n,]           # return or comma separates "key = value" pairs
        """, re.M | re.X)

    parsed = {}
    for name, body in namelist_re.findall(txt):
        # Later duplicates of a key overwrite earlier ones.
        entries = {
            key.lower(): str2val(raw_value)
            for key, raw_value in key_value_re.findall(body)
        }
        # Namelists that yielded no pairs are not recorded at all.
        if entries:
            parsed[name.upper()] = entries

    if not parsed:
        raise ParsingError(
            'No data was found while parsing the namelist in the following '
            'text\n' + txt)
    return _uppercase_dict(parsed, "params_dict")
Exemplo n.º 10
0
def parse_k_points(txt):
    """
    Return a dictionary containing info from the K_POINTS card block in txt.

    .. note:: If the type of kpoints (where type = x in the card header,
           "K_POINTS x") is not present, type will be returned as 'tpiba', the
           QE default.

    :param txt: A single string containing the QE input text to be parsed.

    :returns:
        A dictionary containing

            * type: the type of kpoints (always lower-case)
            * points: for an explicit list of kpoints, an Nx3 list of the
              kpoints; for type = 'automatic', a 1x3 list with the number of
              equally-spaced points in each direction of the Brillouin zone,
              as in Monkhorst-Pack grids (not present if type = 'gamma')
            * weights: a 1xN list of the kpoint weights (will not be present if
              type = 'gamma' or type = 'automatic')
            * offset: a 1x3 list of the grid offsets in each direction of the
              Brillouin zone (only present if type = 'automatic')
              (**Note:** The offset value for each direction will be *one of*
              ``0.0`` [no offset] *or* ``0.5`` [offset by half a grid step].
              This differs from the Quantum Espresso convention, where an offset
              value of ``1`` corresponds to a half-grid-step offset, but adheres
              to the current AiiDa convention.)

        All values are plain lists (never lazy iterators).

        Examples::

            {'type': 'crystal',
             'points': [[0.125,  0.125,  0.0],
                        [0.125,  0.375,  0.0],
                        [0.375,  0.375,  0.0]],
             'weights': [1.0, 2.0, 1.0]}

            {'type': 'automatic',
             'points': [8, 8, 8],
             'offset': [0.0, 0.5, 0.0]}

            {'type': 'gamma'}

    :raises aiida.common.exceptions.ParsingError: if no K_POINTS card of any
        supported form is found in ``txt``.
    :raises ValueError: if a matched token cannot be converted to a number.
    """
    # Define re for the special-type card block.
    k_points_special_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]*
            [{(]? [ \t]* (?P<type>\S+?)? [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* \S+ [ \t]* $\n  # nks
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for the info contained in the special-type block.
    k_points_special_re = re.compile(
        r"""
    ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
    """, RE_FLAGS)
    # Define re for the automatic-type card block and its line of info.
    k_points_automatic_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* automatic [ \t]* [)}]? [ \t]* $\n
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]+ (\S+)
            [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)
    # Define re for the gamma-type card block. (There is no block info.)
    k_points_gamma_block_re = re.compile(
        r"""
        ^ [ \t]* K_POINTS [ \t]* [{(]? [ \t]* gamma [ \t]* [)}]? [ \t]* $\n
        """, RE_FLAGS)

    # Try the three card forms in turn: explicit list, automatic grid, gamma.
    info_dict = {}

    match = k_points_special_block_re.search(txt)
    if match:
        kpt_type = match.group('type')
        # A missing type in the card header means the QE default, 'tpiba'.
        info_dict['type'] = 'tpiba' if kpt_type is None else kpt_type.lower()
        points = []
        weights = []
        for line_match in k_points_special_re.finditer(match.group('block')):
            # Build real lists: on Python 3 ``map`` would store a one-shot
            # iterator instead of the coordinates themselves.
            points.append([float(x) for x in line_match.group(1, 2, 3)])
            weights.append(float(line_match.group(4)))
        info_dict['points'] = points
        info_dict['weights'] = weights
        return info_dict

    match = k_points_automatic_block_re.search(txt)
    if match:
        info_dict['type'] = 'automatic'
        info_dict['points'] = [int(x) for x in match.group(1, 2, 3)]
        # QE writes the offset as 0/1; AiiDA expects 0.0/0.5.
        info_dict['offset'] = [
            0. if int(x) == 0 else 0.5 for x in match.group(4, 5, 6)
        ]
        return info_dict

    if k_points_gamma_block_re.search(txt):
        info_dict['type'] = 'gamma'
        return info_dict

    raise ParsingError('K_POINTS card not found in\n' + txt)
Exemplo n.º 11
0
    def _check_calc_compatibility(self, calc):
        """
        Verify that ``calc`` is an instance of the supported calculation class.

        :param calc: the calculation object to check.
        :raises aiida.common.exceptions.ParsingError: if ``calc`` is not an
            instance of ``self._supported_calculation_class``.
        """
        from aiida.common.exceptions import ParsingError

        # Guard clause: a compatible calculation needs no further handling.
        if isinstance(calc, self._supported_calculation_class):
            return

        expected_name = self._supported_calculation_class.__name__
        raise ParsingError("Input calc must be a {}".format(expected_name))
Exemplo n.º 12
0
def parse_cell_parameters(txt):
    """
    Return dict containing info from the CELL_PARAMETERS card block in txt.

    .. note:: This card is only needed if ibrav = 0. Therefore, if the card is
           not present, the function will return None and not raise an error.

    .. note:: If the units are unspecified, they will be returned as None. The
           units interpreted by QE depend on whether or not one of 'celldm(1)'
           or 'a' is set in &SYSTEM.

    :param txt: A single string containing the QE input text to be parsed.

    :returns:
        A dictionary (if CELL_PARAMETERS is present; else: None) with

            * units: the units of the lattice vectors (always lower-case) or
              None
            * cell: 3x3 list (of lists of floats) with lattice vectors as rows

        For example::

            {'units': 'angstrom',
             'cell': [[16.9, 0.0, 0.0],
                      [-2.6, 8.0, 0.0],
                      [-2.6, -3.5, 7.2]]}

    :raises ValueError: if a lattice-vector component cannot be converted to
        a float (Fortran ``d``/``D`` exponents are accepted).
    """
    # Define re for the card block: header line plus exactly three rows.
    cell_parameters_block_re = re.compile(
        r"""
        ^ [ \t]* CELL_PARAMETERS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]* \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]* $\n?
         ){3}
        )
        """, RE_FLAGS)
    # Define re for one row (one lattice vector) of the block.
    lattice_vector_re = re.compile(
        r"""
        ^ [ \t]* (\S+) [ \t]+ (\S+) [ \t]+ (\S+) [ \t]* $\n?
        """, RE_FLAGS)

    # Find the card block; its absence is legitimate (see note above).
    match = cell_parameters_block_re.search(txt)
    if not match:
        return None

    # Use specified units or None if not specified.
    units = match.group('units')
    if units is not None:
        units = units.lower()

    # 'block' is a mandatory group of the pattern, so whenever the card
    # matched it holds the three rows and can never be None.
    blockstr = match.group('block')

    def fortfloat(s):
        """Convert a Fortran-style float string (e.g. ``1.0d-3``) to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Build real nested lists: on Python 3 ``map`` would leave one-shot
    # iterators in the result, which cannot be turned into a numeric array.
    lattice_vectors = [
        [fortfloat(component) for component in row.groups()]
        for row in lattice_vector_re.finditer(blockstr)
    ]
    return dict(units=units, cell=lattice_vectors)
Exemplo n.º 13
0
def parse_atomic_positions(txt):
    """
    Return a dictionary containing info from the ATOMIC_POSITIONS card block
    in txt.

    Atoms are returned in the order in which they appear in the card, also
    when only some of the lines carry the optional ``if_pos`` force
    modifications.

    .. note:: If the units are unspecified, they will be returned as None.

    :param txt: A single string containing the QE input text to be parsed.
    :type txt: str

    :returns:
        A dictionary with

            * units: the units of the positions (always lower-case) or None
            * names: list of the atom names (e.g. ``'Si'``, ``'Si0'``,
              ``'Si_0'``)
            * positions: list of the [x, y, z] positions (lists of floats)
            * fixed_coords: list of [x, y, z] (bools) of the force modifications
              (**Note:** True <--> Fixed, as defined in the
              ``BasePwCpInputGenerator._if_pos`` method)

        For example::

            {'units': 'bohr',
             'names': ['C', 'O'],
             'positions': [[0.0, 0.0, 0.0],
                           [0.0, 0.0, 2.5]],
             'fixed_coords': [[False, False, False],
                              [True, True, True]]}


    :raises aiida.common.exceptions.ParsingError: if the card is not found or
        if the number of parsed atoms differs from the number of block lines.
    :raises ValueError: if a coordinate cannot be converted to a float.
    """
    # Define re for the card block.
    # NOTE: This will match card block lines w/ or w/out force modifications.
    atomic_positions_block_re = re.compile(
        r"""
        ^ [ \t]* ATOMIC_POSITIONS [ \t]*
            [{(]? [ \t]* (?P<units>\S+?)? [ \t]* [)}]? [ \t]* $\n
        (?P<block>
         (?:
          ^ [ \t]*
          (?:
           \S+ [ \t]+ \S+ [ \t]+ \S+ [ \t]+ \S+
           (?:[ \t]+ [{(]? [ \t]* [01] [ \t]+ [01] [ \t]+ [01] [ \t]* [)}]?)?
          )
          [ \t]* $\n?
         )+
        )
        """, RE_FLAGS)
    # Define re for a single atom line. The force modifications are an
    # optional trailing group, so one pattern handles both line styles and a
    # single pass over the block keeps the atoms in file order.
    atom_line_re = re.compile(
        r"""
        ^ [ \t]*
        (?P<name>\S+) [ \t]+ (?P<x>\S+) [ \t]+ (?P<y>\S+) [ \t]+ (?P<z>\S+)
        (?:
            [ \t]+ [{(]? [ \t]* (?P<if_pos1>[01]) [ \t]+ (?P<if_pos2>[01])
            [ \t]+ (?P<if_pos3>[01]) [ \t]* [)}]?
        )?
        [ \t]* $\n?
        """, RE_FLAGS)

    # Find the card block and extract units and the lines of the block.
    match = atomic_positions_block_re.search(txt)
    if not match:
        raise ParsingError(
            'The ATOMIC_POSITIONS card block was not found in\n' + txt)
    # Get the units. If they are not found, match.group('units') will be None.
    units = match.group('units')
    if units is not None:
        units = units.lower()
    # 'block' is a mandatory group of the pattern (one or more atom lines),
    # so it is never None once the card has matched.
    blockstr = match.group('block')

    def str01_to_bool(s):
        """
        Map strings '0', '1' strings to bools: '0' --> True; '1' --> False.

        While this is opposite to the QE standard, this mapping is what needs to
        be passed to aiida in a 'settings' ParameterData object.
        (See the _if_pos method of BasePwCpInputGenerator)
        """
        if s == '0':
            return True
        elif s == '1':
            return False
        else:
            raise ParsingError(
                'Unable to convert if_pos = {} to bool'.format(s))

    def fortfloat(s):
        """Convert a Fortran-style float string (e.g. ``1.0d-3``) to float."""
        return float(s.replace('d', 'e').replace('D', 'E'))

    # Parse the lines of the card block in order, extracting an atom name,
    # position and fixed coordinates from each.
    names, positions, fixed_coords = [], [], []
    for atom in atom_line_re.finditer(blockstr):
        names.append(atom.group('name'))
        # Real lists rather than ``map`` objects (lazy on Python 3).
        positions.append(
            [fortfloat(coord) for coord in atom.group('x', 'y', 'z')])
        if_pos123 = atom.group('if_pos1', 'if_pos2', 'if_pos3')
        if if_pos123[0] is None:
            # No force modifications on this line: nothing is fixed.
            fixed_coords.append(3 * [False])
        else:
            fixed_coords.append([str01_to_bool(flag) for flag in if_pos123])

    # Check that the number of atomic positions parsed is equal to the number of
    # lines in blockstr
    n_lines = len(blockstr.rstrip().split('\n'))
    if len(names) != n_lines:
        raise ParsingError(
            'Only {} atomic positions were parsed from the {} lines of the '
            'ATOMIC_POSITIONS card block:\n{}'.format(len(names), n_lines,
                                                      blockstr))
    return dict(units=units,
                names=names,
                positions=positions,
                fixed_coords=fixed_coords)
Exemplo n.º 14
0
    def get_structuredata(self):
        """
        Build a StructureData object from the parsed input file.

        The lattice vectors and the atomic positions are converted to
        Angstrom, then one atom is appended per entry of the ATOMIC_POSITIONS
        card. Each atom keeps the name used in the input file, so that the
        kind names can be matched against the ATOMIC_SPECIES block (e.g. to
        attach different pseudopotentials to atoms of the same element).

        Reads ``self.cell_parameters``, ``self.namelists``,
        ``self.atomic_positions`` and ``self.atomic_species``.

        :return: StructureData object of the structure in the input file
        :raises aiida.common.exceptions.ParsingError: if CELL_PARAMETERS is
            missing, if the units of the cell or of the positions cannot be
            resolved, or if an atom name does not start with a known element
            symbol.
        """
        # Without CELL_PARAMETERS there is nothing to build the cell from.
        if self.cell_parameters is None:
            raise ParsingError(
                'CELL_PARAMETERS not found while parsing the input file. This '
                'card is needed for AiiDa.')

        # --- Conversion factor bringing the lattice vectors to Angstrom. ---
        # TODO: ***ASK GEORGE IF I SHOULD MULTIPLY OR DIVIDE BY ALAT***
        cell_units = self.cell_parameters.get('units')
        if cell_units is None or cell_units == 'alat':
            # alat has to come from the &SYSTEM namelist.
            celldm1 = self.namelists['SYSTEM'].get('celldm(1)')
            a = self.namelists['SYSTEM'].get('a')
            # 'celldm(1)' and 'a' are mutually exclusive.
            if celldm1 is not None and a is not None:
                raise ParsingError(
                    "Both 'celldm(1)' and 'a' were set in the input file.")
            if celldm1 is not None:
                cell_conv_factor = celldm1 * bohr_to_ang  # celldm(1) in Bohr
            elif a is not None:
                cell_conv_factor = a  # a is in Angstroms
            elif cell_units is None:
                cell_conv_factor = bohr_to_ang  # QE assumes Bohr
            else:
                # Explicit 'alat' units but no alat value to scale with.
                raise ParsingError(
                    "Unable to determine the units of the lattice vectors.")
        elif cell_units == 'bohr':
            cell_conv_factor = bohr_to_ang
        elif cell_units == 'angstrom':
            cell_conv_factor = 1.0
        else:
            raise ParsingError(
                "Unable to determine the units of the lattice vectors.")

        # Lattice vectors in Angstrom.
        cell = np.array(self.cell_parameters['cell']) * cell_conv_factor

        # --- Atomic positions as [x, y, z] vectors in Angstrom. ---
        pos_units = self.atomic_positions['units']
        positions = np.array(self.atomic_positions['positions'])
        if pos_units in (None, 'alat'):  # QE assumes alat
            # The cell is already in Angstrom, hence so is this length.
            alat = np.linalg.norm(cell[0])
            positions *= alat
        elif pos_units == 'bohr':
            positions = positions * bohr_to_ang
        elif pos_units == 'crystal':
            positions = np.dot(positions, cell)  # rotate into [x y z] basis
        elif pos_units != 'angstrom':
            raise ParsingError(
                'Unable to determine to convert positions to [x y z] Angstrom.'
            )

        # Atom names, in the same order as the positions.
        names = self.atomic_positions['names']

        # Lookup table: atom name -> mass, taken from ATOMIC_SPECIES.
        mass_dict = dict(
            zip(self.atomic_species['names'], self.atomic_species['masses']))

        # Deduce the chemical symbol of each atom from the start of its name.
        symbols = []
        for name in names:
            lowered = name.lower()
            matching = [
                sym for sym in _valid_symbols
                if lowered.startswith(sym.lower())
            ]
            if not matching:
                raise ParsingError(
                    'Unable to figure out the element represented by the '
                    'label, {}, in the input file.'.format(name))
            # Choose the longest match, since, for example, S and Si match Si.
            symbols.append(max(matching, key=len))

        # Assemble the structure: cell first, then one atom per position.
        structuredata = StructureData(cell=cell)
        for name, symbol, position in zip(names, symbols, positions):
            structuredata.append_atom(name=name,
                                      symbols=symbol,
                                      position=position,
                                      mass=mass_dict[name])
        return structuredata
Exemplo n.º 15
0
    def __init__(self, pwinput):
        """
        Parse inputs's namelist and cards to create attributes of the info.

        Sets ``input_txt`` (the raw text) and the parsed ``namelists``,
        ``atomic_positions``, ``cell_parameters``, ``k_points`` and
        ``atomic_species`` attributes.

        :param pwinput:
            Any one of the following

                * A string of the (existing) absolute path to the pwinput file.
                * A single string containing the pwinput file's text.
                * A list of strings, with the lines of the file as the elements.
                * An open file object (anything exposing a ``read()`` method).

        :raises IOError: if ``pwinput`` is a file and there is a problem reading
            the file, or if it is the relative path of an existing file.
        :raises TypeError: if ``pwinput`` is a list containing any non-string
            element(s), or if it is of an unsupported type.
        :raises aiida.common.exceptions.ParsingError: if there are issues
            parsing the pwinput.
        """
        # ``basestring`` only exists on Python 2 (where it covers both ``str``
        # and ``unicode``); fall back to ``str`` on Python 3.
        try:
            string_types = basestring  # noqa: F821
        except NameError:
            string_types = str

        # Get the text of the pwinput file as a single string.
        # File-like object: detected by duck typing, since the ``file``
        # builtin is not available on Python 3.
        if hasattr(pwinput, 'read'):
            try:
                self.input_txt = pwinput.read()
            except IOError:
                # Report the offending object itself; not every file-like
                # object has a ``name``.
                raise IOError('Unable to open the provided pwinput, {}'
                              ''.format(getattr(pwinput, 'name', pwinput)))
        # List.
        elif isinstance(pwinput, list):
            if not all(isinstance(s, string_types) for s in pwinput):
                raise TypeError(
                    'You provided a list to parse, but some elements were not '
                    'strings. Each element should be a string containing a line '
                    'of the pwinput file.')
            self.input_txt = ''.join(pwinput)
        # Path or string of the text.
        elif isinstance(pwinput, string_types):
            if os.path.isfile(pwinput):
                # An existing file was named: insist on an absolute path.
                if not os.path.isabs(pwinput):
                    raise IOError(
                        'Please provide the absolute path to an existing '
                        'pwinput file.')
                # Context manager so the handle is closed deterministically.
                with open(pwinput) as handle:
                    self.input_txt = handle.read()
            else:
                # Not a file on disk: treat the string as the input text.
                self.input_txt = pwinput
        else:
            # Fail clearly instead of with an AttributeError further down.
            raise TypeError(
                'pwinput must be a file object, a list of strings, an absolute '
                'path or a string with the file contents; got {}'.format(
                    type(pwinput).__name__))

        # Check that pwinput is not empty.
        if not self.input_txt.strip():
            raise ParsingError('The pwinput provided was empty!')

        # Parse the namelists.
        self.namelists = parse_namelists(self.input_txt)
        # Parse the ATOMIC_POSITIONS card.
        self.atomic_positions = parse_atomic_positions(self.input_txt)
        # Parse the CELL_PARAMETERS card.
        self.cell_parameters = parse_cell_parameters(self.input_txt)
        # Parse the K_POINTS card.
        self.k_points = parse_k_points(self.input_txt)
        # Parse the ATOMIC_SPECIES card.
        self.atomic_species = parse_atomic_species(self.input_txt)
Exemplo n.º 16
0
    def parse_pseudos_from_directory(cls, dirpath):
        """Parse the pseudo potential files in the given directory into a list of data nodes.

        .. note:: The directory pointed to by `dirpath` should only contain pseudo potential files. Optionally, it can
            contain just a single directory, that contains all the pseudo potential files. If any other files are stored
            in the basepath or the subdirectory, that cannot be successfully parsed as pseudo potential files the method
            will raise a `ValueError`.

        :param dirpath: absolute path to a directory containing pseudo potentials.
        :return: list of data nodes
        :raises ValueError: if `dirpath` is not a directory or contains anything other than files.
        :raises ValueError: if no pseudo potential could be parsed from `dirpath`.
        :raises ValueError: if `dirpath` contains multiple pseudo potentials for the same element.
        :raises ParsingError: if the constructor of the pseudo type fails for one of the files in the `dirpath`.
        :raises ParsingError: if the element is neither defined by the constructor nor parsable from the filename.
        """
        from aiida.common.exceptions import ParsingError

        pseudos = []

        if not os.path.isdir(dirpath):
            raise ValueError(f'`{dirpath}` is not a directory')

        dirpath_contents = os.listdir(dirpath)

        # A lone sub-directory is treated as the actual container of the files.
        if len(dirpath_contents) == 1 and os.path.isdir(
                os.path.join(dirpath, dirpath_contents[0])):
            dirpath = os.path.join(dirpath, dirpath_contents[0])

        for filename in os.listdir(dirpath):
            filepath = os.path.join(dirpath, filename)

            if not os.path.isfile(filepath):
                raise ValueError(
                    f'dirpath `{dirpath}` contains at least one entry that is not a file'
                )

            try:
                with open(filepath, 'rb') as handle:
                    pseudo = cls._pseudo_type(handle, filename=filename)
            except ParsingError as exception:
                raise ParsingError(f'failed to parse `{filepath}`: {exception}'
                                   ) from exception
            else:
                if pseudo.element is None:
                    # Fall back to an `ELEMENT.EXTENSION` style filename.
                    match = re.search(r'^([A-Za-z]{1,2})\.\w+', filename)
                    if match is None:
                        # Every fragment with a placeholder needs its own `f`
                        # prefix; implicit concatenation does not share it.
                        raise ParsingError(
                            f'`{cls._pseudo_type}` constructor did not define the element and could not parse a valid '
                            f'element symbol from the filename `{filename}` either. It should have the format '
                            '`ELEMENT.EXTENSION`')
                    pseudo.element = match.group(1)
                pseudos.append(pseudo)

        if not pseudos:
            raise ValueError(
                f'no pseudo potentials were parsed from `{dirpath}`')

        # Fewer distinct elements than pseudos means some element is repeated.
        elements = {pseudo.element for pseudo in pseudos}

        if len(pseudos) != len(elements):
            raise ValueError(
                f'directory `{dirpath}` contains pseudo potentials with duplicate elements'
            )

        return pseudos