Example #1
    def parse_stdout(self):
        """Parse the stdout file of pw2gw to build the `output_parameters` node."""
        from aiida_quantumespresso.utils.mapping import get_logging_container
        from aiida_quantumespresso.parsers.parse_raw.pw2gw import parse_stdout

        logs = get_logging_container()
        parsed_data = {}

        filename_stdout = self.node.get_attribute('output_filename')

        if filename_stdout not in self.retrieved.list_object_names():
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING
            return parsed_data, logs

        try:
            stdout = self.retrieved.get_object_content(filename_stdout)
        except IOError:
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_READ
            return parsed_data, logs

        try:
            parsed_data, logs = parse_stdout(stdout)
        except Exception:
            import traceback
            logs.critical.append(traceback.format_exc())
            self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

        # If the stdout was incomplete, most likely the job was interrupted before it could cleanly finish, so the
        # output files are most likely corrupt and cannot be restarted from
        if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']:
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE

        return parsed_data, logs
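The `logs` container used throughout these examples is created by `aiida_quantumespresso.utils.mapping.get_logging_container`. As a rough sketch of the interface the parsers rely on (not the actual library implementation), it behaves like an attribute-accessible dictionary holding one list of messages per log level:

class AttributeDict(dict):
    """A dict whose keys can also be read and written as attributes (sketch)."""

    def __getattr__(self, name):
        try:
            return self[name]
        except KeyError as exception:
            raise AttributeError(name) from exception

    def __setattr__(self, name, value):
        self[name] = value


def get_logging_container():
    """Return a container with one list of messages per log level (sketch)."""
    return AttributeDict({'error': [], 'warning': [], 'critical': []})

This supports both access styles seen in the snippets: `logs.critical.append(...)` as well as `'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']`.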
Example #2
    def parse_stdout(self, parameters, parser_options=None, parsed_xml=None):
        """Parse the stdout output file.

        :param parameters: the input parameters dictionary
        :param parser_options: optional dictionary with parser options
        :param parsed_xml: the raw parsed data from the XML output
        :return: tuple of two dictionaries, first with raw parsed data and second with log messages
        """
        from aiida_quantumespresso.parsers.parse_raw.pw import parse_stdout

        logs = get_logging_container()
        parsed_data = {}

        filename_stdout = self.node.get_attribute('output_filename')

        if filename_stdout not in self.retrieved.list_object_names():
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING
            return parsed_data, logs

        try:
            stdout = self.retrieved.get_object_content(filename_stdout)
        except IOError:
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_READ
            return parsed_data, logs

        try:
            parsed_data, logs = parse_stdout(stdout, parameters,
                                             parser_options, parsed_xml)
        except Exception:
            logs.critical.append(traceback.format_exc())
            self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

        # If the stdout was incomplete, most likely the job was interrupted before it could cleanly finish, so the
        # output files are most likely corrupt and cannot be restarted from
        if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']:
            self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE

        # Under certain conditions, such as the XML missing or being incorrect, the structure data might be incomplete.
        # Since the following code depends on it, we fill in the missing information from the input structure.
        structure = self.node.inputs.structure
        parsed_data.setdefault('structure', {}).setdefault('cell', {})

        if 'lattice_vectors' not in parsed_data['structure']['cell']:
            parsed_data['structure']['cell'][
                'lattice_vectors'] = structure.cell

        if 'atoms' not in parsed_data['structure']['cell']:
            symbols = {
                s.kind_name: structure.get_kind(s.kind_name).symbol
                for s in structure.sites
            }
            parsed_data['structure']['cell']['atoms'] = [
                (symbols[s.kind_name], s.position) for s in structure.sites
            ]

        return parsed_data, logs
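The nested `setdefault` chain used above is worth isolating: it guarantees that the `structure` -> `cell` path exists before the fallbacks run, without overwriting anything the stdout parser already filled in. A self-contained illustration:

parsed_data = {'structure': {'cell': {'lattice_vectors': [[1, 0, 0], [0, 1, 0], [0, 0, 1]]}}}

# intermediate dictionaries are created only where they are missing ...
parsed_data.setdefault('structure', {}).setdefault('cell', {})

# ... so existing entries are left untouched
assert 'lattice_vectors' in parsed_data['structure']['cell']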
Example #3
def parse_output_base(filecontent, codename=None, message_map=None):
    """Parses the output file of a QE calculation, just checking for basic content like JOB DONE, errors with %%%% etc.

    :param filecontent: a string with the output file content
    :param codename: the string printed both in the header and near the walltime.
        If passed, a few more things are parsed (e.g. code version, walltime, ...)
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    from aiida_quantumespresso.utils.mapping import get_logging_container

    keys = ['error', 'warning']

    if message_map is not None and (not isinstance(message_map, dict) or any(key not in message_map for key in keys)):
        raise RuntimeError('invalid format for `message_map`: should be a dictionary with the two keys {}'.format(keys))

    logs = get_logging_container()
    parsed_data = get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}')

    lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n')

    for line in lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    if codename is not None:

        codestring = 'Program {}'.format(codename)

        for line_number, line in enumerate(lines):

            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # Parse the walltime
            if codename in line and 'WALL' in line:
                try:
                    time = line.split('CPU')[1].split('WALL')[0].strip()
                    parsed_data['wall_time'] = time
                except (ValueError, IndexError):
                    logs.warning.append('ERROR_PARSING_WALLTIME')
                else:
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                    except ValueError:
                        logs.warning.append('ERROR_CONVERTING_WALLTIME_TO_SECONDS')

            # Parse an error message with optional mapping of the message
            if '%%%%%%%%%%%%%%' in line:
                parse_output_error(lines, line_number, logs, message_map)

    return parsed_data, logs
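A usage sketch for `parse_output_base`; the stdout content below is a hand-written stand-in for real pw.x output, not actual program output:

stdout = '\n'.join([
    '     Program PWSCF v.6.5 starts on  1Jan2020 at 12:00:00',
    '     PWSCF        :      1.23s CPU      1.50s WALL',
    '     JOB DONE.',
])

parsed_data, logs = parse_output_base(stdout, codename='PWSCF')

print(parsed_data['code_version'])  # 'v.6.5'
print(parsed_data['wall_time'])     # '1.50s'
assert 'ERROR_OUTPUT_STDOUT_INCOMPLETE' not in logs.error  # JOB DONE was found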
Example #4
    def parse_xml(self, dir_with_bands=None, parser_options=None):
        """Parse the XML output file.

        :param dir_with_bands: absolute path to directory containing individual k-point XML files for old XML format.
        :param parser_options: optional dictionary with parser options
        :return: tuple of two dictionaries, first with raw parsed data and second with log messages
        """
        from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError
        from .parse_xml.pw.parse import parse_xml

        logs = get_logging_container()
        parsed_data = {}

        object_names = self.retrieved.list_object_names()
        xml_files = [
            xml_file for xml_file in self.node.process_class.xml_filenames
            if xml_file in object_names
        ]

        if not xml_files:
            if not self.node.get_option('without_xml'):
                self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING
            return parsed_data, logs

        if len(xml_files) > 1:
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE
            return parsed_data, logs

        try:
            with self.retrieved.open(xml_files[0]) as xml_file:
                parsed_data, logs = parse_xml(xml_file, dir_with_bands)
        except IOError:
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ
        except XMLParseError:
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE
        except XMLUnsupportedFormatError:
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT
        except Exception:
            logs.critical.append(traceback.format_exc())
            self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

        return parsed_data, logs
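Note that `parse_xml` records failures on `self.exit_code_xml` instead of returning an exit code, so the top-level `parse` method can gather as much data as possible before deciding which failure wins. A hedged sketch of that pattern follows; the attribute names mirror the snippets above, while the precedence ordering is an assumption, not the library's actual logic:

def parse(self, **kwargs):
    """Sketch of a top-level `parse` combining the stored exit codes."""
    self.exit_code_xml = None
    self.exit_code_stdout = None

    parsed_xml, logs_xml = self.parse_xml()
    parsed_stdout, logs_stdout = self.parse_stdout()  # signature varies per plugin

    # assumption: XML problems take precedence over stdout problems
    for exit_code in (self.exit_code_xml, self.exit_code_stdout):
        if exit_code is not None:
            return exit_code  # an AiiDA `ExitCode`, returned from `parse`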
Example #5
def parse_xml(xml_file, dir_with_bands=None, include_deprecated_v2_keys=False):
    """Parse a `pw.x` XML output file, dispatching on the format version of the file.

    :param xml_file: a file handle to the XML output file
    :param dir_with_bands: absolute path to directory containing individual k-point XML files for the old XML format
    :param include_deprecated_v2_keys: whether to include deprecated keys from the old XML format
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    try:
        xml_parsed = ElementTree.parse(xml_file)
    except ElementTree.ParseError as exception:
        raise XMLParseError('error while parsing XML file') from exception

    xml_file_version = get_xml_file_version(xml_parsed)

    try:
        if xml_file_version == QeXmlVersion.POST_6_2:
            parsed_data, logs = parse_pw_xml_post_6_2(
                xml_parsed, include_deprecated_v2_keys)
        elif xml_file_version == QeXmlVersion.PRE_6_2:
            xml_file.seek(0)
            parsed_data, logs = parse_pw_xml_pre_6_2(
                xml_file, dir_with_bands, include_deprecated_v2_keys)
    except Exception:
        import traceback
        logs = get_logging_container()
        logs.critical.append(traceback.format_exc())
        parsed_data = {}

    return parsed_data, logs
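A usage sketch: `parse_xml` takes an open file handle (it may need to `seek(0)` for the old format), so pass a handle rather than a path. The filename below is an assumption for illustration:

with open('data-file-schema.xml', 'rb') as xml_file:  # filename is an assumption
    parsed_data, logs = parse_xml(xml_file)

for message in logs.critical:
    print('CRITICAL:', message)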
Example #6
def parse_stdout(stdout,
                 input_parameters,
                 parser_options=None,
                 parsed_xml=None):
    """Parses the stdout content of a Quantum ESPRESSO `pw.x` calculation.

    :param stdout: the stdout content as a string
    :param input_parameters: dictionary with the input parameters
    :param parser_options: the parser options from the settings input parameter node
    :param parsed_xml: dictionary with data parsed from the XML output file
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    if parser_options is None:
        parser_options = {}

    if parsed_xml is None:
        parsed_xml = {}

    # Split the stdout content into lines
    data_lines = stdout.split('\n')

    logs = get_logging_container()

    parsed_data = {}
    vdw_correction = False
    bands_data = parsed_xml.pop('bands', {})
    structure_data = parsed_xml.pop('structure', {})
    trajectory_data = {}

    maximum_ionic_steps = None
    marker_bfgs_converged = False

    # First check whether the `JOB DONE` message was written, otherwise the job was interrupted
    for line in data_lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Determine whether the input switched on an electric field
    lelfield = input_parameters.get('CONTROL', {}).get('lelfield', False)

    # Find some useful quantities.
    if not parsed_xml.get('number_of_bands', None):
        try:
            for line in data_lines:
                if 'lattice parameter (alat)' in line:
                    alat = float(line.split('=')[1].split('a.u')[0])
                elif 'number of atoms/cell' in line:
                    nat = int(line.split('=')[1])
                elif 'number of atomic types' in line:
                    ntyp = int(line.split('=')[1])
                elif 'unit-cell volume' in line:
                    if '(a.u.)^3' in line:
                        volume = float(line.split('=')[1].split('(a.u.)^3')[0])
                    else:
                        # occurs in v5.3.0
                        volume = float(line.split('=')[1].split('a.u.^3')[0])
                elif 'number of Kohn-Sham states' in line:
                    nbnd = int(line.split('=')[1])
                elif 'number of k points' in line:
                    nk = int(line.split('=')[1].split()[0])
                    if input_parameters.get('SYSTEM', {}).get('nspin', 1) > 1:
                        # QE counts twice each k-point in spin-polarized calculations
                        nk //= 2
                elif 'Dense  grid' in line:
                    FFT_grid = [
                        int(g)
                        for g in line.split('(')[1].split(')')[0].split(',')
                    ]
                elif 'Smooth grid' in line:
                    smooth_FFT_grid = [
                        int(g)
                        for g in line.split('(')[1].split(')')[0].split(',')
                    ]
                    break
            alat *= bohr_to_ang
            volume *= bohr_to_ang**3
            parsed_data['lattice_parameter_initial'] = alat
            parsed_data['number_of_bands'] = nbnd
            try:
                parsed_data['number_of_k_points'] = nk
                parsed_data['fft_grid'] = FFT_grid
                parsed_data['smooth_fft_grid'] = smooth_FFT_grid
            except NameError:  # these are not crucial, so parsing does not fail if they are not found
                pass
        except NameError:  # nat or other variables were not found, and thus not initialized

            # Try to get some error messages
            for line in data_lines:
                # Compare the line to the known set of error and warning messages and add them to the log container
                detect_important_message(logs, line)

            if logs.error or logs.warning:
                parsed_data['trajectory'] = trajectory_data
                return parsed_data, logs

            # did not find any error message -> raise an Error and do not return anything
            raise QEOutputParsingError('Parser cannot load basic info.')
    else:
        nat = structure_data['number_of_atoms']
        ntyp = structure_data['number_of_species']
        alat = structure_data['lattice_parameter_xml']
        volume = structure_data['cell']['volume']
    # NOTE: lattice_parameter_xml is the lattice parameter of the xml file
    # in the units used by the code; lattice_parameter is instead in angstrom.

    # Save these quantities in the parsed_data, because they may be
    # useful for queries, and structure_data will not be stored as a Dict
    parsed_data['number_of_atoms'] = nat
    parsed_data['number_of_species'] = ntyp
    parsed_data['volume'] = volume

    c_bands_error = False

    # now grep quantities that can be considered isolated pieces of information.
    for count, line in enumerate(data_lines):

        # Compare the line to the known set of error and warning messages and add them to the log container
        detect_important_message(logs, line)

        # to be used later
        if 'Carrying out vdW-DF run using the following parameters:' in line:
            vdw_correction = True

        elif 'Cartesian axes' in line:
            # this is the part where initial positions and chemical
            # symbols are printed (they do not change during a run)
            i = count + 1
            while i < count + 10 and not ('site n.' in data_lines[i]
                                          and 'atom' in data_lines[i]):
                i += 1
            if 'site n.' in data_lines[i] and 'atom' in data_lines[i]:
                trajectory_data['atomic_species_name'] = [
                    data_lines[i + 1 + j].split()[1] for j in range(nat)
                ]

        # parse the initialization time (take only the first occurrence)
        elif ('init_wall_time_seconds' not in parsed_data
              and 'total cpu time spent up to now is' in line):
            init_time = float(
                line.split('total cpu time spent up to now is')[1].split(
                    'secs')[0])
            parsed_data['init_wall_time_seconds'] = init_time

        # parse dynamical RAM estimates
        elif 'Estimated max dynamical RAM per process' in line:
            value = line.split('>')[-1]
            match = re.match(
                r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)',
                value)
            if match:
                try:
                    parsed_data['estimated_ram_per_process'] = float(
                        match.group(1))
                    parsed_data['estimated_ram_per_process{}'.format(
                        units_suffix)] = match.group(4)
                except (IndexError, ValueError):
                    pass

        # parse dynamical RAM estimates
        elif 'Estimated total dynamical RAM' in line:
            value = line.split('>')[-1]
            match = re.match(
                r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)',
                value)
            if match:
                try:
                    parsed_data['estimated_ram_total'] = float(match.group(1))
                    parsed_data['estimated_ram_total{}'.format(
                        units_suffix)] = match.group(4)
                except (IndexError, ValueError):
                    pass

        # parse global information that is written only once
        elif 'PWSCF' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0]
                parsed_data['wall_time'] = time
            except Exception:
                logs.warning.append('Error while parsing wall time.')
            else:
                try:
                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(time)
                except ValueError:
                    raise QEOutputParsingError(
                        'Unable to convert wall_time in seconds.')

        # for later control on relaxation-dynamics convergence
        elif 'nstep' in line and '=' in line:
            maximum_ionic_steps = int(line.split()[2])

        elif 'bfgs converged in' in line:
            marker_bfgs_converged = True

        elif 'number of bfgs steps' in line:

            try:
                parsed_data['number_ionic_steps'] += 1
            except KeyError:
                parsed_data['number_ionic_steps'] = 1

        elif 'A final scf calculation at the relaxed structure' in line:
            parsed_data['final_scf'] = True

        elif 'point group' in line:
            if 'k-point group' not in line:
                try:
                    # Split line in components delimited by either space(s) or
                    # parenthesis and filter out empty strings
                    line_elems = [
                        _f for _f in re.split(r' +|\(|\)', line) if _f
                    ]

                    pg_international = line_elems[-1]
                    pg_schoenflies = line_elems[-2]

                    parsed_data['pointgroup_international'] = pg_international
                    parsed_data['pointgroup_schoenflies'] = pg_schoenflies

                except Exception:
                    warning = 'Problem parsing point group, I found: {}'.format(
                        line.strip())
                    logs.warning.append(warning)

        # special parsing of c_bands error
        elif 'c_bands' in line and 'eigenvalues not converged' in line:
            c_bands_error = True

        elif 'iteration #' in line:
            if 'Calculation restarted' not in line and 'Calculation stopped' not in line:
                try:
                    parsed_data['total_number_of_scf_iterations'] += 1
                except KeyError:
                    parsed_data['total_number_of_scf_iterations'] = 1

            if c_bands_error:
                # if another iteration follows, the c_bands error is not necessarily
                # a problem, so only warn if it appears in the last iteration
                c_bands_error = False

    if c_bands_error:
        logs.warning.append('c_bands: at least 1 eigenvalues not converged')

    # Split the output text at each self-consistent calculation. The initial
    # part contains information already present in the XML (cell, initial
    # positions, k-points, ...) and is skipped here; if needed, parse it
    # before this point. Everything is collected in a trajectory_data dictionary.
    relax_steps = stdout.split('Self-consistent Calculation')[1:]
    relax_steps = [i.split('\n') for i in relax_steps]

    # Now create the arrays for every relaxation step.

    for data_step in relax_steps:
        trajectory_frame = {}

        for count, line in enumerate(data_step):

            if 'CELL_PARAMETERS' in line:
                try:
                    # an IndexError is raised and caught below if there are not enough lines
                    a1 = [float(s) for s in data_step[count + 1].split()]
                    a2 = [float(s) for s in data_step[count + 2].split()]
                    a3 = [float(s) for s in data_step[count + 3].split()]
                    lattice = line.split('(')[1].split(')')[0].split('=')
                    if lattice[0].lower() not in ['alat', 'bohr', 'angstrom']:
                        raise QEOutputParsingError(
                            'Error while parsing cell_parameters: ' +
                            'unsupported units {}'.format(lattice[0]))

                    if 'alat' in lattice[0].lower():
                        a1 = [alat * bohr_to_ang * float(s) for s in a1]
                        a2 = [alat * bohr_to_ang * float(s) for s in a2]
                        a3 = [alat * bohr_to_ang * float(s) for s in a3]
                        lattice_parameter_b = float(lattice[1])
                        if abs(lattice_parameter_b - alat) > lattice_tolerance:
                            raise QEOutputParsingError(
                                'Lattice parameters mismatch! ' +
                                '{} vs {}'.format(lattice_parameter_b, alat))
                    elif 'bohr' in lattice[0].lower():
                        # convert the lattice vectors from bohr to angstrom
                        a1 = [bohr_to_ang * float(s) for s in a1]
                        a2 = [bohr_to_ang * float(s) for s in a2]
                        a3 = [bohr_to_ang * float(s) for s in a3]
                    trajectory_data.setdefault('lattice_vectors_relax',
                                               []).append([a1, a2, a3])

                except Exception:
                    logs.warning.append(
                        'Error while parsing relaxation cell parameters.')

            elif 'ATOMIC_POSITIONS' in line:
                try:
                    this_key = 'atomic_positions_relax'
                    # positions are re-initialized at every step, so that the
                    # list from the previous iteration is not referenced again
                    metric = line.split('(')[1].split(')')[0]
                    if metric == 'crystal':
                        this_key = 'atomic_fractionals_relax'
                    elif metric not in ['alat', 'bohr', 'angstrom']:
                        raise QEOutputParsingError(
                            'Error while parsing atomic_positions: units not supported.'
                        )
                    # TODO: check how to map the atoms in the original scheme
                    positions = []
                    for i in range(nat):
                        line2 = data_step[count + 1 + i].split()
                        tau = [float(s) for s in line2[1:4]]
                        if metric == 'alat':
                            tau = [alat * float(s) for s in tau]
                        elif metric == 'bohr':
                            tau = [bohr_to_ang * float(s) for s in tau]
                        positions.append(tau)
                    trajectory_data.setdefault(this_key, []).append(positions)
                except Exception:
                    logs.warning.append(
                        'Error while parsing relaxation atomic positions.')

            # NOTE: in the above, the chemical symbols are not those of AiiDA,
            # since the AiiDA structure is different. It is therefore assumed
            # that the order of atoms is the same as in the input atomic structure.

            # Computed dipole correction in slab geometries.
            # save dipole in debye units, only at last iteration of scf cycle
            elif 'Computed dipole along edir' in line:
                j = count + 3
                line2 = data_step[j]
                try:
                    units = line2.split()[-1]
                    if default_dipole_units.lower() not in units.lower(
                    ):  # only debye
                        raise QEOutputParsingError(
                            'Error parsing the dipole correction. Units {} are not supported.'
                            .format(units))
                    value = float(line2.split()[-2])
                except IndexError:  # on units
                    continue
                # save only the last dipole correction
                while 'Computed dipole along edir' not in line2:
                    j += 1
                    try:
                        line2 = data_step[j]
                    except IndexError:  # The dipole is also written at the beginning of a new bfgs iteration
                        break
                    if 'End of self-consistent calculation' in line2:
                        trajectory_data.setdefault('dipole', []).append(value)
                        parsed_data['dipole' +
                                    units_suffix] = default_dipole_units
                        break

            # saving the SCF convergence accuracy for each SCF cycle
            # If for some step this line is not printed, the later check with the scf_accuracy array length should catch it
            elif 'estimated scf accuracy' in line:
                try:
                    value = float(line.split()[-2]) * ry_to_ev
                    trajectory_data.setdefault('scf_accuracy',
                                               []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf accuracy.')

            elif 'convergence has been achieved in' in line or 'convergence NOT achieved after' in line:
                try:
                    value = int(line.split('iterations')[0].split()[-1])
                    trajectory_data.setdefault('scf_iterations',
                                               []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf iterations.')

            elif 'Calculation stopped in scf loop at iteration' in line:
                try:
                    value = int(line.split()[-1])
                    trajectory_data.setdefault('scf_iterations',
                                               []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf iterations.')

            elif 'End of self-consistent calculation' in line:
                # parse energy threshold for diagonalization algorithm
                try:
                    j = 0
                    while True:
                        j -= 1
                        line2 = data_step[count + j]
                        if 'ethr' in line2:
                            value = float(line2.split('=')[1].split(',')[0])
                            break
                    trajectory_data.setdefault('energy_threshold',
                                               []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing ethr.')

                # parse final magnetic moments, if present
                try:
                    j = 0
                    while True:
                        j -= 1
                        line2 = data_step[count + j]
                        if 'Magnetic moment per site' in line2:
                            break
                        if 'iteration' in line2:
                            raise QEOutputParsingError
                    mag_moments = []
                    charges = []
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom:' in line2:
                            mag_moments.append(
                                float(line2.split('magn:')[1].split()[0]))
                            charges.append(
                                float(line2.split('charge:')[1].split()[0]))
                        if len(mag_moments) == nat:
                            break
                    trajectory_data.setdefault('atomic_magnetic_moments',
                                               []).append(mag_moments)
                    trajectory_data.setdefault('atomic_charges',
                                               []).append(charges)
                    parsed_data['atomic_magnetic_moments' +
                                units_suffix] = default_magnetization_units
                    parsed_data['atomic_charges' +
                                units_suffix] = default_charge_units
                except QEOutputParsingError:
                    pass

            # grep energy and possibly magnetization
            elif '!' in line:
                try:

                    En = float(line.split('=')[1].split('Ry')[0]) * ry_to_ev
                    E_acc = float(
                        data_step[count +
                                  2].split('<')[1].split('Ry')[0]) * ry_to_ev

                    for key, value in [['energy', En],
                                       ['energy_accuracy', E_acc]]:
                        trajectory_data.setdefault(key, []).append(value)
                        parsed_data[key + units_suffix] = default_energy_units
                    # TODO: decide units for magnetization. now bohr mag/cell
                    j = 0
                    while True:
                        j += 1
                        line2 = data_step[count + j]

                        for string, key in [
                            [
                                'one-electron contribution',
                                'energy_one_electron'
                            ],
                            ['hartree contribution', 'energy_hartree'],
                            ['xc contribution', 'energy_xc'],
                            ['ewald contribution', 'energy_ewald'],
                            ['smearing contrib.', 'energy_smearing'],
                            [
                                'one-center paw contrib.',
                                'energy_one_center_paw'
                            ],
                            ['est. exchange err', 'energy_est_exchange'],
                            ['Fock energy', 'energy_fock'],
                            ['Hubbard energy', 'energy_hubbard'],
                                # Add also ENVIRON specific contribution to the total energy
                            ['solvation energy', 'energy_solvation'],
                            ['cavitation energy', 'energy_cavitation'],
                            ['PV energy', 'energy_pv'],
                            [
                                'periodic energy correct.',
                                'energy_pbc_correction'
                            ],
                            ['ionic charge energy', 'energy_ionic_charge'],
                            [
                                'external charges energy',
                                'energy_external_charges'
                            ]
                        ]:
                            if string in line2:
                                value = grep_energy_from_line(line2)
                                trajectory_data.setdefault(key,
                                                           []).append(value)
                                parsed_data[
                                    key + units_suffix] = default_energy_units
                        # magnetizations
                        if 'total magnetization' in line2:
                            this_m = line2.split('=')[1].split('Bohr')[0]
                            try:  # magnetization might be a scalar
                                value = float(this_m)
                            except ValueError:  # but can also be a three vector component in non-collinear calcs
                                value = [float(i) for i in this_m.split()]
                            trajectory_data.setdefault('total_magnetization',
                                                       []).append(value)
                            parsed_data[
                                'total_magnetization' +
                                units_suffix] = default_magnetization_units

                        elif 'absolute magnetization' in line2:
                            value = float(line2.split('=')[1].split('Bohr')[0])
                            trajectory_data.setdefault(
                                'absolute_magnetization', []).append(value)
                            parsed_data[
                                'absolute_magnetization' +
                                units_suffix] = default_magnetization_units
                        # exit loop
                        elif 'convergence' in line2:
                            break

                    if vdw_correction:
                        j = 0
                        while True:
                            j += -1
                            line2 = data_step[count + j]
                            if 'Non-local correlation energy' in line2:
                                value = grep_energy_from_line(line2)
                                trajectory_data.setdefault('energy_vdw',
                                                           []).append(value)
                                break
                        parsed_data['energy_vdw' +
                                    units_suffix] = default_energy_units
                except Exception:
                    logs.warning.append(
                        'Error while parsing for energy terms.')

            elif 'the Fermi energy is' in line:
                try:
                    value = float(line.split('is')[1].split('ev')[0])
                    trajectory_data.setdefault('fermi_energy',
                                               []).append(value)
                    parsed_data['fermi_energy' +
                                units_suffix] = default_energy_units
                except Exception:
                    logs.warning.append(
                        'Error while parsing Fermi energy from the output file.'
                    )

            elif 'Forces acting on atoms' in line:
                try:
                    forces = []
                    j = 0
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom ' in line2:
                            line2 = line2.split('=')[1].split()
                            # CONVERT FORCES IN eV/Ang
                            vec = [
                                float(s) * ry_to_ev / bohr_to_ang
                                for s in line2
                            ]
                            forces.append(vec)
                        if len(forces) == nat:
                            break
                    trajectory_data.setdefault('forces', []).append(forces)
                    parsed_data['forces' + units_suffix] = default_force_units
                except Exception:
                    logs.warning.append('Error while parsing forces.')

            # TODO: adding the parsing support for the decomposition of the forces

            elif 'Total force =' in line:
                try:  # note that I can't check the units: not written in output!
                    value = float(line.split('=')[1].split('Total')
                                  [0]) * ry_to_ev / bohr_to_ang
                    trajectory_data.setdefault('total_force', []).append(value)
                    parsed_data['total_force' +
                                units_suffix] = default_force_units
                except Exception:
                    logs.warning.append('Error while parsing total force.')

            elif ('entering subroutine stress ...'
                  in line) or ('Computing stress (Cartesian axis) and pressure'
                               in line):
                try:
                    stress = []
                    for k in range(10 + 5 * vdw_correction):
                        if 'P=' in data_step[count + k + 1]:
                            count2 = count + k + 1
                    if '(Ry/bohr**3)' not in data_step[count2]:
                        raise QEOutputParsingError(
                            'Error while parsing stress: unexpected units.')
                    for k in range(3):
                        line2 = data_step[count2 + k + 1].split()
                        vec = [
                            float(s) * 10**(-9) * ry_si / (bohr_si)**3
                            for s in line2[0:3]
                        ]
                        stress.append(vec)
                    trajectory_data.setdefault('stress', []).append(stress)
                    parsed_data['stress' + units_suffix] = default_stress_units
                except Exception:
                    logs.warning.append('Error while parsing stress tensor.')

            # Electronic and ionic dipoles when 'lelfield' was set to True in input parameters
            elif lelfield is True:

                if 'Electronic Dipole per cell' in line:
                    electronic_dipole = float(line.split()[-1])
                    trajectory_frame.setdefault(
                        'electronic_dipole_cell_average',
                        []).append(electronic_dipole)

                elif 'Ionic Dipole per cell' in line:
                    ionic_dipole = float(line.split()[-1])
                    trajectory_frame.setdefault('ionic_dipole_cell_average',
                                                []).append(ionic_dipole)

                elif 'Electronic Dipole on Cartesian axes' in line:
                    electronic_dipole = [
                        float(data_step[count + i + 1].split()[1])
                        for i in range(3)
                    ]
                    trajectory_frame.setdefault(
                        'electronic_dipole_cartesian_axes',
                        []).append(electronic_dipole)

                elif 'Ionic Dipole on Cartesian axes' in line:
                    ionic_dipole = [
                        float(data_step[count + i + 1].split()[1])
                        for i in range(3)
                    ]
                    trajectory_frame.setdefault('ionic_dipole_cartesian_axes',
                                                []).append(ionic_dipole)

        # End of trajectory frame, only keep last entries for dipole related values
        if lelfield is True:

            # For every property only get the last entry if possible
            try:
                ed_cell = trajectory_frame[
                    'electronic_dipole_cell_average'].pop()
            except IndexError:
                ed_cell = None

            try:
                ed_axes = trajectory_frame[
                    'electronic_dipole_cartesian_axes'].pop()
            except IndexError:
                ed_axes = None

            try:
                id_cell = trajectory_frame['ionic_dipole_cell_average'].pop()
            except IndexError:
                id_cell = None

            try:
                id_axes = trajectory_frame['ionic_dipole_cartesian_axes'].pop()
            except IndexError:
                id_axes = None

            # Only add them if all four properties were successfully parsed
            if all([
                    value is not None
                    for value in [ed_cell, ed_axes, id_cell, id_axes]
            ]):
                trajectory_data.setdefault('electronic_dipole_cell_average',
                                           []).append(ed_cell)
                trajectory_data.setdefault('electronic_dipole_cartesian_axes',
                                           []).append(ed_axes)
                trajectory_data.setdefault('ionic_dipole_cell_average',
                                           []).append(id_cell)
                trajectory_data.setdefault('ionic_dipole_cartesian_axes',
                                           []).append(id_axes)

    # check consistency of scf_accuracy and scf_iterations
    if 'scf_accuracy' in trajectory_data:
        if 'scf_iterations' in trajectory_data:
            if len(trajectory_data['scf_accuracy']) != sum(
                    trajectory_data['scf_iterations']):
                logs.warning.append(
                    'the length of scf_accuracy does not match the sum of the elements of scf_iterations.'
                )
        else:
            logs.warning.append(
                'the scf_accuracy array was parsed but the scf_iterations array was not.'
            )

    # If specified in the parser options, parse the atomic occupations
    parse_atomic_occupations = parser_options.get('parse_atomic_occupations',
                                                  False)

    if parse_atomic_occupations:

        atomic_occupations = {}
        hubbard_blocks = stdout.split('LDA+U parameters')

        for line in hubbard_blocks[-1].split('\n'):

            if 'Tr[ns(na)]' in line:

                values = line.split('=')
                atomic_index = values[0].split()[1]
                occupations = values[1].split()

                if len(occupations) == 1:
                    atomic_occupations[atomic_index] = {
                        'total': occupations[0]
                    }
                elif len(occupations) == 3:
                    atomic_occupations[atomic_index] = {
                        'up': occupations[0],
                        'down': occupations[1],
                        'total': occupations[2]
                    }
                else:
                    continue

        parsed_data['atomic_occupations'] = atomic_occupations

    # Ionic calculation where the BFGS algorithm did not print that the calculation converged
    if 'atomic_positions_relax' in trajectory_data and not marker_bfgs_converged:
        logs.error.append('ERROR_IONIC_CONVERGENCE_NOT_REACHED')

    # Ionic calculation that hit the maximum number of ionic steps. Note: does not necessarily mean that convergence was
    # not reached as it could have occurred in the last step.
    if maximum_ionic_steps is not None and maximum_ionic_steps == parsed_data.get(
            'number_ionic_steps', None):
        logs.warning.append('ERROR_MAXIMUM_IONIC_STEPS_REACHED')

    # Remove duplicate log messages by converting to a set, then back to a list, which is the expected format
    logs.error = list(set(logs.error))
    logs.warning = list(set(logs.warning))

    parsed_data['bands'] = bands_data
    parsed_data['structure'] = structure_data
    parsed_data['trajectory'] = trajectory_data

    return parsed_data, logs
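A usage sketch for the raw `pw.x` stdout parser; the filename and the minimal input parameters are assumptions for illustration:

with open('aiida.out') as handle:  # filename is an assumption
    stdout = handle.read()

input_parameters = {'CONTROL': {'calculation': 'vc-relax'}}  # assumed input node contents
parsed_data, logs = parse_stdout(stdout, input_parameters)

print(parsed_data['number_of_atoms'], parsed_data['volume'])
print(parsed_data['trajectory'].get('energy', []))  # one value in eV per SCF cycle
for message in logs.warning:
    print('WARNING:', message)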
Example #7
def parse_raw_ph_output(stdout, tensors=None, dynamical_matrices=None):
    """Parses the raw output of a Quantum ESPRESSO `ph.x` calculation.

    :param stdout: the content of the stdout file as a string
    :param tensors: the content of the tensors.xml file as a string
    :param dynamical_matrices: a list of the content of the dynamical matrix files as a string
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    logs = get_logging_container()
    data_lines = stdout.split('\n')

    # First check whether the `JOB DONE` message was written, otherwise the job was interrupted
    for line in data_lines:
        if 'JOB DONE' in line:
            break
    else:
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Parse tensors, if present
    tensor_data = {}
    if tensors:
        try:
            tensor_data = parse_ph_tensor(tensors)
        except QEOutputParsingError:
            logs.warning.append('Error while parsing the tensor files')

    out_data = parse_ph_text_output(data_lines, logs)

    # parse dynamical matrices if present
    dynmat_data = {}
    if dynamical_matrices:
        # loop over the dynamical matrix files
        for dynmat_counter, dynmat in enumerate(dynamical_matrices):

            lines = dynmat.split('\n')

            # check if the file contains frequencies (i.e. is useful) or not
            dynmat_to_parse = False
            if not lines:
                continue
            try:
                _ = [float(i) for i in lines[0].split()]
            except ValueError:
                dynmat_to_parse = True

            if not dynmat_to_parse:
                continue

            # parse it
            this_dynmat_data = parse_ph_dynmat(lines, logs)

            # join it with the previous dynmat info
            dynmat_data[
                f'dynamical_matrix_{dynmat_counter}'] = this_dynmat_data
            # TODO: use the bands format?

    # merge the dictionaries; no key should appear in more than one of them
    for key in out_data.keys():
        if key in tensor_data:
            raise AssertionError(f'{key} found in two dictionaries')
        if key in dynmat_data:
            raise AssertionError(f'{key} found in two dictionaries')

    # the `dynmat_data` and `parser_info` keys are not checked
    parsed_data = {**dynmat_data, **out_data, **tensor_data}

    return parsed_data, logs
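A usage sketch for `parse_raw_ph_output`; both the tensor and dynamical-matrix arguments are optional, and the filename is an assumption:

with open('ph.out') as handle:  # filename is an assumption
    parsed_data, logs = parse_raw_ph_output(handle.read())

if logs.error:
    print('errors:', logs.error)
else:
    print(sorted(parsed_data.keys()))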
Example #8
def parse_xml_post_6_2(xml):
    """Parse the content of XML output file written by `pw.x` and `cp.x` with the new schema-based XML format.

    :param xml: parsed XML
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    e_bohr2_to_coulomb_m2 = 57.214766  # e/a0^2 to C/m^2 (electric polarization) from Wolfram Alpha

    logs = get_logging_container()

    schema_filepath = get_schema_filepath(xml)

    try:
        xsd = XMLSchema(schema_filepath)
    except URLError:

        # If loading the XSD file specified in the XML file fails, we try the default
        schema_filepath_default = get_default_schema_filepath()

        try:
            xsd = XMLSchema(schema_filepath_default)
        except URLError:
            raise XMLParseError(
                f'Could not open or parse the XSD files {schema_filepath} and {schema_filepath_default}'
            )
        else:
            schema_filepath = schema_filepath_default

    # Validate XML document against the schema
    # Returned dictionary has a structure where, if tag ['key'] is "simple", xml_dictionary['key'] returns its content.
    # Otherwise, the following keys are available:
    #
    #  xml_dictionary['key']['$'] returns its content
    #  xml_dictionary['key']['@attr'] returns its attribute 'attr'
    #  xml_dictionary['key']['nested_key'] goes one level deeper.

    xml_dictionary, errors = xsd.to_dict(xml, validation='lax')
    if errors:
        logs.error.append(
            f'{len(errors)} XML schema validation error(s) schema: {schema_filepath}:'
        )
        for err in errors:
            logs.error.append(str(err))

    xml_version = StrictVersion(
        xml_dictionary['general_info']['xml_format']['@VERSION'])
    inputs = xml_dictionary.get('input', {})
    outputs = xml_dictionary['output']

    lattice_vectors = [
        [x * CONSTANTS.bohr_to_ang for x in outputs['atomic_structure']['cell'][key]]
        for key in ('a1', 'a2', 'a3')
    ]

    has_electric_field = inputs.get('electric_field', {}).get('electric_potential', None) == 'sawtooth_potential'
    has_dipole_correction = inputs.get('electric_field', {}).get('dipole_correction', False)

    if 'occupations' in inputs.get('bands', {}):
        try:
            occupations = inputs['bands']['occupations']['$']  # yapf: disable
        except TypeError:  # "string indices must be integers" -- might have attribute 'nspin'
            occupations = inputs['bands']['occupations']
    else:
        occupations = None

    starting_magnetization = []
    magnetization_angle1 = []
    magnetization_angle2 = []

    for specie in outputs['atomic_species']['species']:
        starting_magnetization.append(specie.get('starting_magnetization', 0.0))
        magnetization_angle1.append(specie.get('magnetization_angle1', 0.0))
        magnetization_angle2.append(specie.get('magnetization_angle2', 0.0))

    constraint_mag = 0
    spin_constraints = inputs.get('spin_constraints', {}).get('spin_constraints', None)
    if spin_constraints == 'atomic':
        constraint_mag = 1
    elif spin_constraints == 'atomic direction':
        constraint_mag = 2
    elif spin_constraints == 'total':
        constraint_mag = 3
    elif spin_constraints == 'total direction':
        constraint_mag = 6

    lsda = inputs.get('spin', {}).get('lsda', False)
    spin_orbit_calculation = inputs.get('spin', {}).get('spinorbit', False)
    non_colinear_calculation = outputs['magnetization']['noncolin']
    do_magnetization = outputs['magnetization']['do_magnetization']

    # Time reversal symmetry of the system
    if non_colinear_calculation and do_magnetization:
        time_reversal = False
    else:
        time_reversal = True

    # If no specific tags are present, the default is 1
    if non_colinear_calculation or spin_orbit_calculation:
        nspin = 4
    elif lsda:
        nspin = 2
    else:
        nspin = 1

    symmetries = []
    # note: will only contain lattice symmetries that are NOT crystal symmetries
    lattice_symmetries = []
    inversion_symmetry = False

    # See also PW/src/setup.f90
    nsym = outputs.get('symmetries', {}).get('nsym', None)  # crystal symmetries
    nrot = outputs.get('symmetries', {}).get('nrot', None)  # lattice symmetries

    for symmetry in outputs.get('symmetries', {}).get('symmetry', []):

        # There are two types of symmetries, lattice and crystal. The pure inversion (-I) is always a lattice symmetry,
        # so we don't care. But if the pure inversion is also a crystal symmetry, then the system as a whole
        # has (by definition) inversion symmetry, so we set the global property inversion_symmetry = True.
        symmetry_type = symmetry['info']['$']
        symmetry_name = symmetry['info']['@name']
        if symmetry_type == 'crystal_symmetry' and symmetry_name.lower() == 'inversion':
            inversion_symmetry = True

        sym = {
            'rotation': [
                symmetry['rotation']['$'][0:3],
                symmetry['rotation']['$'][3:6],
                symmetry['rotation']['$'][6:9],
            ],
            'name': symmetry_name,
        }

        try:
            sym['t_rev'] = '1' if symmetry['info']['@time_reversal'] else '0'
        except KeyError:
            sym['t_rev'] = '0'

        try:
            sym['equivalent_atoms'] = symmetry['equivalent_atoms']['$']
        except KeyError:
            pass

        try:
            sym['fractional_translation'] = symmetry['fractional_translation']
        except KeyError:
            pass

        if symmetry_type == 'crystal_symmetry':
            symmetries.append(sym)
        elif symmetry_type == 'lattice_symmetry':
            lattice_symmetries.append(sym)
        else:
            raise XMLParseError(
                f'Unexpected type of symmetry: {symmetry_type}')

    if (nsym != len(symmetries)) or (
            nrot != len(symmetries) + len(lattice_symmetries)):
        logs.warning.append(
            'Inconsistent number of symmetries: nsym={}, nrot={}, len(symmetries)={}, len(lattice_symmetries)={}'
            .format(nsym, nrot, len(symmetries), len(lattice_symmetries)))

    xml_data = {
        #'pp_check_flag': True, # Currently not printed in the new format.
        # Signals whether the XML file is complete and can be used for post-processing.
        # Everything should be in the XML now, but in any case, the new XML schema
        # should mostly protect from incomplete files.
        'lkpoint_dir': False,  # Currently not printed in the new format.
        # Signals whether kpt-data are written in sub-directories.
        # Was generally true in the old format, but now all the eigenvalues are
        # in the XML file, under output / band_structure, so this is False.
        'charge_density': './charge-density.dat',  # A file name. Not printed in the new format.
        # The filename and path are considered fixed: <outdir>/<prefix>.save/charge-density.dat
        # TODO: change to .hdf5 if output format is HDF5 (issue #222)
        'rho_cutoff_units': 'eV',
        'wfc_cutoff_units': 'eV',
        'fermi_energy_units': 'eV',
        'k_points_units': '1 / angstrom',
        'symmetries_units': 'crystal',
        'constraint_mag': constraint_mag,
        'magnetization_angle2': magnetization_angle2,
        'magnetization_angle1': magnetization_angle1,
        'starting_magnetization': starting_magnetization,
        'has_electric_field': has_electric_field,
        'has_dipole_correction': has_dipole_correction,
        'lda_plus_u_calculation': 'dftU' in outputs,
        'format_name': xml_dictionary['general_info']['xml_format']['@NAME'],
        'format_version': xml_dictionary['general_info']['xml_format']['@VERSION'],
        # TODO: check that format version: a) matches the XSD schema version; b) is updated as well
        #       See line 43 in Modules/qexsd.f90
        'creator_name': xml_dictionary['general_info']['creator']['@NAME'].lower(),
        'creator_version': xml_dictionary['general_info']['creator']['@VERSION'],
        'non_colinear_calculation': non_colinear_calculation,
        'do_magnetization': do_magnetization,
        'time_reversal_flag': time_reversal,
        'symmetries': symmetries,
        'lattice_symmetries': lattice_symmetries,
        'do_not_use_time_reversal': inputs.get('symmetry_flags', {}).get('noinv', None),
        'spin_orbit_domag': outputs['magnetization']['do_magnetization'],
        'fft_grid': [value for _, value in sorted(outputs['basis_set']['fft_grid'].items())],
        'lsda': lsda,
        'number_of_spin_components': nspin,
        'no_time_rev_operations': inputs.get('symmetry_flags', {}).get('no_t_rev', None),
        # the old tag was INVERSION_SYMMETRY and was set to (from the code):
        # "invsym    if true the system has inversion symmetry"
        'inversion_symmetry': inversion_symmetry,
        'number_of_bravais_symmetries': nrot,  # lattice symmetries
        'number_of_symmetries': nsym,  # crystal symmetries
        'wfc_cutoff': inputs.get('basis', {}).get('ecutwfc', -1.0) * CONSTANTS.hartree_to_ev,
        'rho_cutoff': outputs['basis_set']['ecutrho'] * CONSTANTS.hartree_to_ev,  # not always printed in input->basis
        'smooth_fft_grid': [value for _, value in sorted(outputs['basis_set']['fft_smooth'].items())],
        # WARNING: this is different between old XML and new XML
        'dft_exchange_correlation': inputs.get('dft', {}).get('functional', None),  # TODO: also parse optional elements of 'dft' tag
        'spin_orbit_calculation': spin_orbit_calculation,
        'q_real_space': outputs['algorithmic_info']['real_space_q'],
    }

    # alat is technically an optional attribute according to the schema,
    # but I don't know what to do if it's missing. atomic_structure is mandatory.
    output_alat_bohr = outputs['atomic_structure']['@alat']
    output_alat_angstrom = output_alat_bohr * CONSTANTS.bohr_to_ang

    # Band structure
    if 'band_structure' in outputs:
        band_structure = outputs['band_structure']

        smearing_xml = None

        if 'smearing' in outputs['band_structure']:
            smearing_xml = outputs['band_structure']['smearing']
        elif 'smearing' in inputs:
            smearing_xml = inputs['smearing']

        if smearing_xml:
            degauss = smearing_xml['@degauss']

            # Versions below 19.03.04 (Quantum ESPRESSO<=6.4.1) incorrectly print degauss in Ry instead of Hartree
            if xml_version < StrictVersion('19.03.04'):
                degauss *= CONSTANTS.ry_to_ev
            else:
                degauss *= CONSTANTS.hartree_to_ev

            xml_data['degauss'] = degauss
            xml_data['smearing_type'] = smearing_xml['$']

        num_k_points = band_structure['nks']
        num_electrons = band_structure['nelec']
        num_atomic_wfc = band_structure['num_of_atomic_wfc']
        num_bands = band_structure.get('nbnd', None)
        num_bands_up = band_structure.get('nbnd_up', None)
        num_bands_down = band_structure.get('nbnd_dw', None)

        if num_bands is None and num_bands_up is None and num_bands_down is None:
            raise XMLParseError('None of `nbnd`, `nbnd_up` or `nbnd_dw` could be parsed.')

        # If both channels are `None` we are dealing with a non spin-polarized or non-collinear calculation
        elif num_bands_up is None and num_bands_down is None:
            spins = False

        # If only one of the channels is `None` we raise, because that is an inconsistent result
        elif num_bands_up is None or num_bands_down is None:
            raise XMLParseError(
                'Only one of `nbnd_up` and `nbnd_dw` could be parsed')

        # Here it is a spin-polarized calculation, where for pw.x the number of bands in each channel should be identical.
        else:
            spins = True
            if num_bands_up != num_bands_down:
                raise XMLParseError(
                    f'different number of bands for spin channels: {num_bands_up} and {num_bands_down}'
                )

            if num_bands is not None and num_bands != num_bands_up + num_bands_down:
                raise XMLParseError(
                    f'Inconsistent number of bands: nbnd={num_bands}, nbnd_up={num_bands_up}, nbnd_down={num_bands_down}'
                )

            if num_bands is None:
                num_bands = num_bands_up + num_bands_down  # backwards compatibility

        k_points = []
        k_points_weights = []
        ks_states = band_structure['ks_energies']
        for ks_state in ks_states:
            k_points.append([
                kp * 2 * np.pi / output_alat_angstrom
                for kp in ks_state['k_point']['$']
            ])
            k_points_weights.append(ks_state['k_point']['@weight'])

        if not spins:
            band_eigenvalues = [[]]
            band_occupations = [[]]
            for ks_state in ks_states:
                band_eigenvalues[0].append(ks_state['eigenvalues']['$'])
                band_occupations[0].append(ks_state['occupations']['$'])
        else:
            band_eigenvalues = [[], []]
            band_occupations = [[], []]
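            # In the schema-based XML the eigenvalues and occupations of both spin
            # channels are concatenated per k-point: the first `num_bands_up` entries
            # are spin-up, the remainder spin-down, hence the slicing below.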
            for ks_state in ks_states:
                band_eigenvalues[0].append(
                    ks_state['eigenvalues']['$'][0:num_bands_up])
                band_eigenvalues[1].append(
                    ks_state['eigenvalues']['$'][num_bands_up:num_bands])
                band_occupations[0].append(
                    ks_state['occupations']['$'][0:num_bands_up])
                band_occupations[1].append(
                    ks_state['occupations']['$'][num_bands_up:num_bands])

        band_eigenvalues = np.array(band_eigenvalues) * CONSTANTS.hartree_to_ev
        band_occupations = np.array(band_occupations)

        if not spins:
            parser_assert_equal(band_eigenvalues.shape,
                                (1, num_k_points, num_bands),
                                'Unexpected shape of band_eigenvalues')
            parser_assert_equal(band_occupations.shape,
                                (1, num_k_points, num_bands),
                                'Unexpected shape of band_occupations')
        else:
            parser_assert_equal(band_eigenvalues.shape,
                                (2, num_k_points, num_bands_up),
                                'Unexpected shape of band_eigenvalues')
            parser_assert_equal(band_occupations.shape,
                                (2, num_k_points, num_bands_up),
                                'Unexpected shape of band_occupations')

        if not spins:
            xml_data['number_of_bands'] = num_bands
        else:
            # For collinear spin-polarized calculations `spins=True` and `num_bands` is the sum of both
            # channels. To get the actual number of bands per channel, we divide by two using integer division.
            xml_data['number_of_bands'] = num_bands // 2

        for key, value in [('number_of_bands_up', num_bands_up),
                           ('number_of_bands_down', num_bands_down)]:
            if value is not None:
                xml_data[key] = value

        if 'fermi_energy' in band_structure:
            xml_data['fermi_energy'] = band_structure['fermi_energy'] * CONSTANTS.hartree_to_ev

        bands_dict = {
            'occupations': band_occupations,
            'bands': band_eigenvalues,
            'bands_units': 'eV',
        }

        xml_data['number_of_atomic_wfc'] = num_atomic_wfc
        xml_data['number_of_k_points'] = num_k_points
        xml_data['number_of_electrons'] = num_electrons
        xml_data['k_points'] = k_points
        xml_data['k_points_weights'] = k_points_weights
        xml_data['bands'] = bands_dict

    try:
        monkhorst_pack = inputs['k_points_IBZ']['monkhorst_pack']
    except KeyError:
        pass  # not using Monkhorst pack
    else:
        xml_data['monkhorst_pack_grid'] = [
            monkhorst_pack[attr] for attr in ['@nk1', '@nk2', '@nk3']
        ]
        xml_data['monkhorst_pack_offset'] = [
            monkhorst_pack[attr] for attr in ['@k1', '@k2', '@k3']
        ]

    if occupations is not None:
        xml_data['occupations'] = occupations

    if 'boundary_conditions' in outputs and 'assume_isolated' in outputs['boundary_conditions']:
        xml_data['assume_isolated'] = outputs['boundary_conditions']['assume_isolated']

    # This is not printed by QE 6.3, but will be re-added before the next version
    if 'real_space_beta' in outputs['algorithmic_info']:
        xml_data['beta_real_space'] = outputs['algorithmic_info']['real_space_beta']

    conv_info = {}
    conv_info_scf = {}
    conv_info_opt = {}
    # NOTE: n_scf_steps refers to the number of SCF steps in the *last* loop only.
    # To get the total number of SCF steps in the run you should sum up the individual steps.
    # TODO: should we parse 'steps' too? Are they already added in the output trajectory?
    for key in ['convergence_achieved', 'n_scf_steps', 'scf_error']:
        try:
            conv_info_scf[key] = outputs['convergence_info']['scf_conv'][key]
        except KeyError:
            pass
    for key in ['convergence_achieved', 'n_opt_steps', 'grad_norm']:
        try:
            conv_info_opt[key] = outputs['convergence_info']['opt_conv'][key]
        except KeyError:
            pass
    if conv_info_scf:
        conv_info['scf_conv'] = conv_info_scf
    if conv_info_opt:
        conv_info['opt_conv'] = conv_info_opt
    if conv_info:
        xml_data['convergence_info'] = conv_info

    if 'status' in xml_dictionary:
        xml_data['exit_status'] = xml_dictionary['status']
        # 0 = convergence reached;
        # -1 = SCF convergence failed;
        # 3 = ionic convergence failed
        # These might be changed in the future. Also see PW/src/run_pwscf.f90

    try:
        berry_phase = outputs['electric_field']['BerryPhase']
    except KeyError:
        pass
    else:
        # This is what we would ideally store, but it is not backwards-compatible
        # xml_data['berry_phase'] = {}
        # xml_data['berry_phase']['total_phase']         = berry_phase['totalPhase']['$']
        # xml_data['berry_phase']['total_phase_modulus'] = berry_phase['totalPhase']['@modulus']
        # xml_data['berry_phase']['total_ionic_phase']      = berry_phase['totalPhase']['@ionic']
        # xml_data['berry_phase']['total_electronic_phase'] = berry_phase['totalPhase']['@electronic']
        # xml_data['berry_phase']['total_polarization']           = berry_phase['totalPolarization']['polarization']['$']
        # xml_data['berry_phase']['total_polarization_modulus']   = berry_phase['totalPolarization']['modulus']
        # xml_data['berry_phase']['total_polarization_units']     = berry_phase['totalPolarization']['polarization']['@Units']
        # xml_data['berry_phase']['total_polarization_direction'] = berry_phase['totalPolarization']['direction']
        # parser_assert_equal(xml_data['berry_phase']['total_phase_modulus'].lower(), '(mod 2)',
        #                    "Unexpected modulus for total phase")
        # parser_assert_equal(xml_data['berry_phase']['total_polarization_units'].lower(), 'e/bohr^2',
        #                    "Unsupported units for total polarization")
        # Backwards-compatible keys:
        polarization = berry_phase['totalPolarization']['polarization']['$']
        polarization_units = berry_phase['totalPolarization']['polarization']['@Units']
        polarization_modulus = berry_phase['totalPolarization']['modulus']
        parser_assert(
            polarization_units in ['e/bohr^2', 'C/m^2'],
            f"Unsupported units '{polarization_units}' of total polarization")
        if polarization_units == 'e/bohr^2':
            polarization *= e_bohr2_to_coulomb_m2
            polarization_modulus *= e_bohr2_to_coulomb_m2

        xml_data['total_phase'] = berry_phase['totalPhase']['$']
        xml_data['total_phase_units'] = '2pi'
        xml_data['ionic_phase'] = berry_phase['totalPhase']['@ionic']
        xml_data['ionic_phase_units'] = '2pi'
        xml_data['electronic_phase'] = berry_phase['totalPhase']['@electronic']
        xml_data['electronic_phase_units'] = '2pi'
        xml_data['polarization'] = polarization
        xml_data['polarization_module'] = polarization_modulus  # should be called "modulus"
        xml_data['polarization_units'] = 'C / m^2'
        xml_data['polarization_direction'] = berry_phase['totalPolarization']['direction']
        # TODO: add conversion for (e/Omega).bohr (requires to know Omega, the volume of the cell)
        # TODO (maybe): Not parsed:
        # - individual ionic phases
        # - individual electronic phases and weights

    # TODO: should we also store the `non_periodic_cell_correction` string?
    atoms = [[
        atom['@name'], [coord * CONSTANTS.bohr_to_ang for coord in atom['$']]
    ] for atom in outputs['atomic_structure']['atomic_positions']['atom']]
    species = outputs['atomic_species']['species']
    structure_data = {
        'atomic_positions_units': 'Angstrom',
        'direct_lattice_vectors_units': 'Angstrom',
        # ??? 'atoms_if_pos_list': [[1, 1, 1], [1, 1, 1]],
        'number_of_atoms': outputs['atomic_structure']['@nat'],
        'lattice_parameter': output_alat_angstrom,
        'reciprocal_lattice_vectors': [
            outputs['basis_set']['reciprocal_lattice']['b1'],
            outputs['basis_set']['reciprocal_lattice']['b2'],
            outputs['basis_set']['reciprocal_lattice']['b3']
        ],
        'atoms': atoms,
        'cell': {
            'lattice_vectors': lattice_vectors,
            'volume': cell_volume(*lattice_vectors),
            'atoms': atoms,
        },
        'lattice_parameter_xml': output_alat_bohr,
        'number_of_species': outputs['atomic_species']['@ntyp'],
        'species': {
            'index': [i + 1 for i, specie in enumerate(species)],
            'pseudo': [specie['pseudo_file'] for specie in species],
            'mass': [specie['mass'] for specie in species],
            'type': [specie['@name'] for specie in species]
        },
    }

    xml_data['structure'] = structure_data

    return xml_data, logs
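
A minimal usage sketch, added for illustration: assuming the function above is the schema-based counterpart of `parse_pw_xml_pre_6_2` below, a caller could summarize its `(xml_data, logs)` return value as follows. `summarize_xml_data` is a hypothetical helper; `logs.error` is assumed to be the list of error messages inside the logging container.

def summarize_xml_data(xml_data, logs):
    """Print a few headline quantities from a parsed `(xml_data, logs)` pair."""
    for key in ('creator_version', 'number_of_bands', 'fermi_energy', 'wfc_cutoff'):
        if key in xml_data:
            print(f'{key}: {xml_data[key]}')
    # Any accumulated error messages signal an incomplete or inconsistent XML file.
    for message in logs.error:
        print(f'ERROR: {message}')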
Example #9
0
def parse_pw_xml_pre_6_2(xml_file, dir_with_bands):
    """Parse the content of XML output file written by `pw.x` with the old schema-less XML format.

    :param xml_file: filelike object to the XML output file
    :param dir_with_bands: absolute filepath to directory containing k-point XML files
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    """
    import copy
    from xml.parsers.expat import ExpatError

    logs = get_logging_container()

    # NOTE: we assume that if the XML file has been written, it has no internal errors.
    try:
        dom = parse(xml_file)
    except ExpatError:
        logs.error.append('Error in XML parsing: bad format')
        parsed = {
            'bands': {},
            'structure': {},
        }
        return parsed, logs

    parsed_data = {}

    structure_dict = {}
    # CARD CELL
    structure_dict, lattice_vectors, volume = copy.deepcopy(
        xml_card_cell(structure_dict, dom))

    # CARD IONS
    structure_dict = copy.deepcopy(
        xml_card_ions(structure_dict, dom, lattice_vectors, volume))

    # CARD HEADER
    parsed_data = copy.deepcopy(xml_card_header(parsed_data, dom))

    # CARD CONTROL
    cardname = 'CONTROL'
    target_tags = read_xml_card(dom, cardname)
    for tagname in [
            'PP_CHECK_FLAG', 'LKPOINT_DIR', 'Q_REAL_SPACE', 'BETA_REAL_SPACE'
    ]:
        parsed_data[tagname.lower()] = parse_xml_child_bool(
            tagname, target_tags)

    # TODO: why isn't this one working? What is it actually?
    # # CARD MOVING_CELL
    # try:
    #     target_tags = dom.getElementsByTagName('MOVING_CELL')[0]
    # except:
    #     raise IOError
    #
    # tagname = 'CELL_FACTOR'
    # parsed_data[tagname.lower()] = parse_xml_child_float(tagname, target_tags)

    # CARD ELECTRIC_FIELD
    cardname = 'ELECTRIC_FIELD'
    target_tags = read_xml_card(dom, cardname)
    for tagname in ['HAS_ELECTRIC_FIELD', 'HAS_DIPOLE_CORRECTION']:
        parsed_data[tagname.lower()] = parse_xml_child_bool(
            tagname, target_tags)

    if parsed_data['has_electric_field'] or parsed_data['has_dipole_correction']:
        tagname = 'FIELD_DIRECTION'
        parsed_data[tagname.lower()] = parse_xml_child_integer(
            tagname, target_tags)

        for tagname in [
                'MAXIMUM_POSITION', 'INVERSE_REGION', 'FIELD_AMPLITUDE'
        ]:
            parsed_data[tagname.lower()] = parse_xml_child_float(
                tagname, target_tags)

    # CARD PLANE_WAVES
    parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data, dom, 'pw'))

    # CARD SPIN
    parsed_data = copy.deepcopy(xml_card_spin(parsed_data, dom))

    # CARD BRILLOUIN ZONE
    cardname = 'BRILLOUIN_ZONE'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_K-POINTS'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'UNITS_FOR_K-POINTS'
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['2 pi / a']:
        raise QEOutputParsingError(
            f'Error parsing attribute {attrname}, tag {tagname} inside {target_tags.tagName}, units unknown'
        )
    k_points_units = metric

    for tagname, param in [['MONKHORST_PACK_GRID', 'nk'],
                           ['MONKHORST_PACK_OFFSET', 'k']]:
        try:
            #a = target_tags.getElementsByTagName(tagname)[0]
            a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
            value = [int(a.getAttribute(param + str(i + 1))) for i in range(3)]
            parsed_data[tagname.replace('-', '_').lower()] = value
        except Exception:  # the Monkhorst-Pack grid might not be in use
            pass

    kpoints = []
    kpoints_weights = []

    tagname_prefix = 'K-POINT.'
    a_dict = {
        _.nodeName: _
        for _ in target_tags.childNodes
        if _.nodeName.startswith(tagname_prefix)
    }

    try:
        import numpy
        for i in range(parsed_data['number_of_k_points']):
            tagname = '{}{}'.format(tagname_prefix, i + 1)
            #a = target_tags.getElementsByTagName(tagname)[0]
            a = a_dict[tagname]
            b = a.getAttribute('XYZ').replace('\n', '').rsplit()
            value = [float(s) for s in b]
            metric = k_points_units
            if metric == '2 pi / a':
                value = [
                    2. * numpy.pi * float(s) /
                    structure_dict['lattice_parameter'] for s in value
                ]
                weight = float(a.getAttribute('WEIGHT'))
                kpoints.append(value)
                kpoints_weights.append(weight)
        parsed_data['k_points'] = kpoints
        parsed_data['k_points' + units_suffix] = default_k_points_units
        parsed_data['k_points_weights'] = kpoints_weights
    except Exception:
        raise QEOutputParsingError(
            'Error parsing tag K-POINT.{} inside {}.'.format(
                i + 1, target_tags.tagName))

    # This card is skipped until someone has a need for it.
#     try:
#         tagname='STARTING_K-POINTS'
#         num_starting_k_points=parse_xml_child_integer(tagname,target_tags)
#         # raise exception if there is no such a key
#         parsed_data[tagname.replace('-','_').lower()]=num_starting_k_points
#
#         if parsed_data.get('starting_k_points'):
#             try:
#                 kpoints=[]
#                 for i in range(parsed_data['starting_k_points']):
#                     tagname='K-POINT_START.'+str(i+1)
#                     a=target_tags.getElementsByTagName(tagname)[0]
#                     b=a.getAttribute('XYZ').replace('\n','').rsplit()
#                     value=[ float(s) for s in b ]
#                     metric=parsed_data['k_points_units']
#                     if metric=='2 pi / a':
#                         value=[ float(s)/parsed_data['lattice_parameter'] for s in value ]
#
#                         weight=float(a.getAttribute('WEIGHT'))
#
#                         kpoints.append([value,weight])
#
#                 parsed_data['k_point_start']=kpoints
#             except Exception:
#                 raise QEOutputParsingError('Error parsing tag {}'.format(tagname)+\
#                                            ' inside {}.'.format(target_tags.tagName ) )
#     except Exception:
#         if not parsed_data.get('starting_k_points'):
#             pass
#         else:
#             parsed_data['xml_warnings'].append("Warning: could not parse {}".format(tagname))

    # tagname = 'NORM-OF-Q'
    # TODO: decide whether to save this parameter
    # parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_float(tagname, target_tags)

    # CARD BAND_STRUCTURE_INFO
    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    for tagname in [
            'NUMBER_OF_SPIN_COMPONENTS', 'NUMBER_OF_ATOMIC_WFC',
            'NUMBER_OF_BANDS'
    ]:
        parsed_data[tagname.replace('-','_').lower()] = \
            parse_xml_child_integer(tagname,target_tags)

    tagname = 'NON-COLINEAR_CALCULATION'
    parsed_data[tagname.replace('-','_').lower()] = \
        parse_xml_child_bool(tagname,target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[tagname.replace('-','_').lower()] = \
        parse_xml_child_float(tagname,target_tags)

    tagname = 'UNITS_FOR_ENERGIES'
    attrname = 'UNITS'
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['hartree']:
        raise QEOutputParsingError(f'Expected energy units in Hartree, got instead: {units}')

    try:
        tagname = 'TWO_FERMI_ENERGIES'
        parsed_data[tagname.lower()] = parse_xml_child_bool(
            tagname, target_tags)
    except Exception:
        pass

    if parsed_data.get('two_fermi_energies', False):
        tagname = 'FERMI_ENERGY_UP'
        parsed_data[tagname.replace('-','_').lower()] = \
            parse_xml_child_float(tagname,target_tags) * CONSTANTS.hartree_to_ev
        parsed_data[tagname.lower() + units_suffix] = default_energy_units
        tagname = 'FERMI_ENERGY_DOWN'
        parsed_data[tagname.replace('-','_').lower()] = \
            parse_xml_child_float(tagname,target_tags) * CONSTANTS.hartree_to_ev
        parsed_data[tagname.lower() + units_suffix] = default_energy_units
    else:
        tagname = 'FERMI_ENERGY'
        parsed_data[tagname.replace('-','_').lower()] = \
            parse_xml_child_float(tagname,target_tags) * CONSTANTS.hartree_to_ev
        parsed_data[tagname.lower() + units_suffix] = default_energy_units

    # CARD MAGNETIZATION_INIT
    cardname = 'MAGNETIZATION_INIT'
    target_tags = read_xml_card(dom, cardname)

    # 0 if false
    tagname = 'CONSTRAINT_MAG'
    parsed_data[tagname.lower()] = parse_xml_child_integer(
        tagname, target_tags)

    vec1 = []
    vec2 = []
    vec3 = []
    for i in range(structure_dict['number_of_species']):
        tagname = 'SPECIE.' + str(i + 1)
        #a=target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        tagname2 = 'STARTING_MAGNETIZATION'
        vec1.append(parse_xml_child_float(tagname2, a))
        tagname2 = 'ANGLE1'
        vec2.append(parse_xml_child_float(tagname2, a))
        tagname2 = 'ANGLE2'
        vec3.append(parse_xml_child_float(tagname2, a))
    parsed_data['starting_magnetization'] = vec1
    parsed_data['magnetization_angle1'] = vec2
    parsed_data['magnetization_angle2'] = vec3

    # CARD OCCUPATIONS
    cardname = 'OCCUPATIONS'
    target_tags = read_xml_card(dom, cardname)
    for tagname in [
            'SMEARING_METHOD', 'TETRAHEDRON_METHOD', 'FIXED_OCCUPATIONS'
    ]:
        parsed_data[tagname.lower()] = parse_xml_child_bool(
            tagname, target_tags)
    if parsed_data['smearing_method']:
        parsed_data['occupations'] = 'smearing'
    elif parsed_data['tetrahedron_method']:
        parsed_data['occupations'] = 'tetrahedra'  # TODO: might also be tetrahedra_lin or tetrahedra_opt: check input?
    elif parsed_data['fixed_occupations']:
        parsed_data['occupations'] = 'fixed'

    # Remove the following deprecated keys
    for tagname in [
            'SMEARING_METHOD', 'TETRAHEDRON_METHOD', 'FIXED_OCCUPATIONS'
    ]:
        parsed_data.pop(tagname.lower())

    # CARD CHARGE-DENSITY
    cardname = 'CHARGE-DENSITY'
    target_tags = read_xml_card(dom, cardname)
    try:
        attrname = 'iotk_link'
        value = str(target_tags.getAttribute(attrname)).rstrip().replace('\n', '').lower()
        parsed_data[cardname.lower().rstrip().replace('-', '_')] = value
    except Exception:
        raise QEOutputParsingError(f'Error parsing attribute {attrname}, card {cardname}.')

    # CARD EIGENVALUES
    # Note: if this card is parsed, the database size grows considerably!
    cardname = 'EIGENVALUES'
    target_tags = read_xml_card(dom, cardname)
    bands_dict = {}
    if dir_with_bands:
        try:
            occupations1 = []
            occupations2 = []
            bands1 = []
            bands2 = []
            for i in range(parsed_data['number_of_k_points']):
                tagname = 'K-POINT.' + str(i + 1)
                #a=target_tags.getElementsByTagName(tagname)[0]
                a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]

                def read_bands_and_occupations(eigenval_n):
                    # load the eigenval.xml file
                    with open(eigenval_n, 'r') as eigenval_f:
                        f = eigenval_f.read()

                    eig_dom = parseString(f)

                    tagname = 'UNITS_FOR_ENERGIES'
                    a = eig_dom.getElementsByTagName(tagname)[0]
                    attrname = 'UNITS'
                    metric = str(a.getAttribute(attrname))
                    if metric not in ['Hartree']:
                        raise QEOutputParsingError(f'Error parsing eigenvalues xml file, units {metric} not implemented.')

                    tagname = 'EIGENVALUES'
                    a = eig_dom.getElementsByTagName(tagname)[0]
                    b = a.childNodes[0]
                    value_e = [float(s) * CONSTANTS.hartree_to_ev for s in b.data.split()]

                    tagname = 'OCCUPATIONS'
                    a = eig_dom.getElementsByTagName(tagname)[0]
                    b = a.childNodes[0]
                    value_o = [float(s) for s in b.data.split()]
                    return value_e, value_o

                # Two cases: for magnetic calculations, both spin channels are present
                try:
                    tagname2 = 'DATAFILE'
                    b = a.getElementsByTagName(tagname2)[0]
                    attrname = 'iotk_link'
                    value = str(b.getAttribute(attrname)).rstrip().replace('\n', '')
                    eigenval_n = os.path.join(dir_with_bands, value)

                    value_e, value_o = read_bands_and_occupations(eigenval_n)
                    bands1.append(value_e)
                    occupations1.append(value_o)

                except IndexError:
                    tagname2 = 'DATAFILE.1'
                    b1 = a.getElementsByTagName(tagname2)[0]
                    tagname2 = 'DATAFILE.2'
                    b2 = a.getElementsByTagName(tagname2)[0]
                    attrname = 'iotk_link'
                    value1 = str(b1.getAttribute(attrname)).rstrip().replace('\n', '')
                    value2 = str(b2.getAttribute(attrname)).rstrip().replace('\n', '')

                    eigenval_n = os.path.join(dir_with_bands, value1)
                    value_e, value_o = read_bands_and_occupations(eigenval_n)
                    bands1.append(value_e)
                    occupations1.append(value_o)

                    eigenval_n = os.path.join(dir_with_bands, value2)
                    value_e, value_o = read_bands_and_occupations(eigenval_n)
                    bands2.append(value_e)
                    occupations2.append(value_o)

            occupations = [occupations1]
            bands = [bands1]
            if occupations2:
                occupations.append(occupations2)
            if bands2:
                bands.append(bands2)

            bands_dict['occupations'] = occupations
            bands_dict['bands'] = bands
            bands_dict['bands' + units_suffix] = default_energy_units
        except Exception as exception:
            raise QEOutputParsingError(f'Error parsing card {tagname}: {exception.__class__.__name__} {exception}')


#     if dir_with_bands:
#         # if there is at least an empty band:
#         if parsed_data['smearing_method'] or  \
#            parsed_data['number_of_electrons']/2. < parsed_data['number_of_bands']:
#
#             # TODO: currently this is done only for non-magnetic systems
#             if len(bands_dict['occupations'])==1:
#             # initialize lumo
#                 lumo = parsed_data['homo']+10000.0
#                 for list_bands in bands_dict['bands']:
#                     for value in list_bands:
#                         if (value > parsed_data['fermi_energy']) and (value<lumo):
#                             lumo=value
#                 if (lumo==parsed_data['homo']+10000.0) or lumo<=parsed_data['fermi_energy']:
#                     # might be an error for band gaps larger than 10000 eV...
#                     raise QEOutputParsingError('Error while searching for LUMO.')
#                 parsed_data['lumo']=lumo
#                 parsed_data['lumo'+units_suffix] = default_energy_units

    # CARD SYMMETRIES
    parsed_data = copy.deepcopy(xml_card_symmetries(parsed_data, dom))

    # CARD EXCHANGE_CORRELATION
    parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data, dom))

    parsed_data['bands'] = bands_dict
    parsed_data['structure'] = structure_dict

    return parsed_data, logs
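
A usage sketch, added for illustration with hypothetical paths: the old-format parser takes a file-like object and the directory holding the per-k-point eigenvalue files; passing `None` for the latter skips the band files entirely.

def demo_parse_old_xml(xml_path='out/data-file.xml', bands_dir=None):
    """Run the pre-6.2 XML parser on a local file (paths are placeholders)."""
    with open(xml_path, 'rb') as xml_file:
        parsed_data, logs = parse_pw_xml_pre_6_2(xml_file, bands_dir)
    print(parsed_data.get('number_of_k_points'))
    return parsed_data, logs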
Example #10
0
    def parse_stdout(self, stdout_str):
        """
        Parses the output written to StdOut to retrieve basic information about the post processing

        :param stdout_str: the stdout file read in as a single string
        """
        def detect_important_message(logs, line):
            """Detect known errors and warnings printed in the stdout.

            :param logs: the logging container to which messages are appended
            :param line: a line from the stdout as a string
            """
            message_map = {
                'error': {
                    'xml data file not found': 'ERROR_PARENT_XML_MISSING'
                },
                'warning': {
                    'Warning:': None,
                    'DEPRECATED:': None,
                }
            }

            # Match any known error and warning messages
            for marker, message in message_map['error'].items():
                if marker in line:
                    if message is None:
                        message = line
                    logs.error.append(message)

            for marker, message in message_map['warning'].items():
                if marker in line:
                    if message is None:
                        message = line
                    logs.warning.append(message)

        stdout_lines = stdout_str.splitlines()
        logs = get_logging_container()
        output_dict = {}

        # Check for job completion, indicating that pp.x exited without interruption, even if there was an error.
        for line in stdout_lines:
            if 'JOB DONE' in line:
                break
        else:
            logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

        # Detect any known error and warning messages
        for line in stdout_lines:
            detect_important_message(logs, line)

        # Parse useful data from stdout
        for line in stdout_lines:
            if 'Check:' in line:  # QE < 6.5
                split_line = line.split('=')
                if 'negative/imaginary' in line:  # QE 6.1-6.3
                    output_dict['negative_core_charge'] = float(split_line[-1].split()[0])
                    output_dict['imaginary_core_charge'] = float(split_line[-1].split()[-1])
                else:  # QE 6.4
                    output_dict['negative_core_charge'] = float(split_line[1])
            if 'Min, Max, imaginary charge:' in line:
                split_line = line.split()
                output_dict['charge_min'] = float(split_line[-3])
                output_dict['charge_max'] = float(split_line[-2])
                output_dict['charge_img'] = float(split_line[-1])
            if 'plot_num = ' in line:
                output_dict['plot_num'] = int(line.split('=')[1])
            if 'Plot Type:' in line:
                output_dict['plot_type'] = line.split('Output format')[0].split(':')[-1].strip()
                output_dict['output_format'] = line.split(':')[-1].strip()

        return logs, output_dict
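
A usage sketch, added for illustration: the method never touches `self`, so it can be exercised on any captured stdout snippet; `pp_parser` stands for an instance of the (assumed) pp.x parser class that hosts the method.

sample_pp_stdout = '\n'.join([
    '     Check: negative core charge=   -0.000005',  # QE 6.4 style line
    '     plot_num = 11',
    '     JOB DONE.',
])
# logs, output_dict = pp_parser.parse_stdout(sample_pp_stdout)
# output_dict would then contain {'negative_core_charge': -5e-06, 'plot_num': 11}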
Example #11
0
def parse_neb_text_output(data, input_dict=None):
    """Parses the text output of QE Neb.

    :param data: a string, the file as read by read()
    :param input_dict: dictionary with the input parameters

    :return parsed_data: dictionary with key values, referring to quantities
                         at the last step.
    :return iteration_data: key,values referring to intermediate iterations.
                             Empty dictionary if no value is present.
    :return critical_messages: a list with critical messages. If any is found in
                               parsed_data['warnings'], the calculation is FAILED!
    """
    from aiida_quantumespresso.parsers.parse_raw import parse_output_error
    from aiida_quantumespresso.utils.mapping import get_logging_container
    from collections import defaultdict

    # Guard against the mutable default-argument pitfall in the original signature
    input_dict = input_dict or {}

    # TODO: find a more exhaustive list of the common errors of neb

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'scf convergence NOT achieved on image': 'SCF did not converge for a given image',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        'reached the maximum number of steps': 'Maximum number of iterations reached in the image optimization',
    }

    minor_warnings = {
        'Warning:': None,
    }

    all_warnings = dict(list(critical_warnings.items()) + list(minor_warnings.items()))

    parsed_data = {}
    parsed_data['warnings'] = []
    iteration_data = defaultdict(list)

    # parse time, starting from the end
    # apparently, the time is written multiple times
    for line in reversed(data.split('\n')):
        if 'NEB' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0].strip()
                parsed_data['wall_time'] = time
            except Exception:
                parsed_data['warnings'].append('Error while parsing wall time.')

            try:
                parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(parsed_data['wall_time'])
            except ValueError:
                raise QEOutputParsingError('Unable to convert wall_time to seconds.')
            break

    # By default, mark the calculation as not converged.
    parsed_data['converged'] = [False, 0]
    logs = get_logging_container()
    lines = data.split('\n')

    for count, line in enumerate(lines):
        if 'initial path length' in line:
            initial_path_length = float(line.split('=')[1].split('bohr')[0])
            parsed_data['initial_path_length'] = initial_path_length * bohr_to_ang
        elif 'initial inter-image distance' in line:
            initial_image_dist = float(line.split('=')[1].split('bohr')[0])
            parsed_data['initial_image_dist'] = initial_image_dist * bohr_to_ang
        elif 'string_method' in line:
            parsed_data['string_method'] = line.split('=')[1].strip()
        elif 'restart_mode' in line:
            parsed_data['restart_mode'] = line.split('=')[1].strip()
        elif 'opt_scheme' in line:
            parsed_data['opt_scheme'] = line.split('=')[1].strip()
        elif 'num_of_images' in line:
            parsed_data['num_of_images'] = int(line.split('=')[1])
        elif 'nstep_path' in line:
            parsed_data['nstep_path'] = int(line.split('=')[1])
        elif 'CI_scheme' in line:
            parsed_data['ci_scheme'] = line.split('=')[1].strip()
        elif 'first_last_opt' in line:
            # strip() is needed: the raw split retains surrounding whitespace
            parsed_data['first_last_opt'] = line.split('=')[1].strip() == 'T'
        elif 'use_freezing' in line:
            parsed_data['use_freezing'] = line.split('=')[1].strip() == 'T'
        elif ' ds ' in line:
            parsed_data['ds_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif '   k_max' in line:
            parsed_data['k_max'] = float(line.split('=')[1].split('a.u.')[0])
        elif '   k_min_au' in line:
            parsed_data['k_min_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_max' in line:
            parsed_data['suggested_k_max_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_min' in line:
            parsed_data['suggested_k_min_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif 'path_thr' in line:
            parsed_data['path_thr'] = float(line.split('=')[1].split('eV')[0])
        elif 'list of climbing images' in line:
            parsed_data['climbing_images_manual'] = [int(_) for _ in line.split(':')[1].split(',')[:-1]]
        elif 'neb: convergence achieved in' in line:
            parsed_data['converged'] = [True, int(line.split('iteration')[0].split()[-1])]
        elif '%%%%%%%%%%%%%%' in line:
            parse_output_error(lines, count, logs)
        elif any(i in line for i in all_warnings):
            message = [all_warnings[i] for i in all_warnings if i in line][0]

            if message is not None:
                parsed_data['warnings'].append(message)

    parsed_data['warnings'].extend(logs.error)

    try:
        num_images = parsed_data['num_of_images']
    except KeyError:
        try:
            num_images = input_dict['PATH']['num_of_images']
        except KeyError:
            raise QEOutputParsingError(
                'No information on the number of images available (neither in input nor in output)'
            )

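    # Each iteration block in the neb.x stdout opens with a '-- iteration' header;
    # split on it and drop the preamble that precedes the first iteration.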
    iteration_lines = data.split('-- iteration')[1:]
    iteration_lines = [i.split('\n') for i in iteration_lines]

    for iteration in iteration_lines:
        for count, line in enumerate(iteration):
            if 'activation energy (->)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['forward_activation_energy'].append(activ_energy)
            elif 'activation energy (<-)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['backward_activation_energy'].append(activ_energy)
            elif 'image        energy (eV)        error (eV/A)        frozen' in line:
                energies = []
                forces = []
                frozen = []
                try:
                    for i in range(num_images):
                        split_line = iteration[count + 2 + i].split()[1:]
                        energies.append(float(split_line[0]))
                        forces.append(float(split_line[1]))
                        frozen.append(split_line[2] == 'T')
                    iteration_data['image_energies'].append(energies)
                    iteration_data['image_forces'].append(forces)
                    iteration_data['image_frozen'].append(frozen)
                except Exception:
                    parsed_data['warnings'].append('Error while parsing the image energies and forces.')
            elif 'climbing image' in line:
                iteration_data['climbing_image_auto'].append([int(_) for _ in line.split('=')[1].split(',')])
            elif 'path length' in line:
                path_length = float(line.split('=')[1].split('bohr')[0])
                iteration_data['path_length'].append(path_length * bohr_to_ang)
            elif 'inter-image distance' in line:
                image_dist = float(line.split('=')[1].split('bohr')[0])
                iteration_data['image_dist'].append(image_dist * bohr_to_ang)

    return parsed_data, dict(iteration_data), list(critical_warnings.values())
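
A usage sketch, added for illustration with a hypothetical driver function: feed the raw neb.x stdout and the input dictionary, then flag failure if any critical message ended up among the warnings, as the docstring prescribes.

def demo_parse_neb(filepath, input_dict):
    """Parse a neb.x stdout file and report whether the run failed."""
    with open(filepath) as handle:
        data = handle.read()
    parsed_data, iteration_data, critical_messages = parse_neb_text_output(data, input_dict)
    failed = any(message in parsed_data['warnings'] for message in critical_messages)
    return parsed_data, iteration_data, failed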