def parse_stdout(self):
    """Parse the stdout file of pw2gw to build the `output_parameters` node.

    The name of the stdout file is read from the `output_filename` attribute of ``self.node`` and its content is
    fetched from ``self.retrieved``. On failure ``self.exit_code_stdout`` is set to one of:

    * ``ERROR_OUTPUT_STDOUT_MISSING``: the file is not among the retrieved objects
    * ``ERROR_OUTPUT_STDOUT_READ``: reading the file raised an ``IOError``
    * ``ERROR_UNEXPECTED_PARSER_EXCEPTION``: the raw parser raised any exception
    * ``ERROR_OUTPUT_STDOUT_INCOMPLETE``: the raw parser reported an incomplete stdout

    :return: tuple of the parsed data dictionary and the logging container
    """
    import traceback

    from aiida_quantumespresso.utils.mapping import get_logging_container
    from aiida_quantumespresso.parsers.parse_raw.pw2gw import parse_stdout

    logs = get_logging_container()
    parsed_data = {}

    filename_stdout = self.node.get_attribute('output_filename')

    if filename_stdout not in self.retrieved.list_object_names():
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING
        return parsed_data, logs

    try:
        stdout = self.retrieved.get_object_content(filename_stdout)
    except IOError:
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_READ
        return parsed_data, logs

    try:
        parsed_data, logs = parse_stdout(stdout)
    except Exception:
        # Record the traceback in the log container instead of printing it to stderr, so that it travels with the
        # returned logs, as the other stdout/XML parsing methods do.
        logs.critical.append(traceback.format_exc())
        self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

    # If the stdout was incomplete, most likely the job was interrupted before it could cleanly finish, so the
    # output files are most likely corrupt and cannot be restarted from
    if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']:
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE

    return parsed_data, logs
def parse_stdout(self, parameters, parser_options=None, parsed_xml=None):
    """Parse the stdout output file.

    Any information about the structure that the raw parser did not provide (lattice vectors, atoms) is filled in
    from the input structure of the node before returning.

    :param parameters: the input parameters dictionary
    :param parser_options: optional dictionary with parser options
    :param parsed_xml: the raw parsed data from the XML output
    :return: tuple of two dictionaries, first with raw parsed data and second with log messages
    """
    from aiida_quantumespresso.parsers.parse_raw.pw import parse_stdout

    logs = get_logging_container()
    parsed_data = {}

    stdout_name = self.node.get_attribute('output_filename')

    # Guard clause: nothing to parse if the stdout file was not retrieved
    if stdout_name not in self.retrieved.list_object_names():
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_MISSING
        return parsed_data, logs

    try:
        content = self.retrieved.get_object_content(stdout_name)
    except IOError:
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_READ
        return parsed_data, logs

    try:
        parsed_data, logs = parse_stdout(content, parameters, parser_options, parsed_xml)
    except Exception:
        logs.critical.append(traceback.format_exc())
        self.exit_code_stdout = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

    # An incomplete stdout most likely means the job was interrupted before it could finish cleanly, in which case
    # the output files are probably corrupt and cannot be used for a restart
    if 'ERROR_OUTPUT_STDOUT_INCOMPLETE' in logs['error']:
        self.exit_code_stdout = self.exit_codes.ERROR_OUTPUT_STDOUT_INCOMPLETE

    # The structure data can be incomplete, e.g. when the XML is missing or incorrect. Later code relies on it, so
    # whatever is missing is taken from the input structure.
    input_structure = self.node.inputs.structure
    cell = parsed_data.setdefault('structure', {}).setdefault('cell', {})

    if 'lattice_vectors' not in cell:
        cell['lattice_vectors'] = input_structure.cell

    if 'atoms' not in cell:
        kind_to_symbol = {}
        for site in input_structure.sites:
            kind_to_symbol[site.kind_name] = input_structure.get_kind(site.kind_name).symbol
        cell['atoms'] = [(kind_to_symbol[site.kind_name], site.position) for site in input_structure.sites]

    return parsed_data, logs
def parse_output_base(filecontent, codename=None, message_map=None):
    """Parse the output file of a QE calculation, checking only for basic content.

    The presence of the `JOB DONE` marker is always checked. If `codename` is passed, the code version, the wall
    time and error blocks delimited by `%%%%` lines are parsed as well.

    :param filecontent: the output file content, either as a single string or as a list of lines
    :param codename: the string printed both in the header and near the walltime. If passed, a few more things are
        parsed (e.g. code version, walltime, ...)
    :param message_map: optional dictionary with the keys `error` and `warning`, forwarded to `parse_output_error`
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    :raises RuntimeError: if `message_map` is not a dictionary containing both the `error` and `warning` keys
    """
    from aiida_quantumespresso.utils.mapping import get_logging_container

    keys = ['error', 'warning']

    if message_map is not None and (not isinstance(message_map, dict) or any(key not in message_map for key in keys)):
        raise RuntimeError('invalid format `message_map`: should be dictionary with two keys {}'.format(keys))

    logs = get_logging_container()
    parsed_data = get_parser_info(parser_info_template='aiida-quantumespresso parser simple v{}')

    lines = filecontent if isinstance(filecontent, list) else filecontent.split('\n')

    if not any('JOB DONE' in line for line in lines):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    if codename is not None:

        codestring = 'Program {}'.format(codename)

        for line_number, line in enumerate(lines):

            if codestring in line and 'starts on' in line:
                parsed_data['code_version'] = line.split(codestring)[1].split('starts on')[0].strip()

            # Parse the walltime
            if codename in line and 'WALL' in line:
                try:
                    wall_time = line.split('CPU')[1].split('WALL')[0].strip()
                except (ValueError, IndexError):
                    # The log level of the container is called `warning`: `logs.warnings` does not match the
                    # attribute used everywhere else and would fail instead of recording the message.
                    logs.warning.append('ERROR_PARSING_WALLTIME')
                else:
                    parsed_data['wall_time'] = wall_time
                    try:
                        parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
                    except ValueError:
                        logs.warning.append('ERROR_CONVERTING_WALLTIME_TO_SECONDS')

            # Parse an error message with optional mapping of the message
            if '%%%%%%%%%%%%%%' in line:
                parse_output_error(lines, line_number, logs, message_map)

    return parsed_data, logs
def parse_xml(self, dir_with_bands=None, parser_options=None):
    """Parse the XML output file.

    Exactly one of the XML filenames known to the process class has to be present among the retrieved objects,
    otherwise an exit code is set on ``self.exit_code_xml`` and empty results are returned. A missing XML file is
    tolerated without an exit code when the `without_xml` option of the node is set.

    :param dir_with_bands: absolute path to directory containing individual k-point XML files for old XML format.
    :param parser_options: optional dictionary with parser options
    :return: tuple of two dictionaries, first with raw parsed data and second with log messages
    """
    from .parse_xml.exceptions import XMLParseError, XMLUnsupportedFormatError
    from .parse_xml.pw.parse import parse_xml

    logs = get_logging_container()
    parsed_data = {}

    retrieved_names = self.retrieved.list_object_names()
    candidates = [name for name in self.node.process_class.xml_filenames if name in retrieved_names]

    if len(candidates) != 1:
        if candidates:
            # More than one candidate: it is ambiguous which file should be parsed
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE
        elif not self.node.get_option('without_xml'):
            self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_MISSING
        return parsed_data, logs

    (xml_name,) = candidates

    try:
        with self.retrieved.open(xml_name) as handle:
            parsed_data, logs = parse_xml(handle, dir_with_bands)
    except IOError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_READ
    except XMLParseError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_PARSE
    except XMLUnsupportedFormatError:
        self.exit_code_xml = self.exit_codes.ERROR_OUTPUT_XML_FORMAT
    except Exception:
        # Anything else is unexpected: keep the traceback in the logs for later inspection
        logs.critical.append(traceback.format_exc())
        self.exit_code_xml = self.exit_codes.ERROR_UNEXPECTED_PARSER_EXCEPTION

    return parsed_data, logs
def parse_xml(xml_file, dir_with_bands=None, include_deprecated_v2_keys=False):
    """Parse an XML output file, dispatching to the parser that matches the detected XML format version.

    :param xml_file: file-like object with the XML content; it is rewound with `seek(0)` before being handed to the
        pre-6.2 parser
    :param dir_with_bands: forwarded to the pre-6.2 parser only
    :param include_deprecated_v2_keys: forwarded to both version specific parsers
    :returns: tuple of the parsed data dictionary and the logging container. If the version specific parser raises,
        the parsed data is empty and the traceback is stored in the `critical` log.
    :raises XMLParseError: if the file content cannot be parsed as XML
    """
    try:
        xml_parsed = ElementTree.parse(xml_file)
    except ElementTree.ParseError as exception:
        # Chain explicitly so the underlying parse error (position, reason) is preserved as the cause
        raise XMLParseError('error while parsing XML file') from exception

    xml_file_version = get_xml_file_version(xml_parsed)

    # NOTE(review): if `get_xml_file_version` can return a value other than the two handled below, `parsed_data`
    # and `logs` stay unbound and the final return fails -- presumably it raises for unsupported formats; confirm.
    try:
        if xml_file_version == QeXmlVersion.POST_6_2:
            parsed_data, logs = parse_pw_xml_post_6_2(xml_parsed, include_deprecated_v2_keys)
        elif xml_file_version == QeXmlVersion.PRE_6_2:
            xml_file.seek(0)
            parsed_data, logs = parse_pw_xml_pre_6_2(xml_file, dir_with_bands, include_deprecated_v2_keys)
    except Exception:
        import traceback
        logs = get_logging_container()
        logs.critical.append(traceback.format_exc())
        parsed_data = {}

    return parsed_data, logs
def parse_stdout(stdout, input_parameters, parser_options=None, parsed_xml=None):
    """Parses the stdout content of a Quantum ESPRESSO `pw.x` calculation.

    The parsing happens in three passes over the output:

    1. basic quantities (number of atoms, species, volume, ...) are taken from `parsed_xml` when it contains
       `number_of_bands`, otherwise they are read from the header of the stdout
    2. isolated quantities that can appear anywhere in the output (timings, RAM estimates, point group, ...)
    3. per-step quantities, obtained by splitting the output at each `Self-consistent Calculation` marker, which
       are collected in lists in the `trajectory` sub dictionary

    Note that the `bands` and `structure` keys are popped from `parsed_xml`, i.e. the passed dictionary is modified.

    :param stdout: the stdout content as a string
    :param input_parameters: dictionary with the input parameters
    :param parser_options: the parser options from the settings input parameter node
    :param parsed_xml: dictionary with data parsed from the XML output file
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    :raises QEOutputParsingError: if the basic quantities cannot be determined and no error or warning message was
        detected in the output, or if the wall time cannot be converted to seconds
    """
    if parser_options is None:
        parser_options = {}

    if parsed_xml is None:
        parsed_xml = {}

    # Separate the input string into separate lines
    data_lines = stdout.split('\n')

    logs = get_logging_container()

    parsed_data = {}
    vdw_correction = False
    bands_data = parsed_xml.pop('bands', {})
    structure_data = parsed_xml.pop('structure', {})
    trajectory_data = {}

    maximum_ionic_steps = None
    marker_bfgs_converged = False

    # Labels of the energy contributions printed after the total energy and the keys under which they are stored
    energy_contributions = (
        ('one-electron contribution', 'energy_one_electron'),
        ('hartree contribution', 'energy_hartree'),
        ('xc contribution', 'energy_xc'),
        ('ewald contribution', 'energy_ewald'),
        ('smearing contrib.', 'energy_smearing'),
        ('one-center paw contrib.', 'energy_one_center_paw'),
        ('est. exchange err', 'energy_est_exchange'),
        ('Fock energy', 'energy_fock'),
        ('Hubbard energy', 'energy_hubbard'),
        # Add also ENVIRON specific contribution to the total energy
        ('solvation energy', 'energy_solvation'),
        ('cavitation energy', 'energy_cavitation'),
        ('PV energy', 'energy_pv'),
        ('periodic energy correct.', 'energy_pbc_correction'),
        ('ionic charge energy', 'energy_ionic_charge'),
        ('external charges energy', 'energy_external_charges'),
    )

    # Keys of the dipole quantities that are parsed when `lelfield` is switched on
    lelfield_keys = (
        'electronic_dipole_cell_average',
        'electronic_dipole_cartesian_axes',
        'ionic_dipole_cell_average',
        'ionic_dipole_cartesian_axes',
    )

    # First check whether the `JOB DONE` message was written, otherwise the job was interrupted
    if not any('JOB DONE' in line for line in data_lines):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Determine whether the input switched on an electric field
    lelfield = input_parameters.get('CONTROL', {}).get('lelfield', False)

    # Find some useful quantities.
    if not parsed_xml.get('number_of_bands', None):
        try:
            for line in data_lines:
                if 'lattice parameter (alat)' in line:
                    alat = float(line.split('=')[1].split('a.u')[0])
                elif 'number of atoms/cell' in line:
                    nat = int(line.split('=')[1])
                elif 'number of atomic types' in line:
                    ntyp = int(line.split('=')[1])
                elif 'unit-cell volume' in line:
                    if '(a.u.)^3' in line:
                        volume = float(line.split('=')[1].split('(a.u.)^3')[0])
                    else:
                        # occurs in v5.3.0
                        volume = float(line.split('=')[1].split('a.u.^3')[0])
                elif 'number of Kohn-Sham states' in line:
                    nbnd = int(line.split('=')[1])
                elif 'number of k points' in line:
                    nk = int(line.split('=')[1].split()[0])
                    if input_parameters.get('SYSTEM', {}).get('nspin', 1) > 1:
                        # QE counts twice each k-point in spin-polarized calculations. Use floor division to keep
                        # the number of k-points an integer: true division would turn it into a float.
                        nk //= 2
                elif re.search(r'Dense\s+grid', line):
                    # NOTE(review): matched with any run of whitespace between the two words so that the check
                    # does not depend on how the label is padded in the output -- confirm against real output.
                    FFT_grid = [int(g) for g in line.split('(')[1].split(')')[0].split(',')]
                elif 'Smooth grid' in line:
                    smooth_FFT_grid = [int(g) for g in line.split('(')[1].split(')')[0].split(',')]
                    break
            # NOTE(review): from here on `alat` is in angstrom on this branch, while the XML branch below takes it
            # from `lattice_parameter_xml`; the per-step parsing further down uses `alat` for both -- confirm units.
            alat *= bohr_to_ang
            volume *= bohr_to_ang**3
            parsed_data['lattice_parameter_initial'] = alat
            parsed_data['number_of_bands'] = nbnd
            try:
                parsed_data['number_of_k_points'] = nk
                parsed_data['fft_grid'] = FFT_grid
                parsed_data['smooth_fft_grid'] = smooth_FFT_grid
            except NameError:  # these are not crucial, so parsing does not fail if they are not found
                pass
        except NameError:  # nat or other variables where not found, and thus not initialized

            # Try to get some error messages
            for line in data_lines:
                # Compare the line to the known set of error and warning messages and add them to the log container
                detect_important_message(logs, line)

            if logs.error or logs.warning:
                parsed_data['trajectory'] = trajectory_data
                return parsed_data, logs

            # did not find any error message -> raise an Error and do not return anything
            raise QEOutputParsingError('Parser cannot load basic info.')
    else:
        nat = structure_data['number_of_atoms']
        ntyp = structure_data['number_of_species']
        alat = structure_data['lattice_parameter_xml']
        volume = structure_data['cell']['volume']
    # NOTE: lattice_parameter_xml is the lattice parameter of the xml file
    # in the units used by the code. lattice_parameter instead in angstroms.

    # Save these two quantities in the parsed_data, because they will be
    # useful for queries (maybe), and structure_data will not be stored as a Dict
    parsed_data['number_of_atoms'] = nat
    parsed_data['number_of_species'] = ntyp
    parsed_data['volume'] = volume

    c_bands_error = False

    # now grep quantities that can be considered isolated informations.
    for count, line in enumerate(data_lines):

        # Compare the line to the known set of error and warning messages and add them to the log container
        detect_important_message(logs, line)

        # to be used for later
        if 'Carrying out vdW-DF run using the following parameters:' in line:
            vdw_correction = True

        elif 'Cartesian axes' in line:
            # this is the part when initial positions and chemical
            # symbols are printed (they do not change during a run)
            i = count + 1
            while i < count + 10 and not ('site n.' in data_lines[i] and 'atom' in data_lines[i]):
                i += 1
            if 'site n.' in data_lines[i] and 'atom' in data_lines[i]:
                trajectory_data['atomic_species_name'] = [data_lines[i + 1 + j].split()[1] for j in range(nat)]

        # parse the initialization time (take only first occurence)
        elif ('init_wall_time_seconds' not in parsed_data and 'total cpu time spent up to now is' in line):
            init_time = float(line.split('total cpu time spent up to now is')[1].split('secs')[0])
            parsed_data['init_wall_time_seconds'] = init_time

        # parse dynamical RAM estimates
        elif 'Estimated max dynamical RAM per process' in line:
            value = line.split('>')[-1]
            match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value)
            if match:
                try:
                    parsed_data['estimated_ram_per_process'] = float(match.group(1))
                    parsed_data['estimated_ram_per_process{}'.format(units_suffix)] = match.group(4)
                except (IndexError, ValueError):
                    pass

        # parse dynamical RAM estimates
        elif 'Estimated total dynamical RAM' in line:
            value = line.split('>')[-1]
            match = re.match(r'\s+([+-]?\d+(\.\d*)?|\.\d+([eE][+-]?\d+)?)\s*(Mb|MB|GB)', value)
            if match:
                try:
                    parsed_data['estimated_ram_total'] = float(match.group(1))
                    parsed_data['estimated_ram_total{}'.format(units_suffix)] = match.group(4)
                except (IndexError, ValueError):
                    pass

        # parse the global file, for informations that are written only once
        elif 'PWSCF' in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0]
            except IndexError:
                logs.warning.append('Error while parsing wall time.')
            else:
                # Only attempt the conversion when the wall time string was actually extracted, otherwise the
                # conversion would operate on an unbound name.
                parsed_data['wall_time'] = wall_time
                try:
                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
                except ValueError as exception:
                    raise QEOutputParsingError('Unable to convert wall_time in seconds.') from exception

        # for later control on relaxation-dynamics convergence
        elif 'nstep' in line and '=' in line:
            maximum_ionic_steps = int(line.split()[2])

        elif 'bfgs converged in' in line:
            marker_bfgs_converged = True

        elif 'number of bfgs steps' in line:
            try:
                parsed_data['number_ionic_steps'] += 1
            except KeyError:
                parsed_data['number_ionic_steps'] = 1

        elif 'A final scf calculation at the relaxed structure' in line:
            parsed_data['final_scf'] = True

        elif 'point group' in line:
            if 'k-point group' not in line:
                try:
                    # Split line in components delimited by either space(s) or
                    # parenthesis and filter out empty strings
                    line_elems = [_f for _f in re.split(r' +|\(|\)', line) if _f]

                    pg_international = line_elems[-1]
                    pg_schoenflies = line_elems[-2]

                    parsed_data['pointgroup_international'] = pg_international
                    parsed_data['pointgroup_schoenflies'] = pg_schoenflies

                except Exception:
                    warning = 'Problem parsing point group, I found: {}'.format(line.strip())
                    logs.warning.append(warning)

        # special parsing of c_bands error
        elif 'c_bands' in line and 'eigenvalues not converged' in line:
            c_bands_error = True

        elif 'iteration #' in line:
            if 'Calculation restarted' not in line and 'Calculation stopped' not in line:
                try:
                    parsed_data['total_number_of_scf_iterations'] += 1
                except KeyError:
                    parsed_data['total_number_of_scf_iterations'] = 1

            if c_bands_error:
                # if there is another iteration, c_bands is not necessarily a problem
                # I put a warning only if c_bands error appears in the last iteration
                c_bands_error = False

    if c_bands_error:
        logs.warning.append('c_bands: at least 1 eigenvalues not converged')

    # I split the output text in the atomic SCF calculations.
    # the initial part should be things already contained in the xml.
    # (cell, initial positions, kpoints, ...) and I skip them.
    # In case, parse for them before this point.
    # Put everything in a trajectory_data dictionary
    relax_steps = stdout.split('Self-consistent Calculation')[1:]
    relax_steps = [i.split('\n') for i in relax_steps]

    # now I create a bunch of arrays for every step.
    for data_step in relax_steps:

        trajectory_frame = {}

        for count, line in enumerate(data_step):

            if 'CELL_PARAMETERS' in line:
                try:
                    a1 = [float(s) for s in data_step[count + 1].split()]
                    a2 = [float(s) for s in data_step[count + 2].split()]
                    a3 = [float(s) for s in data_step[count + 3].split()]
                    # try except indexerror for not enough lines
                    lattice = line.split('(')[1].split(')')[0].split('=')
                    if lattice[0].lower() not in ['alat', 'bohr', 'angstrom']:
                        raise QEOutputParsingError(
                            'Error while parsing cell_parameters: ' + 'unsupported units {}'.format(lattice[0])
                        )

                    if 'alat' in lattice[0].lower():
                        a1 = [alat * bohr_to_ang * float(s) for s in a1]
                        a2 = [alat * bohr_to_ang * float(s) for s in a2]
                        a3 = [alat * bohr_to_ang * float(s) for s in a3]
                        lattice_parameter_b = float(lattice[1])
                        if abs(lattice_parameter_b - alat) > lattice_tolerance:
                            raise QEOutputParsingError(
                                'Lattice parameters mismatch! ' + '{} vs {}'.format(lattice_parameter_b, alat)
                            )
                    elif 'bohr' in lattice[0].lower():
                        # No lattice parameter is read in this branch: only the vectors have to be converted.
                        # Rescaling an unset `lattice_parameter_b` here made every cell in bohr units fail.
                        a1 = [bohr_to_ang * float(s) for s in a1]
                        a2 = [bohr_to_ang * float(s) for s in a2]
                        a3 = [bohr_to_ang * float(s) for s in a3]
                    trajectory_data.setdefault('lattice_vectors_relax', []).append([a1, a2, a3])

                except Exception:
                    logs.warning.append('Error while parsing relaxation cell parameters.')

            elif 'ATOMIC_POSITIONS' in line:
                try:
                    this_key = 'atomic_positions_relax'
                    # the inizialization of tau prevent parsed_data to be associated
                    # to the pointer of the previous iteration
                    metric = line.split('(')[1].split(')')[0]
                    if metric == 'crystal':
                        this_key = 'atomic_fractionals_relax'
                    elif metric not in ['alat', 'bohr', 'angstrom']:
                        raise QEOutputParsingError('Error while parsing atomic_positions: units not supported.')
                    # TODO: check how to map the atoms in the original scheme
                    positions = []
                    for i in range(nat):
                        line2 = data_step[count + 1 + i].split()
                        tau = [float(s) for s in line2[1:4]]
                        if metric == 'alat':
                            tau = [alat * float(s) for s in tau]
                        elif metric == 'bohr':
                            tau = [bohr_to_ang * float(s) for s in tau]
                        positions.append(tau)
                    trajectory_data.setdefault(this_key, []).append(positions)
                except Exception:
                    logs.warning.append('Error while parsing relaxation atomic positions.')

            # NOTE: in the above, the chemical symbols are not those of AiiDA
            # since the AiiDA structure is different. So, I assume now that the
            # order of atoms is the same of the input atomic structure.

            # Computed dipole correction in slab geometries.
            # save dipole in debye units, only at last iteration of scf cycle
            elif 'Computed dipole along edir' in line:
                j = count + 3
                line2 = data_step[j]
                try:
                    units = line2.split()[-1]
                    if default_dipole_units.lower() not in units.lower():  # only debye
                        raise QEOutputParsingError(
                            'Error parsing the dipole correction. Units {} are not supported.'.format(units)
                        )
                    value = float(line2.split()[-2])
                except IndexError:  # on units
                    pass
                # save only the last dipole correction
                while 'Computed dipole along edir' not in line2:
                    j += 1
                    try:
                        line2 = data_step[j]
                    except IndexError:  # The dipole is also written at the beginning of a new bfgs iteration
                        break
                    if 'End of self-consistent calculation' in line2:
                        trajectory_data.setdefault('dipole', []).append(value)
                        parsed_data['dipole' + units_suffix] = default_dipole_units
                        break

            # saving the SCF convergence accuracy for each SCF cycle
            # If for some step this line is not printed, the later check with the scf_accuracy array length should
            # catch it
            elif 'estimated scf accuracy' in line:
                try:
                    value = float(line.split()[-2]) * ry_to_ev
                    trajectory_data.setdefault('scf_accuracy', []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf accuracy.')

            elif 'convergence has been achieved in' in line or 'convergence NOT achieved after' in line:
                try:
                    value = int(line.split('iterations')[0].split()[-1])
                    trajectory_data.setdefault('scf_iterations', []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf iterations.')

            elif 'Calculation stopped in scf loop at iteration' in line:
                try:
                    value = int(line.split()[-1])
                    trajectory_data.setdefault('scf_iterations', []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing scf iterations.')

            elif 'End of self-consistent calculation' in line:
                # parse energy threshold for diagonalization algorithm
                try:
                    j = 0
                    while True:
                        j -= 1
                        line2 = data_step[count + j]
                        if 'ethr' in line2:
                            value = float(line2.split('=')[1].split(',')[0])
                            break
                    trajectory_data.setdefault('energy_threshold', []).append(value)
                except Exception:
                    logs.warning.append('Error while parsing ethr.')

                # parse final magnetic moments, if present
                try:
                    j = 0
                    while True:
                        j -= 1
                        line2 = data_step[count + j]
                        if 'Magnetic moment per site' in line2:
                            break
                        if 'iteration' in line2:
                            raise QEOutputParsingError
                    mag_moments = []
                    charges = []
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom:' in line2:
                            mag_moments.append(float(line2.split('magn:')[1].split()[0]))
                            charges.append(float(line2.split('charge:')[1].split()[0]))
                        if len(mag_moments) == nat:
                            break
                    trajectory_data.setdefault('atomic_magnetic_moments', []).append(mag_moments)
                    trajectory_data.setdefault('atomic_charges', []).append(charges)
                    parsed_data['atomic_magnetic_moments' + units_suffix] = default_magnetization_units
                    parsed_data['atomic_charges' + units_suffix] = default_charge_units
                except QEOutputParsingError:
                    pass

            # grep energy and possibly, magnetization
            elif '!' in line:
                try:
                    En = float(line.split('=')[1].split('Ry')[0]) * ry_to_ev
                    E_acc = float(data_step[count + 2].split('<')[1].split('Ry')[0]) * ry_to_ev

                    for key, value in [['energy', En], ['energy_accuracy', E_acc]]:
                        trajectory_data.setdefault(key, []).append(value)
                        parsed_data[key + units_suffix] = default_energy_units
                    # TODO: decide units for magnetization. now bohr mag/cell
                    j = 0
                    while True:
                        j += 1
                        line2 = data_step[count + j]

                        for string, key in energy_contributions:
                            if string in line2:
                                value = grep_energy_from_line(line2)
                                trajectory_data.setdefault(key, []).append(value)
                                parsed_data[key + units_suffix] = default_energy_units
                        # magnetizations
                        if 'total magnetization' in line2:
                            this_m = line2.split('=')[1].split('Bohr')[0]
                            try:  # magnetization might be a scalar
                                value = float(this_m)
                            except ValueError:  # but can also be a three vector component in non-collinear calcs
                                value = [float(i) for i in this_m.split()]
                            trajectory_data.setdefault('total_magnetization', []).append(value)
                            parsed_data['total_magnetization' + units_suffix] = default_magnetization_units
                        elif 'absolute magnetization' in line2:
                            value = float(line2.split('=')[1].split('Bohr')[0])
                            trajectory_data.setdefault('absolute_magnetization', []).append(value)
                            parsed_data['absolute_magnetization' + units_suffix] = default_magnetization_units
                        # exit loop
                        elif 'convergence' in line2:
                            break

                    if vdw_correction:
                        j = 0
                        while True:
                            j += -1
                            line2 = data_step[count + j]
                            if 'Non-local correlation energy' in line2:
                                value = grep_energy_from_line(line2)
                                trajectory_data.setdefault('energy_vdw', []).append(value)
                                break
                        parsed_data['energy_vdw' + units_suffix] = default_energy_units
                except Exception:
                    logs.warning.append('Error while parsing for energy terms.')

            elif 'the Fermi energy is' in line:
                try:
                    value = float(line.split('is')[1].split('ev')[0])
                    trajectory_data.setdefault('fermi_energy', []).append(value)
                    parsed_data['fermi_energy' + units_suffix] = default_energy_units
                except Exception:
                    logs.warning.append('Error while parsing Fermi energy from the output file.')

            elif 'Forces acting on atoms' in line:
                try:
                    forces = []
                    j = 0
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom ' in line2:
                            line2 = line2.split('=')[1].split()
                            # CONVERT FORCES IN eV/Ang
                            vec = [float(s) * ry_to_ev / bohr_to_ang for s in line2]
                            forces.append(vec)
                        if len(forces) == nat:
                            break
                    trajectory_data.setdefault('forces', []).append(forces)
                    parsed_data['forces' + units_suffix] = default_force_units
                except Exception:
                    logs.warning.append('Error while parsing forces.')

            # TODO: adding the parsing support for the decomposition of the forces

            elif 'Total force =' in line:
                try:  # note that I can't check the units: not written in output!
                    value = float(line.split('=')[1].split('Total')[0]) * ry_to_ev / bohr_to_ang
                    trajectory_data.setdefault('total_force', []).append(value)
                    parsed_data['total_force' + units_suffix] = default_force_units
                except Exception:
                    logs.warning.append('Error while parsing total force.')

            elif ('entering subroutine stress ...' in line) or ('Computing stress (Cartesian axis) and pressure' in line):
                try:
                    stress = []
                    for k in range(10 + 5 * vdw_correction):
                        if 'P=' in data_step[count + k + 1]:
                            count2 = count + k + 1
                    if '(Ry/bohr**3)' not in data_step[count2]:
                        raise QEOutputParsingError('Error while parsing stress: unexpected units.')
                    for k in range(3):
                        line2 = data_step[count2 + k + 1].split()
                        vec = [float(s) * 10**(-9) * ry_si / (bohr_si)**3 for s in line2[0:3]]
                        stress.append(vec)
                    trajectory_data.setdefault('stress', []).append(stress)
                    parsed_data['stress' + units_suffix] = default_stress_units
                except Exception:
                    logs.warning.append('Error while parsing stress tensor.')

            # Electronic and ionic dipoles when 'lelfield' was set to True in input parameters
            elif lelfield is True:

                if 'Electronic Dipole per cell' in line:
                    electronic_dipole = float(line.split()[-1])
                    trajectory_frame.setdefault('electronic_dipole_cell_average', []).append(electronic_dipole)

                elif 'Ionic Dipole per cell' in line:
                    ionic_dipole = float(line.split()[-1])
                    trajectory_frame.setdefault('ionic_dipole_cell_average', []).append(ionic_dipole)

                elif 'Electronic Dipole on Cartesian axes' in line:
                    electronic_dipole = [float(data_step[count + i + 1].split()[1]) for i in range(3)]
                    trajectory_frame.setdefault('electronic_dipole_cartesian_axes', []).append(electronic_dipole)

                elif 'Ionic Dipole on Cartesian axes' in line:
                    ionic_dipole = [float(data_step[count + i + 1].split()[1]) for i in range(3)]
                    trajectory_frame.setdefault('ionic_dipole_cartesian_axes', []).append(ionic_dipole)

        # End of trajectory frame, only keep last entries for dipole related values
        if lelfield is True:

            # For every property only get the last entry if possible. A property that was never printed in this
            # step has no key in the frame at all, so a missing key has to be handled just like an empty list.
            last_entries = []
            for dipole_key in lelfield_keys:
                try:
                    last_entries.append(trajectory_frame[dipole_key].pop())
                except (KeyError, IndexError):
                    last_entries.append(None)

            # Only add them if all four properties were successfully parsed
            if all(entry is not None for entry in last_entries):
                for dipole_key, entry in zip(lelfield_keys, last_entries):
                    trajectory_data.setdefault(dipole_key, []).append(entry)

    # check consistency of scf_accuracy and scf_iterations
    if 'scf_accuracy' in trajectory_data:
        if 'scf_iterations' in trajectory_data:
            if len(trajectory_data['scf_accuracy']) != sum(trajectory_data['scf_iterations']):
                logs.warning.append(
                    'the length of scf_accuracy does not match the sum of the elements of scf_iterations.'
                )
        else:
            logs.warning.append('"the scf_accuracy array was parsed but the scf_iterations was not.')

    # If specified in the parser options, parse the atomic occupations
    parse_atomic_occupations = parser_options.get('parse_atomic_occupations', False)

    if parse_atomic_occupations:

        atomic_occupations = {}
        hubbard_blocks = stdout.split('LDA+U parameters')

        # Only the last block is considered
        for line in hubbard_blocks[-1].split('\n'):

            if 'Tr[ns(na)]' in line:

                values = line.split('=')
                atomic_index = values[0].split()[1]
                occupations = values[1].split()

                if len(occupations) == 1:
                    atomic_occupations[atomic_index] = {'total': occupations[0]}
                elif len(occupations) == 3:
                    atomic_occupations[atomic_index] = {
                        'up': occupations[0],
                        'down': occupations[1],
                        'total': occupations[2]
                    }
                else:
                    continue

        parsed_data['atomic_occupations'] = atomic_occupations

    # Ionic calculations and BFGS algorithm did not print that calculation is converged
    if 'atomic_positions_relax' in trajectory_data and not marker_bfgs_converged:
        logs.error.append('ERROR_IONIC_CONVERGENCE_NOT_REACHED')

    # Ionic calculation that hit the maximum number of ionic steps. Note: does not necessarily mean that convergence
    # was not reached as it could have occurred in the last step.
    if maximum_ionic_steps is not None and maximum_ionic_steps == parsed_data.get('number_ionic_steps', None):
        logs.warning.append('ERROR_MAXIMUM_IONIC_STEPS_REACHED')

    # Remove duplicate log messages by turning it into a set. Then convert back to list as that is what is expected
    logs.error = list(set(logs.error))
    logs.warning = list(set(logs.warning))

    parsed_data['bands'] = bands_data
    parsed_data['structure'] = structure_data
    parsed_data['trajectory'] = trajectory_data

    return parsed_data, logs
def parse_raw_ph_output(stdout, tensors=None, dynamical_matrices=None):
    """Parses the raw output of a Quantum ESPRESSO `ph.x` calculation.

    The results of the text output, the tensors and the dynamical matrices are merged into a single dictionary.

    :param stdout: the content of the stdout file as a string
    :param tensors: the content of the tensors.xml file as a string
    :param dynamical_matrices: a list of the content of the dynamical matrix files as a string
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    :raises AssertionError: if a key of the text output data is also present in the tensor or dynamical matrix data
    """
    logs = get_logging_container()
    data_lines = stdout.split('\n')

    # Without the `JOB DONE` message the job was interrupted before it finished
    if not any('JOB DONE' in text for text in data_lines):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Tensors are optional
    tensor_data = {}
    if tensors:
        try:
            tensor_data = parse_ph_tensor(tensors)
        except QEOutputParsingError:
            logs.warning.append('Error while parsing the tensor files')

    out_data = parse_ph_text_output(data_lines, logs)

    # Dynamical matrices are optional as well
    dynmat_data = {}
    if dynamical_matrices:
        for index, content in enumerate(dynamical_matrices):
            dynmat_lines = content.split('\n')

            if not dynmat_lines:
                continue

            # A file is only parsed if its first line is not made up exclusively of numbers
            try:
                for token in dynmat_lines[0].split():
                    float(token)
            except ValueError:
                pass
            else:
                continue

            # TODO: use the bands format?
            dynmat_data[f'dynamical_matrix_{index}'] = parse_ph_dynmat(dynmat_lines, logs)

    # The dictionaries are about to be merged, so no key of the text output may occur in another one.
    # The dynmat_data and parser_info keys are not checked against each other.
    for key in out_data.keys():
        if key in tensor_data:
            raise AssertionError(f'{key} found in two dictionaries')
        if key in dynmat_data:
            raise AssertionError(f'{key} found in two dictionaries')

    parsed_data = {**dynmat_data, **out_data, **tensor_data}

    return parsed_data, logs
def parse_xml_post_6_2(xml):
    """Parse the content of XML output file written by `pw.x` and `cp.x` with the new schema-based XML format.

    The XML is converted to a dictionary with the XSD schema referenced by the file, falling back to the default
    schema if the former cannot be loaded. Schema validation is lax: validation errors are appended to the error
    logs and parsing continues.

    :param xml: parsed XML
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    :raises XMLParseError: if neither XSD file can be opened, if a symmetry has an unexpected type or if the number
        of bands in the band structure is missing or inconsistent
    """
    e_bohr2_to_coulomb_m2 = 57.214766  # e/a0^2 to C/m^2 (electric polarization) from Wolfram Alpha

    logs = get_logging_container()

    schema_filepath = get_schema_filepath(xml)

    try:
        xsd = XMLSchema(schema_filepath)
    except URLError:

        # If loading the XSD file specified in the XML file fails, we try the default
        schema_filepath_default = get_default_schema_filepath()

        try:
            xsd = XMLSchema(schema_filepath_default)
        except URLError:
            raise XMLParseError(
                f'Could not open or parse the XSD files {schema_filepath} and {schema_filepath_default}'
            )
        else:
            # Remember which schema was actually used, since it is reported in the validation error message
            schema_filepath = schema_filepath_default

    # Validate XML document against the schema
    # Returned dictionary has a structure where, if tag ['key'] is "simple", xml_dictionary['key'] returns its content.
    # Otherwise, the following keys are available:
    #
    #  xml_dictionary['key']['$'] returns its content
    #  xml_dictionary['key']['@attr'] returns its attribute 'attr'
    #  xml_dictionary['key']['nested_key'] goes one level deeper.

    xml_dictionary, errors = xsd.to_dict(xml, validation='lax')
    if errors:
        logs.error.append(
            f'{len(errors)} XML schema validation error(s) schema: {schema_filepath}:'
        )
        for err in errors:
            logs.error.append(str(err))

    xml_version = StrictVersion(
        xml_dictionary['general_info']['xml_format']['@VERSION'])
    # The `input` section is treated as optional, whereas `output` is required
    inputs = xml_dictionary.get('input', {})
    outputs = xml_dictionary['output']

    # The cell vectors are converted from Bohr to Angstrom
    lattice_vectors = [
        [
            x * CONSTANTS.bohr_to_ang
            for x in outputs['atomic_structure']['cell']['a1']
        ],
        [
            x * CONSTANTS.bohr_to_ang
            for x in outputs['atomic_structure']['cell']['a2']
        ],
        [
            x * CONSTANTS.bohr_to_ang
            for x in outputs['atomic_structure']['cell']['a3']
        ],
    ]

    has_electric_field = inputs.get('electric_field', {}).get('electric_potential', None) == 'sawtooth_potential'
    has_dipole_correction = inputs.get('electric_field', {}).get('dipole_correction', False)

    if 'occupations' in inputs.get('bands', {}):
        try:
            occupations = inputs['bands']['occupations']['$']  # yapf: disable
        except TypeError:  # "string indices must be integers" -- might have attribute 'nspin'
            occupations = inputs['bands']['occupations']
    else:
        occupations = None

    # Per-species magnetization settings, where missing values default to zero
    starting_magnetization = []
    magnetization_angle1 = []
    magnetization_angle2 = []

    for specie in outputs['atomic_species']['species']:
        starting_magnetization.append(specie.get('starting_magnetization', 0.0))
        magnetization_angle1.append(specie.get('magnetization_angle1', 0.0))
        magnetization_angle2.append(specie.get('magnetization_angle2', 0.0))

    # Map the textual spin constraint onto an integer flag; any other value leaves the flag at 0
    constraint_mag = 0
    spin_constraints = inputs.get('spin_constraints', {}).get('spin_constraints', None)
    if spin_constraints == 'atomic':
        constraint_mag = 1
    elif spin_constraints == 'atomic direction':
        constraint_mag = 2
    elif spin_constraints == 'total':
        constraint_mag = 3
    elif spin_constraints == 'total direction':
        constraint_mag = 6

    lsda = inputs.get('spin', {}).get('lsda', False)
    spin_orbit_calculation = inputs.get('spin', {}).get('spinorbit', False)
    non_colinear_calculation = outputs['magnetization']['noncolin']
    do_magnetization = outputs['magnetization']['do_magnetization']

    # Time reversal symmetry of the system
    if non_colinear_calculation and do_magnetization:
        time_reversal = False
    else:
        time_reversal = True

    # If no specific tags are present, the default is 1
    if non_colinear_calculation or spin_orbit_calculation:
        nspin = 4
    elif lsda:
        nspin = 2
    else:
        nspin = 1

    symmetries = []
    lattice_symmetries = []  # note: will only contain lattice symmetries that are NOT crystal symmetries
    inversion_symmetry = False

    # See also PW/src/setup.f90
    nsym = outputs.get('symmetries', {}).get('nsym', None)  # crystal symmetries
    nrot = outputs.get('symmetries', {}).get('nrot', None)  # lattice symmetries

    for symmetry in outputs.get('symmetries', {}).get('symmetry', []):

        # There are two types of symmetries, lattice and crystal. The pure inversion (-I) is always a lattice symmetry,
        # so we don't care. But if the pure inversion is also a crystal symmetry, then then the system as a whole
        # has (by definition) inversion symmetry; so we set the global property inversion_symmetry = True.
        symmetry_type = symmetry['info']['$']
        symmetry_name = symmetry['info']['@name']
        if symmetry_type == 'crystal_symmetry' and symmetry_name.lower() == 'inversion':
            inversion_symmetry = True

        # The nine values of the rotation are split into three rows of three
        sym = {
            'rotation': [
                symmetry['rotation']['$'][0:3],
                symmetry['rotation']['$'][3:6],
                symmetry['rotation']['$'][6:9],
            ],
            'name': symmetry_name,
        }

        # The following three properties are optional, where only `t_rev` has a fallback value
        try:
            sym['t_rev'] = '1' if symmetry['info']['@time_reversal'] else '0'
        except KeyError:
            sym['t_rev'] = '0'

        try:
            sym['equivalent_atoms'] = symmetry['equivalent_atoms']['$']
        except KeyError:
            pass

        try:
            sym['fractional_translation'] = symmetry['fractional_translation']
        except KeyError:
            pass

        if symmetry_type == 'crystal_symmetry':
            symmetries.append(sym)
        elif symmetry_type == 'lattice_symmetry':
            lattice_symmetries.append(sym)
        else:
            raise XMLParseError(
                f'Unexpected type of symmetry: {symmetry_type}')

    # A mismatch between the declared and the parsed number of symmetries is only reported as a warning
    if (nsym != len(symmetries)) or (
            nrot != len(symmetries) + len(lattice_symmetries)):
        logs.warning.append(
            'Inconsistent number of symmetries: nsym={}, nrot={}, len(symmetries)={}, len(lattice_symmetries)={}'
            .format(nsym, nrot, len(symmetries), len(lattice_symmetries)))

    xml_data = {
        #'pp_check_flag': True, # Currently not printed in the new format.
        # Signals whether the XML file is complete
        # and can be used for post-processing. Everything should be in the XML now, but in
        # any case, the new XML schema should mostly protect from incomplete files.
        'lkpoint_dir': False,  # Currently not printed in the new format.
        # Signals whether kpt-data are written in sub-directories.
        # Was generally true in the old format, but now all the eigenvalues are
        # in the XML file, under output / band_structure, so this is False.
        'charge_density': './charge-density.dat',  # A file name. Not printed in the new format.
        # The filename and path are considered fixed: <outdir>/<prefix>.save/charge-density.dat
        # TODO: change to .hdf5 if output format is HDF5 (issue #222)
        'rho_cutoff_units': 'eV',
        'wfc_cutoff_units': 'eV',
        'fermi_energy_units': 'eV',
        'k_points_units': '1 / angstrom',
        'symmetries_units': 'crystal',
        'constraint_mag': constraint_mag,
        'magnetization_angle2': magnetization_angle2,
        'magnetization_angle1': magnetization_angle1,
        'starting_magnetization': starting_magnetization,
        'has_electric_field': has_electric_field,
        'has_dipole_correction': has_dipole_correction,
        'lda_plus_u_calculation': 'dftU' in outputs,
        'format_name': xml_dictionary['general_info']['xml_format']['@NAME'],
        'format_version': xml_dictionary['general_info']['xml_format']['@VERSION'],
        # TODO: check that format version: a) matches the XSD schema version; b) is updated as well
        #       See line 43 in Modules/qexsd.f90
        'creator_name': xml_dictionary['general_info']['creator']['@NAME'].lower(),
        'creator_version': xml_dictionary['general_info']['creator']['@VERSION'],
        'non_colinear_calculation': non_colinear_calculation,
        'do_magnetization': do_magnetization,
        'time_reversal_flag': time_reversal,
        'symmetries': symmetries,
        'lattice_symmetries': lattice_symmetries,
        'do_not_use_time_reversal': inputs.get('symmetry_flags', {}).get('noinv', None),
        'spin_orbit_domag': outputs['magnetization']['do_magnetization'],
        'fft_grid': [
            value for _, value in sorted(outputs['basis_set']['fft_grid'].items())
        ],
        'lsda': lsda,
        'number_of_spin_components': nspin,
        'no_time_rev_operations': inputs.get('symmetry_flags', {}).get('no_t_rev', None),
        'inversion_symmetry': inversion_symmetry,  # the old tag was INVERSION_SYMMETRY and was set to (from the code): "invsym if true the system has inversion symmetry"
        'number_of_bravais_symmetries': nrot,  # lattice symmetries
        'number_of_symmetries': nsym,  # crystal symmetries
        'wfc_cutoff': inputs.get('basis', {}).get('ecutwfc', -1.0) * CONSTANTS.hartree_to_ev,
        'rho_cutoff': outputs['basis_set']['ecutrho'] * CONSTANTS.hartree_to_ev,  # not always printed in input->basis
        'smooth_fft_grid': [
            value for _, value in sorted(outputs['basis_set']['fft_smooth'].items())
        ],
        'dft_exchange_correlation': inputs.get('dft', {}).get(
            'functional', None),  # TODO: also parse optional elements of 'dft' tag
        # WARNING: this is different between old XML and new XML
        'spin_orbit_calculation': spin_orbit_calculation,
        'q_real_space': outputs['algorithmic_info']['real_space_q'],
    }

    # alat is technically an optional attribute according to the schema,
    # but I don't know what to do if it's missing. atomic_structure is mandatory.
    output_alat_bohr = outputs['atomic_structure']['@alat']
    output_alat_angstrom = output_alat_bohr * CONSTANTS.bohr_to_ang

    # Band structure
    if 'band_structure' in outputs:
        band_structure = outputs['band_structure']

        # The smearing of the output band structure takes precedence over the one of the inputs
        smearing_xml = None

        if 'smearing' in outputs['band_structure']:
            smearing_xml = outputs['band_structure']['smearing']
        elif 'smearing' in inputs:
            smearing_xml = inputs['smearing']

        if smearing_xml:
            degauss = smearing_xml['@degauss']

            # Versions below 19.03.04 (Quantum ESPRESSO<=6.4.1) incorrectly print degauss in Ry instead of Hartree
            if xml_version < StrictVersion('19.03.04'):
                degauss *= CONSTANTS.ry_to_ev
            else:
                degauss *= CONSTANTS.hartree_to_ev

            xml_data['degauss'] = degauss
            xml_data['smearing_type'] = smearing_xml['$']

        num_k_points = band_structure['nks']
        num_electrons = band_structure['nelec']
        num_atomic_wfc = band_structure['num_of_atomic_wfc']
        num_bands = band_structure.get('nbnd', None)
        num_bands_up = band_structure.get('nbnd_up', None)
        num_bands_down = band_structure.get('nbnd_dw', None)

        if num_bands is None and num_bands_up is None and num_bands_down is None:
            raise XMLParseError(
                'None of `nbnd`, `nbnd_up` or `nbdn_dw` could be parsed.')

        # If both channels are `None` we are dealing with a non spin-polarized or non-collinear calculation
        elif num_bands_up is None and num_bands_down is None:
            spins = False

        # If only one of the channels is `None` we raise, because that is an inconsistent result
        elif num_bands_up is None or num_bands_down is None:
            raise XMLParseError(
                'Only one of `nbnd_up` and `nbnd_dw` could be parsed')

        # Here it is a spin-polarized calculation, where for pw.x the number of bands in each channel should be identical.
        else:
            spins = True
            if num_bands_up != num_bands_down:
                raise XMLParseError(
                    f'different number of bands for spin channels: {num_bands_up} and {num_bands_down}'
                )

            if num_bands is not None and num_bands != num_bands_up + num_bands_down:
                raise XMLParseError(
                    'Inconsistent number of bands: nbnd={}, nbnd_up={}, nbnd_down={}'
                    .format(num_bands, num_bands_up, num_bands_down))

            if num_bands is None:
                num_bands = num_bands_up + num_bands_down  # backwards compatibility;

        k_points = []
        k_points_weights = []
        ks_states = band_structure['ks_energies']
        for ks_state in ks_states:
            # The k-point coordinates are scaled by 2 pi / alat, with alat in Angstrom
            k_points.append([
                kp * 2 * np.pi / output_alat_angstrom
                for kp in ks_state['k_point']['$']
            ])
            k_points_weights.append(ks_state['k_point']['@weight'])

        if not spins:
            band_eigenvalues = [[]]
            band_occupations = [[]]
            for ks_state in ks_states:
                band_eigenvalues[0].append(ks_state['eigenvalues']['$'])
                band_occupations[0].append(ks_state['occupations']['$'])
        else:
            # The first `num_bands_up` values of each k-point go to the first channel, the rest to the second
            band_eigenvalues = [[], []]
            band_occupations = [[], []]
            for ks_state in ks_states:
                band_eigenvalues[0].append(
                    ks_state['eigenvalues']['$'][0:num_bands_up])
                band_eigenvalues[1].append(
                    ks_state['eigenvalues']['$'][num_bands_up:num_bands])
                band_occupations[0].append(
                    ks_state['occupations']['$'][0:num_bands_up])
                band_occupations[1].append(
                    ks_state['occupations']['$'][num_bands_up:num_bands])

        band_eigenvalues = np.array(band_eigenvalues) * CONSTANTS.hartree_to_ev
        band_occupations = np.array(band_occupations)

        if not spins:
            parser_assert_equal(band_eigenvalues.shape,
                                (1, num_k_points, num_bands),
                                'Unexpected shape of band_eigenvalues')
            parser_assert_equal(band_occupations.shape,
                                (1, num_k_points, num_bands),
                                'Unexpected shape of band_occupations')
        else:
            parser_assert_equal(band_eigenvalues.shape,
                                (2, num_k_points, num_bands_up),
                                'Unexpected shape of band_eigenvalues')
            parser_assert_equal(band_occupations.shape,
                                (2, num_k_points, num_bands_up),
                                'Unexpected shape of band_occupations')

        if not spins:
            xml_data['number_of_bands'] = num_bands
        else:
            # For collinear spin-polarized calculations `spins=True` and `num_bands` is sum of both channels. To get the
            # actual number of bands, we divide by two using integer division
            xml_data['number_of_bands'] = num_bands // 2

        for key, value in [('number_of_bands_up', num_bands_up),
                           ('number_of_bands_down', num_bands_down)]:
            if value is not None:
                xml_data[key] = value

        if 'fermi_energy' in band_structure:
            xml_data['fermi_energy'] = band_structure[
                'fermi_energy'] * CONSTANTS.hartree_to_ev

        bands_dict = {
            'occupations': band_occupations,
            'bands': band_eigenvalues,
            'bands_units': 'eV',
        }

        xml_data['number_of_atomic_wfc'] = num_atomic_wfc
        xml_data['number_of_k_points'] = num_k_points
        xml_data['number_of_electrons'] = num_electrons
        xml_data['k_points'] = k_points
        xml_data['k_points_weights'] = k_points_weights
        xml_data['bands'] = bands_dict

    try:
        monkhorst_pack = inputs['k_points_IBZ']['monkhorst_pack']
    except KeyError:
        pass  # not using Monkhorst pack
    else:
        xml_data['monkhorst_pack_grid'] = [
            monkhorst_pack[attr] for attr in ['@nk1', '@nk2', '@nk3']
        ]
        xml_data['monkhorst_pack_offset'] = [
            monkhorst_pack[attr] for attr in ['@k1', '@k2', '@k3']
        ]

    if occupations is not None:
        xml_data['occupations'] = occupations

    if 'boundary_conditions' in outputs and 'assume_isolated' in outputs[
            'boundary_conditions']:
        xml_data['assume_isolated'] = outputs['boundary_conditions'][
            'assume_isolated']

    # This is not printed by QE 6.3, but will be re-added before the next version
    if 'real_space_beta' in outputs['algorithmic_info']:
        xml_data['beta_real_space'] = outputs['algorithmic_info'][
            'real_space_beta']

    conv_info = {}
    conv_info_scf = {}
    conv_info_opt = {}
    # NOTE: n_scf_steps refers to the number of SCF steps in the *last* loop only.
    # To get the total number of SCF steps in the run you should sum up the individual steps.
    # TODO: should we parse 'steps' too? Are they already added in the output trajectory?
    for key in ['convergence_achieved', 'n_scf_steps', 'scf_error']:
        try:
            conv_info_scf[key] = outputs['convergence_info']['scf_conv'][key]
        except KeyError:
            pass
    for key in ['convergence_achieved', 'n_opt_steps', 'grad_norm']:
        try:
            conv_info_opt[key] = outputs['convergence_info']['opt_conv'][key]
        except KeyError:
            pass
    # Only the non-empty parts of the convergence information are stored
    if conv_info_scf:
        conv_info['scf_conv'] = conv_info_scf
    if conv_info_opt:
        conv_info['opt_conv'] = conv_info_opt
    if conv_info:
        xml_data['convergence_info'] = conv_info

    if 'status' in xml_dictionary:
        xml_data['exit_status'] = xml_dictionary['status']
        # 0 = convergence reached;
        # -1 = SCF convergence failed;
        # 3 = ionic convergence failed
        # These might be changed in the future. Also see PW/src/run_pwscf.f90

    try:
        berry_phase = outputs['electric_field']['BerryPhase']
    except KeyError:
        pass
    else:
        # This is what I would like to do, but it's not retro-compatible
        # xml_data['berry_phase'] = {}
        # xml_data['berry_phase']['total_phase']         = berry_phase['totalPhase']['$']
        # xml_data['berry_phase']['total_phase_modulus'] = berry_phase['totalPhase']['@modulus']
        # xml_data['berry_phase']['total_ionic_phase']      = berry_phase['totalPhase']['@ionic']
        # xml_data['berry_phase']['total_electronic_phase'] = berry_phase['totalPhase']['@electronic']
        # xml_data['berry_phase']['total_polarization']           = berry_phase['totalPolarization']['polarization']['$']
        # xml_data['berry_phase']['total_polarization_modulus']   = berry_phase['totalPolarization']['modulus']
        # xml_data['berry_phase']['total_polarization_units']     = berry_phase['totalPolarization']['polarization']['@Units']
        # xml_data['berry_phase']['total_polarization_direction'] = berry_phase['totalPolarization']['direction']
        # parser_assert_equal(xml_data['berry_phase']['total_phase_modulus'].lower(), '(mod 2)',
        #                    "Unexpected modulus for total phase")
        # parser_assert_equal(xml_data['berry_phase']['total_polarization_units'].lower(), 'e/bohr^2',
        #                    "Unsupported units for total polarization")
        # Retro-compatible keys:
        polarization = berry_phase['totalPolarization']['polarization']['$']
        polarization_units = berry_phase['totalPolarization']['polarization'][
            '@Units']
        polarization_modulus = berry_phase['totalPolarization']['modulus']
        parser_assert(
            polarization_units in ['e/bohr^2', 'C/m^2'],
            f"Unsupported units '{polarization_units}' of total polarization")
        # The polarization is always stored in C/m^2, so values in e/bohr^2 are converted
        if polarization_units == 'e/bohr^2':
            polarization *= e_bohr2_to_coulomb_m2
            polarization_modulus *= e_bohr2_to_coulomb_m2

        xml_data['total_phase'] = berry_phase['totalPhase']['$']
        xml_data['total_phase_units'] = '2pi'
        xml_data['ionic_phase'] = berry_phase['totalPhase']['@ionic']
        xml_data['ionic_phase_units'] = '2pi'
        xml_data['electronic_phase'] = berry_phase['totalPhase']['@electronic']
        xml_data['electronic_phase_units'] = '2pi'
        xml_data['polarization'] = polarization
        xml_data[
            'polarization_module'] = polarization_modulus  # should be called "modulus"
        xml_data['polarization_units'] = 'C / m^2'
        xml_data['polarization_direction'] = berry_phase['totalPolarization'][
            'direction']
        # TODO: add conversion for (e/Omega).bohr (requires to know Omega, the volume of the cell)
        # TODO (maybe): Not parsed:
        # - individual ionic phases
        # - individual electronic phases and weights

    # TODO: We should put the `non_periodic_cell_correction` string in (?)
    # Each atom is stored as its name followed by its position, converted from Bohr to Angstrom
    atoms = [[
        atom['@name'],
        [coord * CONSTANTS.bohr_to_ang for coord in atom['$']]
    ] for atom in outputs['atomic_structure']['atomic_positions']['atom']]
    species = outputs['atomic_species']['species']
    structure_data = {
        'atomic_positions_units': 'Angstrom',
        'direct_lattice_vectors_units': 'Angstrom',
        # ??? 'atoms_if_pos_list': [[1, 1, 1], [1, 1, 1]],
        'number_of_atoms': outputs['atomic_structure']['@nat'],
        'lattice_parameter': output_alat_angstrom,
        'reciprocal_lattice_vectors': [
            outputs['basis_set']['reciprocal_lattice']['b1'],
            outputs['basis_set']['reciprocal_lattice']['b2'],
            outputs['basis_set']['reciprocal_lattice']['b3']
        ],
        'atoms': atoms,
        'cell': {
            'lattice_vectors': lattice_vectors,
            'volume': cell_volume(*lattice_vectors),
            'atoms': atoms,
        },
        'lattice_parameter_xml': output_alat_bohr,
        'number_of_species': outputs['atomic_species']['@ntyp'],
        'species': {
            'index': [i + 1 for i, specie in enumerate(species)],
            'pseudo': [specie['pseudo_file'] for specie in species],
            'mass': [specie['mass'] for specie in species],
            'type': [specie['@name'] for specie in species]
        },
    }

    xml_data['structure'] = structure_data

    return xml_data, logs
def parse_pw_xml_pre_6_2(xml_file, dir_with_bands):
    """Parse the content of XML output file written by `pw.x` with the old schema-less XML format.

    If the XML file itself cannot be parsed, an error is logged and a skeleton dictionary with empty `bands` and
    `structure` entries is returned instead of raising.

    :param xml_file: filelike object to the XML output file
    :param dir_with_bands: absolute filepath to directory containing k-point XML files. If it is falsy, the
        eigenvalue files are not read and the returned `bands` dictionary is empty.
    :returns: tuple of two dictionaries, with the parsed data and log messages, respectively
    :raises QEOutputParsingError: if the units of the k-points or energies are not supported, or if the k-points,
        the charge density link or the eigenvalue files cannot be parsed
    """
    import copy
    from xml.parsers.expat import ExpatError

    def read_bands_and_occupations(eigenval_n):
        """Return the eigenvalues, converted to eV, and the occupations stored in a single eigenvalue XML file."""
        # load the eigenval.xml file
        with open(eigenval_n, 'r') as eigenval_f:
            f = eigenval_f.read()

        eig_dom = parseString(f)

        tagname = 'UNITS_FOR_ENERGIES'
        a = eig_dom.getElementsByTagName(tagname)[0]
        attrname = 'UNITS'
        metric = str(a.getAttribute(attrname))
        if metric not in ['Hartree']:
            raise QEOutputParsingError('Error parsing eigenvalues xml file, ' + \
                                       'units {} not implemented.'.format(metric))

        tagname = 'EIGENVALUES'
        a = eig_dom.getElementsByTagName(tagname)[0]
        b = a.childNodes[0]
        value_e = [float(s) * CONSTANTS.hartree_to_ev for s in b.data.split()]

        tagname = 'OCCUPATIONS'
        a = eig_dom.getElementsByTagName(tagname)[0]
        b = a.childNodes[0]
        value_o = [float(s) for s in b.data.split()]
        return value_e, value_o

    logs = get_logging_container()

    # NOTE : I often assume that if the xml file has been written, it has no internal errors.
    try:
        dom = parse(xml_file)
    except ExpatError:
        logs.error.append('Error in XML parseString: bad format')
        parsed = {
            'bands': {},
            'structure': {},
        }
        return parsed, logs

    parsed_data = {}

    structure_dict = {}
    # CARD CELL
    structure_dict, lattice_vectors, volume = copy.deepcopy(xml_card_cell(structure_dict, dom))

    # CARD IONS
    structure_dict = copy.deepcopy(xml_card_ions(structure_dict, dom, lattice_vectors, volume))

    #CARD HEADER
    parsed_data = copy.deepcopy(xml_card_header(parsed_data, dom))

    # CARD CONTROL
    cardname = 'CONTROL'
    target_tags = read_xml_card(dom, cardname)
    for tagname in ['PP_CHECK_FLAG', 'LKPOINT_DIR', 'Q_REAL_SPACE', 'BETA_REAL_SPACE']:
        parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    # TODO: why this one isn't working? What is it actually?
    # # CARD MOVING_CELL
    #
    # try:
    #     target_tags = dom.getElementsByTagName('MOVING_CELL')[0]
    # except:
    #     raise IOError
    #
    # tagname='CELL_FACTOR'
    # parsed_data[tagname.lower()]=parse_xml_child_float(tagname,target_tags)

    # CARD ELECTRIC_FIELD
    cardname = 'ELECTRIC_FIELD'
    target_tags = read_xml_card(dom, cardname)
    for tagname in ['HAS_ELECTRIC_FIELD', 'HAS_DIPOLE_CORRECTION']:
        parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    # The details of the field are only parsed if a field or a dipole correction is present
    if parsed_data['has_electric_field'] or parsed_data['has_dipole_correction']:
        tagname = 'FIELD_DIRECTION'
        parsed_data[tagname.lower()] = parse_xml_child_integer(tagname, target_tags)

        for tagname in ['MAXIMUM_POSITION', 'INVERSE_REGION', 'FIELD_AMPLITUDE']:
            parsed_data[tagname.lower()] = parse_xml_child_float(tagname, target_tags)

    # CARD PLANE_WAVES
    parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data, dom, 'pw'))

    # CARD SPIN
    parsed_data = copy.deepcopy(xml_card_spin(parsed_data, dom))

    # CARD BRILLOUIN ZONE
    cardname = 'BRILLOUIN_ZONE'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_K-POINTS'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_integer(tagname, target_tags)

    tagname = 'UNITS_FOR_K-POINTS'
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['2 pi / a']:
        raise QEOutputParsingError('Error parsing attribute {},'.format(attrname) + \
                ' tag {} inside {}, units unknown'.format(tagname, target_tags.tagName) )
    k_points_units = metric

    for tagname, param in [['MONKHORST_PACK_GRID', 'nk'], ['MONKHORST_PACK_OFFSET', 'k']]:
        try:
            #a = target_tags.getElementsByTagName(tagname)[0]
            a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
            value = [int(a.getAttribute(param + str(i + 1))) for i in range(3)]
            parsed_data[tagname.replace('-', '_').lower()] = value
        except Exception:  # I might not use the monkhorst pack grid
            pass

    kpoints = []
    kpoints_weights = []

    # Index the k-point nodes by name once, instead of searching the children for every k-point
    tagname_prefix = 'K-POINT.'
    a_dict = {_.nodeName: _ for _ in target_tags.childNodes if _.nodeName.startswith(tagname_prefix)}

    try:
        import numpy
        for i in range(parsed_data['number_of_k_points']):
            tagname = '{}{}'.format(tagname_prefix, i + 1)
            #a = target_tags.getElementsByTagName(tagname)[0]
            a = a_dict[tagname]
            b = a.getAttribute('XYZ').replace('\n', '').rsplit()
            value = [float(s) for s in b]
            metric = k_points_units
            if metric == '2 pi / a':
                value = [2. * numpy.pi * float(s) / structure_dict['lattice_parameter'] for s in value]
            weight = float(a.getAttribute('WEIGHT'))
            kpoints.append(value)
            kpoints_weights.append(weight)
        parsed_data['k_points'] = kpoints
        parsed_data['k_points' + units_suffix] = default_k_points_units
        parsed_data['k_points_weights'] = kpoints_weights
    except Exception as exception:
        raise QEOutputParsingError(
            'Error parsing tag K-POINT.{} inside {}.'.format(i + 1, target_tags.tagName)
        ) from exception

    # I skip this card until someone will have a need for this.
    #     try:
    #         tagname='STARTING_K-POINTS'
    #         num_starting_k_points=parse_xml_child_integer(tagname,target_tags)
    #         # raise exception if there is no such a key
    #         parsed_data[tagname.replace('-','_').lower()]=num_starting_k_points
    #
    #         if parsed_data.get('starting_k_points'):
    #             try:
    #                 kpoints=[]
    #                 for i in range(parsed_data['starting_k_points']):
    #                     tagname='K-POINT_START.'+str(i+1)
    #                     a=target_tags.getElementsByTagName(tagname)[0]
    #                     b=a.getAttribute('XYZ').replace('\n','').rsplit()
    #                     value=[ float(s) for s in b ]
    #                     metric=parsed_data['k_points_units']
    #                     if metric=='2 pi / a':
    #                         value=[ float(s)/parsed_data['lattice_parameter'] for s in value ]
    #
    #                         weight=float(a.getAttribute('WEIGHT'))
    #
    #                         kpoints.append([value,weight])
    #
    #                 parsed_data['k_point_start']=kpoints
    #             except Exception:
    #                 raise QEOutputParsingError('Error parsing tag {}'.format(tagname)+\
    #                                            ' inside {}.'.format(target_tags.tagName ) )
    #     except Exception:
    #         if not parsed_data.get('starting_k_points'):
    #             pass
    #         else:
    #             parsed_data['xml_warnings'].append("Warning: could not parse {}".format(tagname))

    # tagname='NORM-OF-Q'
    # TODO: decide if save this parameter
    # parsed_data[tagname.replace('-','_').lower()]=parse_xml_child_float(tagname,target_tags)

    # CARD BAND STRUCTURE INFO
    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    for tagname in ['NUMBER_OF_SPIN_COMPONENTS', 'NUMBER_OF_ATOMIC_WFC', 'NUMBER_OF_BANDS']:
        parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_integer(tagname, target_tags)

    tagname = 'NON-COLINEAR_CALCULATION'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_bool(tagname, target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_float(tagname, target_tags)

    tagname = 'UNITS_FOR_ENERGIES'
    attrname = 'UNITS'
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['hartree']:
        # Report the units that were actually read. The message used to look up `parsed_data['energy_units']`, a key
        # that is not set in this function, so that lookup could fail before this exception was raised.
        raise QEOutputParsingError('Expected energy units in Hartree. Got instead {}'.format(units))

    try:
        tagname = 'TWO_FERMI_ENERGIES'
        parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)
    except Exception:
        # The tag is optional: without it a single Fermi energy is parsed below
        pass

    if parsed_data.get('two_fermi_energies', False):
        fermi_energy_tags = ['FERMI_ENERGY_UP', 'FERMI_ENERGY_DOWN']
    else:
        fermi_energy_tags = ['FERMI_ENERGY']

    # The Fermi energies are converted from Hartree to eV
    for tagname in fermi_energy_tags:
        parsed_data[tagname.replace('-', '_').lower()] = \
            parse_xml_child_float(tagname, target_tags) * CONSTANTS.hartree_to_ev
        parsed_data[tagname.lower() + units_suffix] = default_energy_units

    #CARD MAGNETIZATION_INIT
    cardname = 'MAGNETIZATION_INIT'
    target_tags = read_xml_card(dom, cardname)

    # 0 if false
    tagname = 'CONSTRAINT_MAG'
    parsed_data[tagname.lower()] = parse_xml_child_integer(tagname, target_tags)

    vec1 = []
    vec2 = []
    vec3 = []
    for i in range(structure_dict['number_of_species']):
        tagname = 'SPECIE.' + str(i + 1)
        #a=target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        tagname2 = 'STARTING_MAGNETIZATION'
        vec1.append(parse_xml_child_float(tagname2, a))
        tagname2 = 'ANGLE1'
        vec2.append(parse_xml_child_float(tagname2, a))
        tagname2 = 'ANGLE2'
        vec3.append(parse_xml_child_float(tagname2, a))
    parsed_data['starting_magnetization'] = vec1
    parsed_data['magnetization_angle1'] = vec2
    parsed_data['magnetization_angle2'] = vec3

    #CARD OCCUPATIONS
    cardname = 'OCCUPATIONS'
    target_tags = read_xml_card(dom, cardname)
    for tagname in ['SMEARING_METHOD', 'TETRAHEDRON_METHOD', 'FIXED_OCCUPATIONS']:
        parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)
    if parsed_data['smearing_method']:
        parsed_data['occupations'] = 'smearing'
    elif parsed_data['tetrahedron_method']:
        parsed_data['occupations'] = 'tetrahedra'  # TODO: might also be tetrahedra_lin or tetrahedra_opt: check input?
    elif parsed_data['fixed_occupations']:
        parsed_data['occupations'] = 'fixed'

    # Remove the following deprecated keys
    for tagname in ['SMEARING_METHOD', 'TETRAHEDRON_METHOD', 'FIXED_OCCUPATIONS']:
        parsed_data.pop(tagname.lower())

    #CARD CHARGE-DENSITY
    cardname = 'CHARGE-DENSITY'
    target_tags = read_xml_card(dom, cardname)
    try:
        attrname = 'iotk_link'
        value = str(target_tags.getAttribute(attrname)).rstrip().replace('\n', '').lower()
        parsed_data[cardname.lower().rstrip().replace('-', '_')] = value
    except Exception as exception:
        raise QEOutputParsingError('Error parsing attribute {},'.format(attrname) + \
                                   ' card {}.'.format(cardname)) from exception

    #CARD EIGENVALUES
    # Note: if this card is parsed, the dimension of the database grows very much!
    cardname = 'EIGENVALUES'
    target_tags = read_xml_card(dom, cardname)
    bands_dict = {}
    if dir_with_bands:
        try:
            occupations1 = []
            occupations2 = []
            bands1 = []
            bands2 = []
            for i in range(parsed_data['number_of_k_points']):
                tagname = 'K-POINT.' + str(i + 1)
                #a=target_tags.getElementsByTagName(tagname)[0]
                a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]

                # two cases: in cases of magnetic calculations, I have both spins
                # Only the lookup of the tag is guarded, so that an `IndexError` raised while reading an eigenvalue
                # file is not mistaken for the spin-polarized case.
                try:
                    datafiles = [a.getElementsByTagName('DATAFILE')[0]]
                except IndexError:
                    datafiles = [
                        a.getElementsByTagName('DATAFILE.1')[0],
                        a.getElementsByTagName('DATAFILE.2')[0],
                    ]

                # The first file fills the first spin channel and the second file, if any, the second channel
                channels = [(bands1, occupations1), (bands2, occupations2)]
                for datafile, (channel_bands, channel_occupations) in zip(datafiles, channels):
                    link = str(datafile.getAttribute('iotk_link')).rstrip().replace('\n', '')
                    eigenval_n = os.path.join(dir_with_bands, link)
                    value_e, value_o = read_bands_and_occupations(eigenval_n)
                    channel_bands.append(value_e)
                    channel_occupations.append(value_o)

            occupations = [occupations1]
            bands = [bands1]
            if occupations2:
                occupations.append(occupations2)
            if bands2:
                bands.append(bands2)

            bands_dict['occupations'] = occupations
            bands_dict['bands'] = bands
            bands_dict['bands' + units_suffix] = default_energy_units
        except Exception as exception:
            raise QEOutputParsingError(
                'Error parsing card {}: {} {}'.format(tagname, exception.__class__.__name__, exception)
            ) from exception


#     if dir_with_bands:
#         # if there is at least an empty band:
#         if parsed_data['smearing_method'] or \
#            parsed_data['number_of_electrons']/2. < parsed_data['number_of_bands']:
#
#             #TODO: currently I do it only for non magnetic systems
#             if len(bands_dict['occupations'])==1:
#             # initialize lumo
#                 lumo = parsed_data['h**o']+10000.0
#                 for list_bands in bands_dict['bands']:
#                     for value in list_bands:
#                         if (value > parsed_data['fermi_energy']) and (value<lumo):
#                             lumo=value
#                 if (lumo==parsed_data['h**o']+10000.0) or lumo<=parsed_data['fermi_energy']:
#                     #might be an error for bandgap larger than 10000 eV...
#                     raise QEOutputParsingError('Error while searching for LUMO.')
#                 parsed_data['lumo']=lumo
#                 parsed_data['lumo'+units_suffix] = default_energy_units

    # CARD symmetries
    parsed_data = copy.deepcopy(xml_card_symmetries(parsed_data, dom))

    # CARD EXCHANGE_CORRELATION
    parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data, dom))

    parsed_data['bands'] = bands_dict
    parsed_data['structure'] = structure_dict

    return parsed_data, logs
def parse_stdout(self, stdout_str):
    """Extract basic post-processing information from the text written to stdout.

    The text is scanned three times: once to verify that the job completion marker is present, once to
    collect known error and warning messages, and once to read the numerical data.

    :param stdout_str: the content of the stdout file as a single string
    :return: tuple ``(logs, output_dict)`` with the logging container holding the detected errors and
        warnings, followed by the dictionary of values parsed from the text
    """
    # Each entry pairs a marker substring with the message to log. A message of ``None`` means that the
    # matching line itself is logged.
    error_markers = (('xml data file not found', 'ERROR_PARENT_XML_MISSING'),)
    warning_markers = (('Warning:', None), ('DEPRECATED:', None))

    def record_matches(sink, markers, text):
        """Append to ``sink`` one message for every marker that occurs in ``text``."""
        for marker, label in markers:
            if marker in text:
                sink.append(text if label is None else label)

    all_lines = stdout_str.splitlines()
    logs = get_logging_container()
    results = {}

    # The completion marker is printed when the code exits without interruption, even if there was an error.
    if not any('JOB DONE' in text for text in all_lines):
        logs.error.append('ERROR_OUTPUT_STDOUT_INCOMPLETE')

    # Collect the known errors and warnings
    for text in all_lines:
        record_matches(logs.error, error_markers, text)
        record_matches(logs.warning, warning_markers, text)

    # Read the useful data
    for text in all_lines:

        if 'Check:' in text:  # QE < 6.5
            last_field = text.split('=')
            if 'negative/imaginary' in text:  # QE6.1-6.3
                charges = last_field[-1]
                results['negative_core_charge'] = float(charges.split()[0])
                results['imaginary_core_charge'] = float(charges.split()[-1])
            else:  # QE6.4
                results['negative_core_charge'] = float(last_field[1])

        if 'Min, Max, imaginary charge:' in text:
            tokens = text.split()
            results['charge_min'] = float(tokens[-3])
            results['charge_max'] = float(tokens[-2])
            results['charge_img'] = float(tokens[-1])

        if 'plot_num = ' in text:
            results['plot_num'] = int(text.split('=')[1])

        if 'Plot Type:' in text:
            before_format = text.partition('Output format')[0]
            results['plot_type'] = before_format.rpartition(':')[2].strip()
            results['output_format'] = text.rpartition(':')[2].strip()

    return logs, results
def parse_neb_text_output(data, input_dict=None):
    """Parse the text output of a Quantum ESPRESSO NEB calculation.

    :param data: the content of the output file as a single string
    :param input_dict: optional dictionary with the input parameters. It is only consulted, through
        ``input_dict['PATH']['num_of_images']``, when the number of images is not found in ``data``.
    :return: a tuple ``(parsed_data, iteration_data, critical_messages)`` where

        * ``parsed_data`` is a dictionary with the quantities found in the output, including the list
          ``warnings`` and the pair ``converged`` (``[flag, number of iterations]``),
        * ``iteration_data`` is a dictionary of lists with one entry per parsed iteration; it is empty
          when no iteration is found,
        * ``critical_messages`` is the list of messages regarded as critical: if any of them is found in
          ``parsed_data['warnings']`` the calculation is to be considered failed.
    :raises QEOutputParsingError: if the wall time cannot be converted to seconds, or if the number of
        images is available neither in the output nor in ``input_dict``
    """
    from aiida_quantumespresso.parsers.parse_raw import parse_output_error
    from aiida_quantumespresso.utils.mapping import get_logging_container
    from collections import defaultdict

    # A ``None`` default avoids sharing one mutable dictionary between calls.
    if input_dict is None:
        input_dict = {}

    # TODO: find a more exhaustive list of the common errors of neb
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'scf convergence NOT achieved on image': 'SCF did not converge for a given image',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        'reached the maximum number of steps': 'Maximum number of iterations reached in the image optimization',
    }

    # A message of ``None`` means that the marker is recognised but nothing is recorded for it.
    minor_warnings = {
        'Warning:': None,
    }

    all_warnings = dict(list(critical_warnings.items()) + list(minor_warnings.items()))

    parsed_data = {}
    parsed_data['warnings'] = []
    iteration_data = defaultdict(list)

    lines = data.split('\n')

    # The time is written multiple times, so search from the end to get the last report.
    for line in reversed(lines):
        if 'NEB' in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0].strip()
            except IndexError:
                parsed_data['warnings'].append('Error while parsing wall time.')
            else:
                # Only convert when the extraction succeeded: there is nothing to look up otherwise.
                parsed_data['wall_time'] = wall_time
                try:
                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
                except ValueError:
                    raise QEOutputParsingError('Unable to convert wall_time in seconds.')
            break

    # set by default the calculation as not converged.
    parsed_data['converged'] = [False, 0]
    logs = get_logging_container()

    for count, line in enumerate(lines):
        if 'initial path length' in line:
            parsed_data['initial_path_length'] = float(line.split('=')[1].split('bohr')[0]) * bohr_to_ang
        elif 'initial inter-image distance' in line:
            parsed_data['initial_image_dist'] = float(line.split('=')[1].split('bohr')[0]) * bohr_to_ang
        elif 'string_method' in line:
            parsed_data['string_method'] = line.split('=')[1].strip()
        elif 'restart_mode' in line:
            parsed_data['restart_mode'] = line.split('=')[1].strip()
        elif 'opt_scheme' in line:
            parsed_data['opt_scheme'] = line.split('=')[1].strip()
        elif 'num_of_images' in line:
            parsed_data['num_of_images'] = int(line.split('=')[1])
        elif 'nstep_path' in line:
            parsed_data['nstep_path'] = int(line.split('=')[1])
        elif 'CI_scheme' in line:
            parsed_data['ci_scheme'] = line.split('=')[1].strip()
        elif 'first_last_opt' in line:
            # The value is padded with blanks, so it has to be stripped before the comparison.
            parsed_data['first_last_opt'] = line.split('=')[1].strip() == 'T'
        elif 'use_freezing' in line:
            parsed_data['use_freezing'] = line.split('=')[1].strip() == 'T'
        elif ' ds ' in line:
            parsed_data['ds_au'] = float(line.split('=')[1].split('a.u.')[0])
        # The "suggested" lines also contain ' k_max' / ' k_min', hence they must be tested first.
        elif 'suggested k_max' in line:
            parsed_data['suggested_k_max_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_min' in line:
            parsed_data['suggested_k_min_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif ' k_max' in line:
            parsed_data['k_max'] = float(line.split('=')[1].split('a.u.')[0])
        elif ' k_min' in line:
            # ' k_min' also covers the former ' k_min_au' marker.
            parsed_data['k_min_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif 'path_thr' in line:
            parsed_data['path_thr'] = float(line.split('=')[1].split('eV')[0])
        elif 'list of climbing images' in line:
            # The last element of the split is discarded, as in the list the last comma is trailing.
            parsed_data['climbing_images_manual'] = [int(_) for _ in line.split(':')[1].split(',')[:-1]]
        elif 'neb: convergence achieved in' in line:
            parsed_data['converged'] = [True, int(line.split('iteration')[0].split()[-1])]
        elif '%%%%%%%%%%%%%%' in line:
            parse_output_error(lines, count, logs)
        else:
            # Record the message of the first known marker found in the line, if it has one.
            message = next((all_warnings[marker] for marker in all_warnings if marker in line), None)
            if message is not None:
                parsed_data['warnings'].append(message)

    parsed_data['warnings'].extend(logs.error)

    try:
        num_images = parsed_data['num_of_images']
    except KeyError:
        try:
            num_images = input_dict['PATH']['num_of_images']
        except KeyError:
            raise QEOutputParsingError(
                'No information on the number '
                'of images available (neither in input nor in output')

    # Everything that follows a '-- iteration' marker belongs to that iteration.
    for chunk in data.split('-- iteration')[1:]:
        iteration = chunk.split('\n')
        for count, line in enumerate(iteration):
            if 'activation energy (->)' in line:
                iteration_data['forward_activation_energy'].append(float(line.split('=')[1].split('eV')[0]))
            elif 'activation energy (<-)' in line:
                iteration_data['backward_activation_energy'].append(float(line.split('=')[1].split('eV')[0]))
            # Runs of blanks are collapsed so that the header is recognised whatever the column alignment.
            elif 'image energy (eV) error (eV/A) frozen' in ' '.join(line.split()):
                energies = []
                forces = []
                frozen = []
                try:
                    # The table rows are read starting two lines after the header; the first column of
                    # each row is dropped.
                    for index in range(num_images):
                        columns = iteration[count + 2 + index].split()[1:]
                        energies.append(float(columns[0]))
                        forces.append(float(columns[1]))
                        frozen.append(columns[2] == 'T')
                except (IndexError, ValueError):
                    parsed_data['warnings'].append('Error while parsing the image energies and forces.')
                else:
                    # Store the table only when all rows were read successfully.
                    iteration_data['image_energies'].append(energies)
                    iteration_data['image_forces'].append(forces)
                    iteration_data['image_frozen'].append(frozen)
            elif 'climbing image' in line:
                iteration_data['climbing_image_auto'].append([int(_) for _ in line.split('=')[1].split(',')])
            elif 'path length' in line:
                iteration_data['path_length'].append(float(line.split('=')[1].split('bohr')[0]) * bohr_to_ang)
            elif 'inter-image distance' in line:
                iteration_data['image_dist'].append(float(line.split('=')[1].split('bohr')[0]) * bohr_to_ang)

    return parsed_data, dict(iteration_data), list(critical_warnings.values())