# Module-level imports assumed by the functions below; the aiida paths follow
# the plugin layout of this era and may need adjusting.
# NOTE: physical constants (bohr_to_ang, ry_to_ev, amu_Ry, ry_si, bohr_si),
# unit labels (units_suffix, default_*_units) and helpers such as
# convert_qe_time_to_sec, parse_QE_errors, cell_volume, parse_xml_matrices and
# the remaining parse_xml_child_* functions are defined elsewhere in this
# package.
import copy
import re
import string
import xml.dom.minidom
from xml.dom.minidom import parseString

import numpy
import numpy as np  # both spellings are used below

from aiida.parsers.plugins.quantumespresso import QEOutputParsingError


def parse_raw_dos(dos_file, array_names, array_units):
    """
    This function takes as input the dos_file as a list of filelines along
    with information on how to give labels and units to the parsed data.

    :param dos_file: dos file lines in the form of a list
    :type dos_file: list
    :param array_names: list of all array names; array_names[0] is for the
        non-spin-polarized case and array_names[1] for the spin-polarized case
    :type array_names: list
    :param array_units: list of all array units; array_units[0] is for the
        non-spin-polarized case and array_units[1] for the spin-polarized case
    :type array_units: list

    :return array_data: a dictionary for ArrayData, which contains all parsed
        dos output along with labels and units
    :return spin: boolean, indicates whether the parsed results are
        spin-polarized
    """
    dos_header = dos_file[0]
    try:
        dos_data = np.genfromtxt(dos_file)
    except ValueError:
        raise QEOutputParsingError('dosfile could not be loaded '
                                   'using genfromtxt')
    if len(dos_data) == 0:
        raise QEOutputParsingError("Dos file is empty.")
    if np.isnan(dos_data).any():
        raise QEOutputParsingError("Dos file contains non-numeric elements.")

    # Check the number of columns, essentially to see whether spin was used
    if len(dos_data[0]) == 3:
        # spin is not used
        array_names = array_names[0]
        array_units = array_units[0]
        spin = False
    elif len(dos_data[0]) == 4:
        # spin is used
        array_names = array_names[1]
        array_units = array_units[1]
        spin = True
    else:
        raise QEOutputParsingError("Dos file in format that the parser is not "
                                   "designed to handle.")

    i = 0
    array_data = {}
    array_data['header'] = np.array(dos_header)
    while i < len(array_names):
        array_data[array_names[i]] = dos_data[:, i]
        array_data[array_names[i] + '_units'] = np.array(array_units[i])
        i += 1
    return array_data, spin

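
# A minimal usage sketch for parse_raw_dos. The label/unit lists and the file
# name below are illustrative assumptions, not the plugin's canonical names;
# the helper is left uncalled so the module stays import-safe.
def _example_parse_raw_dos():
    array_names = [['dos_energy', 'dos', 'integrated_dos'],  # no spin
                   ['dos_energy', 'dos_up', 'dos_down', 'integrated_dos']]
    array_units = [['eV', 'states/eV', 'states'],
                   ['eV', 'states/eV', 'states/eV', 'states']]
    with open('aiida.dos') as f:  # hypothetical file name
        dos_lines = f.readlines()
    array_data, spin = parse_raw_dos(dos_lines, array_names, array_units)
    return array_data, spin
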
def str2bool(string):
    try:
        false_items = ["f", "0", "false", "no"]
        true_items = ["t", "1", "true", "yes"]

        string = str(string.lower().strip())

        if string in false_items:
            return False
        if string in true_items:
            return True
        else:
            raise QEOutputParsingError('Error converting string '
                                       '{} to boolean value.'.format(string))
    except QEOutputParsingError:
        # re-raise the informative message instead of masking it below
        raise
    except Exception:
        raise QEOutputParsingError('Error converting string to boolean.')

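
# Quick sanity checks for str2bool (inputs are illustrative); left uncalled:
def _example_str2bool():
    assert str2bool(' T ') is True   # 't' after lower()/strip()
    assert str2bool('no') is False
    # str2bool('2') raises QEOutputParsingError
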
def parse_ph_tensor(data):
    """
    Parse the xml tensor file of QE v5.0.3.

    data must be read from the file with the .read() function (avoid
    readlines).
    """
    dom = parseString(data)

    parsed_data = {}
    parsed_data['xml_warnings'] = []

    # card EF_TENSORS
    cardname = 'EF_TENSORS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'DONE_ELECTRIC_FIELD'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    if parsed_data[tagname.lower()]:
        try:
            second_tagname = 'DIELECTRIC_CONSTANT'
            parsed_data[second_tagname.lower()] = parse_xml_matrices(
                second_tagname, target_tags)
        except Exception:
            raise QEOutputParsingError('Failed to parse Dielectric constant')

    tagname = 'DONE_EFFECTIVE_CHARGE_EU'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    if parsed_data[tagname.lower()]:
        try:
            second_tagname = 'EFFECTIVE_CHARGES_EU'
            dumb_matrix = parse_xml_matrices(second_tagname, target_tags)
            # group the rows of the flat matrix into one 3x3 block per atom
            new_matrix = []
            this_at = []
            for i in dumb_matrix:
                this_at.append(i)
                if len(this_at) == 3:
                    new_matrix.append(this_at)
                    this_at = []
            parsed_data[second_tagname.lower()] = new_matrix
        except Exception:
            raise QEOutputParsingError('Failed to parse effective charges eu')

    return parsed_data

def parse_xml_child_integer(tagname, target_tags):
    try:
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        b = a.childNodes[0]
        return int(b.data)
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}'.format(
            tagname, target_tags.tagName))

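
# A self-contained check of the XML child helper above, using a toy document
# that mimics the node layout these parsers expect (real QE files are far
# larger, but the structure is the same); left uncalled:
def _example_parse_xml_child_integer():
    from xml.dom.minidom import parseString
    dom = parseString(
        '<Root><CELL><NUMBER_OF_ATOMS>2</NUMBER_OF_ATOMS></CELL></Root>')
    card = [n for n in dom.documentElement.childNodes
            if n.nodeName == 'CELL'][0]
    assert parse_xml_child_integer('NUMBER_OF_ATOMS', card) == 2
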
def parse_xml_child_str(tagname, target_tags):
    try:
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        b = a.childNodes[0]
        return str(b.data).rstrip().replace('\n', '')
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}'.format(
            tagname, target_tags.tagName))

def parse_raw_out_basic(out_file, calc_name):
    """
    A very simple parser for the standard out, usually aiida.out. Currently
    only parses basic warnings and the walltime.

    :param out_file: the standard out to be parsed
    :param calc_name: the name of the calculation, e.g. PROJWFC
    :return: parsed_data
    """
    # read file
    parsed_data = {}
    parsed_data['warnings'] = []
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        '%%%%%%%%%%%%%%': None,
    }
    minor_warnings = {
        'Warning:': None,
        'DEPRECATED:': None,
    }
    all_warnings = dict(critical_warnings.items() + minor_warnings.items())
    for count in range(len(out_file)):
        line = out_file[count]
        # parse the global file, for information that is written only once
        if 'TOTAL NUMBER OF OPTIMAL BASIS VECTORS :' in line:
            parsed_data['number_optimal_basis_vectors'] = int(
                line.split(':')[-1])
        if calc_name in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0]
                cpu_time = line.split(':')[1].split('CPU')[0]
                parsed_data['wall_time'] = time
                parsed_data['cpu_time'] = cpu_time
            except Exception:
                # the splits may raise IndexError as well, so do not restrict
                # the catch to ValueError
                parsed_data['warnings'].append('Error while parsing wall time.')
            else:
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(time)
                    parsed_data['cpu_time_seconds'] = \
                        convert_qe_time_to_sec(cpu_time)
                except ValueError:
                    raise QEOutputParsingError(
                        "Unable to convert wall_time in seconds.")
        # Parsing of errors
        elif any(i in line for i in all_warnings):
            message = [all_warnings[i] for i in all_warnings.keys()
                       if i in line][0]
            if message is None:
                message = line
            if '%%%%%%%%%%%%%%' in line:
                message = None
                messages = parse_QE_errors(out_file, count,
                                           parsed_data['warnings'])

                # if it found something, add to log
                try:
                    parsed_data['warnings'].extend(messages)
                except UnboundLocalError:
                    pass
            if message is not None:
                parsed_data['warnings'].append(message)

    return parsed_data

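
# convert_qe_time_to_sec is imported from the pw raw parser. For reference, a
# minimal re-implementation sketch, assuming QE prints times in a
# "Nd Nh Nm N.NNs" style; the real helper may differ in detail:
def _convert_qe_time_to_sec_sketch(timestr):
    import re
    seconds_per_unit = {'d': 86400., 'h': 3600., 'm': 60., 's': 1.}
    total = 0.0
    for value, unit in re.findall(r'([\d\.]+)([dhms])', timestr):
        total += float(value) * seconds_per_unit[unit]
    return total
    # e.g. _convert_qe_time_to_sec_sketch(' 1h23.45s ') -> 3623.45
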
def find_orbitals_from_statelines(out_info_dict):
    """
    This function reads in all the state_lines, that is, the lines describing
    which atomic states, taken from the pseudopotential, are used for the
    projection. It then converts these state_lines into a set of orbitals.

    :param out_info_dict: contains various technical internals useful in
        parsing
    :return: orbitals, a list of orbitals suitable for setting ProjectionData
    """
    out_file = out_info_dict["out_file"]
    atomnum_re = re.compile(r"atom (.*?)\(")
    element_re = re.compile(r"\((.*?)\)")
    lnum_re = re.compile(r"l=(.*?)m=")
    mnum_re = re.compile(r"m=(.*?)\)")
    wfc_lines = out_info_dict["wfc_lines"]
    state_lines = [out_file[wfc_line] for wfc_line in wfc_lines]
    state_dicts = []
    for state_line in state_lines:
        try:
            state_dict = {}
            state_dict["atomnum"] = int(atomnum_re.findall(state_line)[0])
            state_dict["atomnum"] -= 1  # to keep with orbital indexing
            state_dict["kind_name"] = element_re.findall(state_line)[0].strip()
            state_dict["angular_momentum"] = int(
                lnum_re.findall(state_line)[0])
            state_dict["magnetic_number"] = int(
                mnum_re.findall(state_line)[0])
            state_dict["magnetic_number"] -= 1  # to keep with orbital indexing
        except (ValueError, IndexError):
            # IndexError covers the case of a regex with no match at all
            raise QEOutputParsingError("State lines are not formatted "
                                       "in a standard way.")
        state_dicts.append(state_dict)

    # here is some logic to figure out the value of radial_nodes to use
    new_state_dicts = []
    for i in range(len(state_dicts)):
        radial_nodes = 0
        state_dict = state_dicts[i].copy()
        for j in range(i - 1, -1, -1):
            if state_dict == state_dicts[j]:
                radial_nodes += 1
        state_dict["radial_nodes"] = radial_nodes
        new_state_dicts.append(state_dict)
    state_dicts = new_state_dicts

    # here is some logic to assign positions based on the atom_index
    structure = out_info_dict["structure"]
    for state_dict in state_dicts:
        site_index = state_dict.pop("atomnum")
        state_dict["position"] = structure.sites[site_index].position

    # here we set the resulting state_dicts to a new set of orbitals
    orbitals = []
    realh = OrbitalFactory("realhydrogen")
    for state_dict in state_dicts:
        this_orb = realh()
        this_orb.set_orbital_dict(state_dict)
        orbitals.append(this_orb)

    return orbitals

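
# Example of the projwfc.x state line the regexes above target (the sample
# line format is assumed from QE output; regexes redefined locally since the
# originals are function-local); left uncalled:
def _example_state_line():
    import re
    atomnum_re = re.compile(r"atom (.*?)\(")
    element_re = re.compile(r"\((.*?)\)")
    lnum_re = re.compile(r"l=(.*?)m=")
    mnum_re = re.compile(r"m=(.*?)\)")
    line = '     state #   1: atom   1 (Fe ), wfc  1 (l=2 m= 1)'
    assert int(atomnum_re.findall(line)[0]) == 1
    assert element_re.findall(line)[0].strip() == 'Fe'
    assert int(lnum_re.findall(line)[0]) == 2
    assert int(mnum_re.findall(line)[0]) == 1
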
def __init__(self, calculation):
    """
    Initialize the instance of ProjwfcParser
    """
    # check for valid input
    if not isinstance(calculation, ProjwfcCalculation):
        raise QEOutputParsingError("Input calc must be a "
                                   "ProjwfcCalculation")
    self._calc = calculation
    super(ProjwfcParser, self).__init__(calculation)

def parse_xml_child_attribute_int(tagname, attributename, target_tags):
    try:
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        value = int(a.getAttribute(attributename))
        return value
    except Exception:
        raise QEOutputParsingError(
            'Error parsing attribute {}, tag {} inside {}'.format(
                attributename, tagname, target_tags.tagName))

def __init__(self, calculation):
    """
    Initialize the instance of DosParser
    """
    # check for valid input (class names aligned with the docstring and the
    # error message, which both refer to the Dos classes)
    if not isinstance(calculation, DosCalculation):
        raise QEOutputParsingError("Input calc must be a DosCalculation")
    self._calc = calculation
    super(DosParser, self).__init__(calculation)

def read_xml_card(dom, cardname):
    try:
        root_node = [_ for _ in dom.childNodes
                     if isinstance(_, xml.dom.minidom.Element)
                     and _.nodeName == "Root"][0]
        the_card = [_ for _ in root_node.childNodes
                    if _.nodeName == cardname][0]
        # the_card = dom.getElementsByTagName(cardname)[0]
        return the_card
    except Exception as e:
        print e
        raise QEOutputParsingError('Error parsing tag {}'.format(cardname))

def parse_cp_xml_output(data):
    """
    Parse xml data.

    data must be a single string, as returned by file.read() (notice the
    difference with parse_text_output!)

    On output, a dictionary with parsed values.

    Democratically, we have decided to use picoseconds as units of time,
    eV for energies, Angstrom for lengths.
    """
    dom = parseString(data)

    parsed_data = {}

    # CARD STATUS
    cardname = 'STATUS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'STEP'
    attrname = 'ITERATION'
    parsed_data[(tagname + '_' + attrname).lower()] = int(
        parse_xml_child_attribute_str(tagname, attrname, target_tags))

    tagname = 'TIME'
    attrname = 'UNITS'
    value = parse_xml_child_float(tagname, target_tags)
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['pico-seconds']:
        raise QEOutputParsingError(
            "Units {} are not supported by parser".format(units))
    parsed_data[tagname.lower()] = value

    tagname = 'TITLE'
    parsed_data[tagname.lower()] = parse_xml_child_str(tagname, target_tags)

    # CARD CELL
    parsed_data, lattice_vectors, volume = copy.deepcopy(
        xml_card_cell(parsed_data, dom))

    # CARD IONS
    parsed_data = copy.deepcopy(
        xml_card_ions(parsed_data, dom, lattice_vectors, volume))

    # CARD TIMESTEPS
    cardname = 'TIMESTEPS'
    target_tags = read_xml_card(dom, cardname)

    # local helpers to avoid repeating the same flat-list-to-3-column-matrix
    # conversion for every tag (behavior-equivalent to the original inline
    # loops for well-formed data, whose length is a multiple of 3)
    def _key(second_tagname, third_tagname=None):
        name = second_tagname if third_tagname is None \
            else second_tagname + '_' + third_tagname
        return name.replace('-', '_').lower()

    def _matrix(parent, tagname):
        tag = parent.getElementsByTagName(tagname)[0]
        flat = [float(x) for x in tag.childNodes[0].data.rstrip().split()]
        return [flat[i:i + 3] for i in range(0, len(flat), 3)]

    def _float(parent, tagname):
        tag = parent.getElementsByTagName(tagname)[0]
        return float(tag.childNodes[0].data)

    for tagname in ['STEP0', 'STEPM']:
        try:
            tag = target_tags.getElementsByTagName(tagname)[0]

            try:
                second_tagname = 'ACCUMULATORS'
                second_tag = tag.getElementsByTagName(second_tagname)[0]
                data_list = second_tag.childNodes[0].data.rstrip().split()
                parsed_data[_key(second_tagname)] = \
                    [float(i) for i in data_list]
            except Exception:
                pass

            second_tagname = 'IONS_POSITIONS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            # stau and svel are mandatory
            for third_tagname in ['stau', 'svel']:
                parsed_data[_key(second_tagname, third_tagname)] = \
                    _matrix(second_tag, third_tagname)
            # taui, cdmi and force may be missing
            try:
                parsed_data[_key(second_tagname, 'taui')] = \
                    _matrix(second_tag, 'taui')
            except Exception:
                pass
            try:
                third_tag = second_tag.getElementsByTagName('cdmi')[0]
                parsed_data[_key(second_tagname, 'cdmi')] = [
                    float(i) for i in
                    third_tag.childNodes[0].data.rstrip().split()]
            except Exception:
                pass
            try:
                parsed_data[_key(second_tagname, 'force')] = \
                    _matrix(second_tag, 'force')
            except Exception:
                pass

            second_tagname = 'IONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['nhpcl', 'nhpdim', 'xnhp']:
                parsed_data[_key(second_tagname, third_tagname)] = \
                    _float(second_tag, third_tagname)
            try:
                parsed_data[_key(second_tagname, 'vnhp')] = \
                    _float(second_tag, 'vnhp')
            except Exception:
                pass

            try:
                second_tagname = 'ekincm'
                second_tag = tag.getElementsByTagName(second_tagname)[0]
                parsed_data[_key(second_tagname)] = float(
                    second_tag.childNodes[0].data)
            except Exception:
                pass

            second_tagname = 'ELECTRONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhe', 'vnhe']:
                try:
                    parsed_data[_key(second_tagname, third_tagname)] = \
                        _float(second_tag, third_tagname)
                except Exception:
                    pass

            second_tagname = 'CELL_PARAMETERS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['ht', 'htvel', 'gvel']:
                try:
                    parsed_data[_key(second_tagname, third_tagname)] = \
                        _matrix(second_tag, third_tagname)
                except Exception:
                    pass

            second_tagname = 'CELL_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhh', 'vnhh']:
                try:
                    parsed_data[_key(second_tagname, third_tagname)] = \
                        _matrix(second_tag, third_tagname)
                except Exception:
                    pass
        except Exception:
            raise QEOutputParsingError(
                'Error parsing CARD {}'.format(cardname))

    # CARD BAND_STRUCTURE_INFO
    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_ATOMIC_WFC'
    parsed_data[tagname.lower().replace('-', '_')] = \
        parse_xml_child_integer(tagname, target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[tagname.lower().replace('-', '_')] = int(
        parse_xml_child_float(tagname, target_tags))

    tagname = 'NUMBER_OF_BANDS'
    parsed_data[tagname.lower().replace('-', '_')] = \
        parse_xml_child_integer(tagname, target_tags)

    tagname = 'NUMBER_OF_SPIN_COMPONENTS'
    parsed_data[tagname.lower().replace('-', '_')] = \
        parse_xml_child_integer(tagname, target_tags)

    return parsed_data

def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)

    # analyze the xml
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml file: {}.".format(xml_file))
        # TODO: this function should probably be the same as the pw one.
        # After all, the parser was fault-tolerant.
        xml_data = parse_cp_xml_output(xml_lines)
    else:
        parser_info['parser_warnings'].append(
            'Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        try:
            with open(xml_counter_file, 'r') as f:
                xml_counter_lines = f.read()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml counter file: {}.".format(
                    xml_counter_file))
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_lines)
    else:
        xml_counter_data = {}

    # analyze the standard output
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    # understand if the job ended smoothly
    job_successful = False
    for line in reversed(out_lines):
        if 'JOB DONE' in line:
            job_successful = True
            break

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data.keys():
        if key in xml_data.keys():
            raise AssertionError('{} found in both dictionaries'.format(key))
        if key in xml_counter_data.keys():
            raise AssertionError('{} found in both dictionaries'.format(key))

    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both (but this should not happen!)
    final_data = dict(xml_data.items() + out_data.items() +
                      xml_counter_data.items())

    # TODO: parse the trajectory and save it in a reasonable format

    return final_data, job_successful

def parse_ph_dynmat(data, lattice_parameter=None, also_eigenvectors=False,
                    parse_header=False):
    """
    Parses frequencies and eigenvectors of a single dynamical matrix.

    :param data: the text read with the function readlines()
    :param lattice_parameter: the lattice_parameter ('alat' in QE jargon).
        If None, q_point is kept in 2pi/a coordinates as in the dynmat file.
    :param also_eigenvectors: if True, return an additional 'eigenvectors'
        array in output, containing also the eigenvectors. This will be
        a list of lists, that when converted to a numpy array has 4 indices,
        with shape Neigenstates x Natoms x 3(xyz) x 2 (re,im).
        To convert to a complex numpy array, you can use::

          ev = np.array(parsed_data['eigenvectors'])
          ev = ev[:,:,:,0] + 1j * ev[:,:,:,1]
    :param parse_header: if True, return additional keys in the returned
        parsed_data dictionary, including information from the header

    :return parsed_data: a dictionary with parsed values and units
    """
    parsed_data = {}
    parsed_data['warnings'] = []

    if 'Dynamical matrix file' not in data[0]:
        raise QEOutputParsingError(
            'Dynamical matrix is not in the expected format')

    frequencies = []
    eigenvectors = []

    starting_line = 1
    if parse_header:
        header_dict = {"warnings": []}
        try:
            pieces = data[2].split()
            if len(pieces) != 9:
                raise QEOutputParsingError("Wrong # of elements on line 3")
            try:
                num_species = int(pieces[0])
                num_atoms = int(pieces[1])
                header_dict['ibrav'] = int(pieces[2])
                header_dict['celldm'] = [float(i) for i in pieces[3:]]
                # In angstrom
                alat = header_dict['celldm'][0] * bohr_to_ang
                if abs(alat) < 1.e-5:
                    raise QEOutputParsingError(
                        "Lattice constant=0! Probably you are using an "
                        "old Quantum ESPRESSO version?")
                header_dict["alat"] = alat
                header_dict["alat_units"] = "angstrom"
            except ValueError:
                raise QEOutputParsingError("Wrong data on line 3")

            starting_line = 3
            if header_dict['ibrav'] == 0:
                if 'Basis vectors' not in data[3]:
                    raise QEOutputParsingError(
                        "Wrong format (no 'Basis vectors' line)")
                try:
                    v1 = [float(_) * alat for _ in data[4].split()]
                    v2 = [float(_) * alat for _ in data[5].split()]
                    v3 = [float(_) * alat for _ in data[6].split()]
                    if len(v1) != 3 or len(v2) != 3 or len(v3) != 3:
                        raise QEOutputParsingError(
                            "Wrong length for basis vectors")
                    header_dict['lattice_vectors'] = [v1, v2, v3]
                    header_dict['lattice_vectors_units'] = "angstrom"
                except ValueError:
                    raise QEOutputParsingError(
                        "Wrong data for basis vectors")
                starting_line += 4

            species = []
            for idx, sp_line in enumerate(
                    data[starting_line:starting_line + num_species],
                    start=1):
                pieces = sp_line.split("'")
                if len(pieces) != 3:
                    raise QEOutputParsingError(
                        "Wrong # of elements for one of the species")
                try:
                    if int(pieces[0]) != idx:
                        raise QEOutputParsingError(
                            "Error with the indices of the species")
                    species.append([pieces[1].strip(),
                                    float(pieces[2]) / amu_Ry])
                except ValueError:
                    raise QEOutputParsingError("Error parsing the species")

            masses = dict(species)
            header_dict['masses'] = masses

            atoms_coords = []
            atoms_labels = []
            starting_line += num_species
            for idx, atom_line in enumerate(
                    data[starting_line:starting_line + num_atoms], start=1):
                pieces = atom_line.split()
                if len(pieces) != 5:
                    raise QEOutputParsingError(
                        "Wrong # of elements for one of the atoms: {}, "
                        "line {}: {}".format(
                            len(pieces), starting_line + idx, pieces))
                try:
                    if int(pieces[0]) != idx:
                        raise QEOutputParsingError(
                            "Error with the indices of the atoms: "
                            "{} vs {}".format(int(pieces[0]), idx))
                    sp_idx = int(pieces[1])
                    if sp_idx > len(species):
                        raise QEOutputParsingError(
                            "Wrong index for the species: "
                            "{}, but max={}".format(sp_idx, len(species)))
                    atoms_labels.append(species[sp_idx - 1][0])
                    atoms_coords.append([float(pieces[2]) * alat,
                                         float(pieces[3]) * alat,
                                         float(pieces[4]) * alat])
                except ValueError:
                    raise QEOutputParsingError("Error parsing the atoms")
                except IndexError:
                    raise QEOutputParsingError(
                        "Error with the indices in the atoms section")
            header_dict['atoms_labels'] = atoms_labels
            header_dict['atoms_coords'] = atoms_coords
            header_dict['atoms_coords_units'] = "angstrom"

            starting_line += num_atoms

            starting_line += 1  # Go to the next line to check
            if 'Dynamical' not in data[starting_line]:
                raise QEOutputParsingError(
                    "Wrong format (no 'Dynamical Matrix' line)")

            ## Here I finish the header parsing

        except QEOutputParsingError as e:
            parsed_data['warnings'].append(
                "Problem parsing the header of the matdyn file! (msg: {}). "
                "Storing only the information I managed to retrieve".format(
                    e.message))
            header_dict['warnings'].append(
                "There was some parsing error and this dictionary is "
                "not complete, see the warnings of the top parsed_data dict")

        # I store what I got
        parsed_data['header'] = header_dict

    for line_counter, line in enumerate(data[starting_line:],
                                        start=starting_line):
        if 'q = ' in line:
            # q point is written several times, because it can also be
            # rotated. I consider only the first point, which is the one
            # computed
            if 'q_point' not in parsed_data:
                q_point = [float(i) for i in
                           line.split('(')[1].split(')')[0].split()]
                if lattice_parameter:
                    parsed_data['q_point'] = [
                        e * 2 * numpy.pi / lattice_parameter
                        for e in q_point]
                    parsed_data['q_point_units'] = 'angstrom-1'
                else:
                    parsed_data['q_point'] = q_point
                    parsed_data['q_point_units'] = '2pi/lattice_parameter'

        if 'freq' in line or 'omega' in line:
            this_freq = line.split('[cm-1]')[0].split('=')[-1]

            # exception for bad fortran coding: *** could be written
            # instead of the number
            if '*' in this_freq:
                frequencies.append(None)
                parsed_data['warnings'].append(
                    'Wrong fortran formatting found while '
                    'parsing frequencies')
            else:
                frequencies.append(float(this_freq))

            this_eigenvectors = []
            for new_line in data[line_counter + 1:]:
                if ('freq' in new_line or 'omega' in new_line or
                        '************************************************'
                        in new_line):
                    break
                this_things = new_line.split('(')[1].split(')')[0].split()
                try:
                    this_flatlist = [float(i) for i in this_things]
                except ValueError:
                    parsed_data['warnings'].append(
                        'Wrong fortran formatting found while '
                        'parsing eigenvectors')
                    # then save the three (xyz) complex numbers as
                    # [None, None]
                    this_eigenvectors.append([[None, None]] * 3)
                    continue

                list_tuples = zip(*[iter(this_flatlist)] * 2)
                # I save every complex number as a list of two numbers
                this_eigenvectors.append(
                    [[i[0], i[1]] for i in list_tuples])

            eigenvectors.append(this_eigenvectors)

    parsed_data['frequencies'] = frequencies
    parsed_data['frequencies_units'] = 'cm-1'
    # TODO: the eigenvectors should be written in the database according to
    # a parser_opts. For now, we don't store them, otherwise we get too much
    # stuff. We implement anyway the possibility to get them with an
    # optional parameter.
    if also_eigenvectors:
        parsed_data['eigenvectors'] = eigenvectors

    return parsed_data

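
# As noted in the docstring above, the parsed eigenvectors can be turned into
# a complex numpy array like this. The dynmat path is a placeholder; left
# uncalled:
def _example_parse_ph_dynmat():
    import numpy as np
    with open('dynamical-matrix-1') as f:  # hypothetical file name
        parsed = parse_ph_dynmat(f.readlines(), also_eigenvectors=True)
    ev = np.array(parsed['eigenvectors'])
    ev = ev[:, :, :, 0] + 1j * ev[:, :, :, 1]  # shape: (modes, atoms, 3)
    return parsed['frequencies'], ev
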
def parse_ph_text_output(lines):
    """
    Parses the stdout of QE-PH.

    :param lines: list of strings, the file as read by readlines()
    :return parsed_data: dictionary with parsed values.
    :return critical_messages: a list with critical messages. If any is found
        in parsed_data['warnings'], the calculation is FAILED!
    """
    from aiida.parsers.plugins.quantumespresso.raw_parser_pw import \
        parse_QE_errors

    parsed_data = {}
    parsed_data['warnings'] = []

    # parse time, starting from the end
    # apparently, the time is written multiple times
    for line in reversed(lines):
        if 'PHONON' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0]
                parsed_data['wall_time'] = time
            except Exception:
                parsed_data['warnings'].append(
                    'Error while parsing wall time.')
            else:
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(parsed_data['wall_time'])
                except ValueError:
                    raise QEOutputParsingError(
                        "Unable to convert wall_time in seconds.")
            break

    # parse number of q-points and number of atoms
    for count, line in enumerate(lines):
        if 'q-points for this run' in line:
            try:
                num_qpoints = int(line.split('/')[1].split('q-points')[0])
                if ('number_of_qpoints' in parsed_data.keys() and
                        num_qpoints != parsed_data['number_of_qpoints']):
                    parsed_data['warnings'].append(
                        "Number of q-points found several times with "
                        "different values")
                else:
                    parsed_data['number_of_qpoints'] = num_qpoints
            except Exception:
                parsed_data['warnings'].append(
                    "Error while parsing number of q-points.")

        elif 'q-points)' in line:
            # case of a 'only_wfc' calculation
            try:
                num_qpoints = int(line.split('q-points')[0].split('(')[1])
                if ('number_of_qpoints' in parsed_data.keys() and
                        num_qpoints != parsed_data['number_of_qpoints']):
                    parsed_data['warnings'].append(
                        "Number of q-points found several times with "
                        "different values")
                else:
                    parsed_data['number_of_qpoints'] = num_qpoints
            except Exception:
                parsed_data['warnings'].append(
                    "Error while parsing number of q-points.")

        elif "number of atoms/cell" in line:
            try:
                num_atoms = int(line.split('=')[1])
                parsed_data['number_of_atoms'] = num_atoms
            except Exception:
                parsed_data['warnings'].append(
                    "Error while parsing number of atoms.")

        elif "irreducible representations" in line:
            if ('number_of_irr_representations_for_each_q'
                    not in parsed_data.keys()):
                parsed_data['number_of_irr_representations_for_each_q'] = []
            try:
                num_irr_repr = int(
                    line.split('irreducible')[0].split('are')[1])
                parsed_data[
                    'number_of_irr_representations_for_each_q'].append(
                        num_irr_repr)
            except Exception:
                pass

        # elif "lattice parameter (alat)" in line:
        #     lattice_parameter = float(
        #         line.split('=')[1].split('a.u.')[0]) * bohr_to_ang

        # elif ('cell' not in parsed_data.keys() and
        #       "crystal axes: (cart. coord. in units of alat)" in line):
        #     cell = [[float(e) * lattice_parameter
        #              for e in li.split("a({}) = (".format(i + 1)
        #                  )[1].split(")")[0].split()]
        #             for i, li in enumerate(lines[count + 1:count + 4])]
        #     parsed_data['cell'] = cell

    # TODO: find a more exhaustive list of the common errors of ph
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'No convergence has been achieved':
            'Phonon did not reach end of self consistency',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        '%%%%%%%%%%%%%%': None,
    }
    minor_warnings = {
        'Warning:': None,
    }
    all_warnings = dict(critical_warnings.items() + minor_warnings.items())

    for count, line in enumerate(lines):
        if any(i in line for i in all_warnings):
            messages = [all_warnings[i] if all_warnings[i] is not None
                        else line
                        for i in all_warnings.keys() if i in line]

            if '%%%%%%%%%%%%%%' in line:
                messages = parse_QE_errors(lines, count,
                                           parsed_data['warnings'])

            # if it found something, add to log
            if len(messages) > 0:
                parsed_data['warnings'].extend(messages)

    return parsed_data, critical_warnings.values()

def parse_raw_ph_output(out_file, tensor_file=None, dynmat_files=[]):
    """
    Parses the output of a ph.x calculation.

    Receives as input the paths to the output file and the xml file.

    :param out_file: path to ph std output
    :return out_dict: a dictionary with parsed data
    :return successful: a boolean that is False in case of failed calculations
    :raises QEOutputParsingError: for errors in the parsing

    There are two different keys to check in output: parser_warnings and
    warnings. On an upper level, these flags MUST be checked. They are
    expected to be empty unless there are QE failures or unfinished jobs.
    """
    job_successful = True
    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = \
        'AiiDA QE-PH Parser v{}'.format(parser_version)

    # load QE out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        # if the file cannot be opened, the error is severe.
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    # in case of executable failures, check if there is any output at all
    if not out_lines:
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break
    if not finished_run:
        warning = 'QE ph run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse tensors, if present
    tensor_data = {}
    if tensor_file:
        with open(tensor_file, 'r') as f:
            tensor_lines = f.read()
        try:
            tensor_data = parse_ph_tensor(tensor_lines)
        except QEOutputParsingError:
            parser_info['parser_warnings'].append(
                'Error while parsing the tensor files')

    # parse ph output
    out_data, critical_messages = parse_ph_text_output(out_lines)

    # if there is a severe error, the calculation is FAILED
    if any([x in out_data['warnings'] for x in critical_messages]):
        job_successful = False

    # parse dynamical matrices if present
    dynmat_data = {}
    if dynmat_files:
        for dynmat_counter, this_dynmat in enumerate(dynmat_files):
            # read it
            with open(this_dynmat, 'r') as f:
                lines = f.readlines()

            # check if the file contains frequencies (i.e. is useful) or not
            dynmat_to_parse = False
            if not lines:
                continue
            try:
                _ = [float(i) for i in lines[0].split()]
            except ValueError:
                dynmat_to_parse = True
            if not dynmat_to_parse:
                continue

            # parse it
            this_dynmat_data = parse_ph_dynmat(lines)

            # join it with the previous dynmat info
            dynmat_data['dynamical_matrix_%s' % dynmat_counter] = \
                this_dynmat_data
            # TODO: use the bands format?

    # join dictionaries: no key should be repeated
    for key in out_data.keys():
        if key in tensor_data.keys():
            raise AssertionError('{} found in two dictionaries'.format(key))

    for key in out_data.keys():
        if key in dynmat_data.keys():
            if key == 'warnings':
                # this key can be found in both, but it is not a problem
                out_data['warnings'] += dynmat_data['warnings']
                del dynmat_data['warnings']
            else:
                raise AssertionError(
                    '{} found in two dictionaries'.format(key))
    # I don't check the dynmat_data and parser_info keys

    final_data = dict(dynmat_data.items() + out_data.items() +
                      tensor_data.items() + parser_info.items())

    return final_data, job_successful

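
# Typical driver usage of the function above (all file names are placeholders
# for whatever the calculation retrieved); left uncalled:
def _example_parse_raw_ph_output():
    parsed, job_ok = parse_raw_ph_output(
        'aiida.out',                                # hypothetical paths
        tensor_file='tensors.xml',
        dynmat_files=['dynamical-matrix-1'])
    if not job_ok:
        print parsed['warnings']
    return parsed
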
def xml_card_ions(parsed_data, dom, lattice_vectors, volume):
    cardname = 'IONS'
    target_tags = read_xml_card(dom, cardname)

    for tagname in ['NUMBER_OF_ATOMS', 'NUMBER_OF_SPECIES']:
        parsed_data[tagname.lower()] = parse_xml_child_integer(
            tagname, target_tags)

    tagname = 'UNITS_FOR_ATOMIC_MASSES'
    attrname = 'UNITS'
    parsed_data[tagname.lower()] = parse_xml_child_attribute_str(
        tagname, attrname, target_tags)

    try:
        parsed_data['species'] = {}
        parsed_data['species']['index'] = []
        parsed_data['species']['type'] = []
        parsed_data['species']['mass'] = []
        parsed_data['species']['pseudo'] = []
        for i in range(parsed_data['number_of_species']):
            tagname = 'SPECIE.' + str(i + 1)
            parsed_data['species']['index'].append(i + 1)

            # a = target_tags.getElementsByTagName(tagname)[0]
            a = [_ for _ in target_tags.childNodes
                 if _.nodeName == tagname][0]

            tagname2 = 'ATOM_TYPE'
            parsed_data['species']['type'].append(
                parse_xml_child_str(tagname2, a))

            tagname2 = 'MASS'
            parsed_data['species']['mass'].append(
                parse_xml_child_float(tagname2, a))

            tagname2 = 'PSEUDO'
            parsed_data['species']['pseudo'].append(
                parse_xml_child_str(tagname2, a))

        tagname = 'UNITS_FOR_ATOMIC_POSITIONS'
        attrname = 'UNITS'
        parsed_data[tagname.lower()] = parse_xml_child_attribute_str(
            tagname, attrname, target_tags)
    except Exception:
        raise QEOutputParsingError('Error parsing tag SPECIE.# inside %s.'
                                   % (target_tags.tagName))
    # TODO convert the units
    # if parsed_data['units_for_atomic_positions'] not in
    #         ['alat', 'bohr', 'angstrom']:

    try:
        atomlist = []
        atoms_index_list = []
        atoms_if_pos_list = []
        tagslist = []
        for i in range(parsed_data['number_of_atoms']):
            tagname = 'ATOM.' + str(i + 1)
            # USELESS AT THE MOMENT, I DON'T SAVE IT
            # parsed_data['atoms']['list_index'] = i

            # a = target_tags.getElementsByTagName(tagname)[0]
            a = [_ for _ in target_tags.childNodes
                 if _.nodeName == tagname][0]

            tagname2 = 'INDEX'
            b = int(a.getAttribute(tagname2))
            atoms_index_list.append(b)

            tagname2 = 'SPECIES'
            chem_symbol = str(a.getAttribute(tagname2)).rstrip().replace(
                "\n", "")
            # I check if it is a subspecie
            chem_symbol_digits = "".join(
                [i for i in chem_symbol if i in string.digits])
            try:
                tagslist.append(int(chem_symbol_digits))
            except ValueError:
                # If I can't parse the digit, it is probably not there:
                # I add a None to the tagslist
                tagslist.append(None)
            # I remove the digits from the symbol
            chem_symbol = chem_symbol.translate(None, string.digits)

            tagname2 = 'tau'
            b = a.getAttribute(tagname2)
            tau = [float(s) for s in b.rstrip().replace("\n", "").split()]
            metric = parsed_data['units_for_atomic_positions']
            if metric not in ['alat', 'bohr', 'angstrom']:
                # REMEMBER TO CONVERT AT THE END
                raise QEOutputParsingError(
                    'Error parsing tag %s inside %s'
                    % (tagname, target_tags.tagName))
            if metric == 'alat':
                tau = [parsed_data['lattice_parameter_xml'] * float(s)
                       for s in tau]
            elif metric == 'bohr':
                tau = [bohr_to_ang * float(s) for s in tau]
            atomlist.append([chem_symbol, tau])

            tagname2 = 'if_pos'
            b = a.getAttribute(tagname2)
            if_pos = [int(s) for s in b.rstrip().replace("\n", "").split()]
            atoms_if_pos_list.append(if_pos)

        parsed_data['atoms'] = atomlist
        parsed_data['atoms_index_list'] = atoms_index_list
        parsed_data['atoms_if_pos_list'] = atoms_if_pos_list

        # saving data together with cell parameters, for better
        # compatibility with ASE
        cell = {}
        cell['lattice_vectors'] = lattice_vectors
        cell['volume'] = volume
        cell['atoms'] = atomlist
        cell['tagslist'] = tagslist
        parsed_data['cell'] = cell
    except Exception:
        raise QEOutputParsingError('Error parsing tag ATOM.# inside %s.'
                                   % (target_tags.tagName))

    # append the units of the quantities converted above
    parsed_data['atomic_positions' + units_suffix] = default_length_units
    parsed_data['direct_lattice_vectors' + units_suffix] = \
        default_length_units

    return parsed_data

def grep_energy_from_line(line):
    try:
        return float(line.split('=')[1].split('Ry')[0]) * ry_to_ev
    except Exception:
        raise QEOutputParsingError('Error while parsing energy')

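
# The energy lines this helper targets look like the sample below (the value
# is converted from Ry to eV; the line itself is illustrative); left uncalled:
def _example_grep_energy():
    line = '!    total energy              =     -85.02970954 Ry'
    return grep_energy_from_line(line)  # -> -85.02970954 * ry_to_ev
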
def xml_card_cell(parsed_data, dom):
    # CARD CELL of QE output
    cardname = 'CELL'
    target_tags = read_xml_card(dom, cardname)

    for tagname in ['NON-PERIODIC_CELL_CORRECTION', 'BRAVAIS_LATTICE']:
        parsed_data[tagname.replace('-', '_').lower()] = \
            parse_xml_child_str(tagname, target_tags)

    tagname = 'LATTICE_PARAMETER'
    value = parse_xml_child_float(tagname, target_tags)
    parsed_data[tagname.replace('-', '_').lower() + '_xml'] = value
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['bohr', 'angstrom']:
        raise QEOutputParsingError(
            'Error parsing attribute {}, tag {} inside {}, units not found'.
            format(attrname, tagname, target_tags.tagName))
    if metric == 'bohr':
        value *= bohr_to_ang
    parsed_data[tagname.replace('-', '_').lower()] = value

    tagname = 'CELL_DIMENSIONS'
    try:
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        b = a.childNodes[0]
        c = b.data.replace('\n', '').split()
        value = [float(i) for i in c]
        parsed_data[tagname.replace('-', '_').lower()] = value
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))

    tagname = 'DIRECT_LATTICE_VECTORS'
    lattice_vectors = []
    try:
        second_tagname = 'UNITS_FOR_DIRECT_LATTICE_VECTORS'
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]
        b = a.getElementsByTagName('UNITS_FOR_DIRECT_LATTICE_VECTORS')[0]
        value = str(b.getAttribute('UNITS')).lower()
        parsed_data[second_tagname.replace('-', '_').lower()] = value

        metric = value
        if metric not in ['bohr', 'angstroms']:
            # REMEMBER TO CHECK THE UNITS AT THE END OF THE FUNCTION
            raise QEOutputParsingError(
                'Error parsing tag {} inside {}: units not supported: {}'.
                format(tagname, target_tags.tagName, metric))

        lattice_vectors = []
        for second_tagname in ['a1', 'a2', 'a3']:
            # b = a.getElementsByTagName(second_tagname)[0]
            b = [_ for _ in a.childNodes if _.nodeName == second_tagname][0]
            c = b.childNodes[0]
            d = c.data.replace('\n', '').split()
            value = [float(i) for i in d]
            if metric == 'bohr':
                value = [bohr_to_ang * float(s) for s in value]
            lattice_vectors.append(value)

        volume = cell_volume(lattice_vectors[0], lattice_vectors[1],
                             lattice_vectors[2])
    except Exception:
        raise QEOutputParsingError(
            'Error parsing tag {} inside {} inside {}.'.format(
                tagname, target_tags.tagName, cardname))
    # NOTE: lattice_vectors will be saved later together with card IONS.atom

    tagname = 'RECIPROCAL_LATTICE_VECTORS'
    try:
        # a = target_tags.getElementsByTagName(tagname)[0]
        a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]

        second_tagname = 'UNITS_FOR_RECIPROCAL_LATTICE_VECTORS'
        b = a.getElementsByTagName(second_tagname)[0]
        value = str(b.getAttribute('UNITS')).lower()
        parsed_data[second_tagname.replace('-', '_').lower()] = value

        metric = value
        # NOTE: output is given in 2 pi / a [ang ^ -1]
        if metric not in ['2 pi / a']:
            raise QEOutputParsingError(
                'Error parsing tag {} inside {}: units {} not supported'.
                format(tagname, target_tags.tagName, metric))

        # reciprocal lattice vectors
        this_matrix = []
        for second_tagname in ['b1', 'b2', 'b3']:
            b = a.getElementsByTagName(second_tagname)[0]
            c = b.childNodes[0]
            d = c.data.replace('\n', '').split()
            value = [float(i) for i in d]
            if metric == '2 pi / a':
                value = [float(s) / parsed_data['lattice_parameter']
                         for s in value]
            this_matrix.append(value)
        parsed_data['reciprocal_lattice_vectors'] = this_matrix
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))

    return parsed_data, lattice_vectors, volume

def parse_cp_text_output(data, xml_data):
    """
    data must be a list of strings, one for each line, as returned by
    readlines(). On output, a dictionary with parsed values.
    """
    # TODO: uniform readlines() and read() usage for passing input to the
    # parser
    parsed_data = {}
    parsed_data['warnings'] = []

    for count, line in enumerate(data):
        if 'warning' in line.lower():
            parsed_data['warnings'].append(line)
        elif 'bananas' in line:
            parsed_data['warnings'].append('Bananas from the ortho.')
        elif 'CP' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0]
                parsed_data['wall_time'] = time
            except Exception:
                raise QEOutputParsingError('Error while parsing wall time.')

    for count, line in enumerate(reversed(data)):
        if 'nfi' in line and 'ekinc' in line and 'econs' in line:
            # the matched line is the column header; the values sit on the
            # line right after it, hence the index without the usual -1
            this_line = data[len(data) - count]

            # the columns up to 'econt' should always be present; the Nose
            # thermostat columns at the end may be missing, hence IndexError
            # is also tolerated for them
            for key, col, errors in [
                    ('ekinc', 1, ValueError),
                    ('temph', 2, ValueError),
                    ('tempp', 3, ValueError),
                    ('etot', 4, ValueError),
                    ('enthal', 5, ValueError),
                    ('econs', 6, ValueError),
                    ('econt', 7, ValueError),
                    ('vnhh', 8, (ValueError, IndexError)),
                    ('xnhh0', 9, (ValueError, IndexError)),
                    ('vnhp', 10, (ValueError, IndexError)),
                    ('xnhp0', 11, (ValueError, IndexError))]:
                try:
                    parsed_data[key] = [float(this_line.split()[col])]
                except errors:
                    pass

    return parsed_data

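
# A toy check of the per-step table parsing above: CP prints a header line
# ('nfi ekinc ... econs econt') followed by a line of values, so the parser
# reads the line after the matched header. The numbers below are made up;
# left uncalled:
def _example_parse_cp_table():
    data = [
        '   nfi    ekinc   temph   tempp        etot      enthal'
        '       econs       econt\n',
        '    10  0.00042     0.0   300.5  -85.029710  -85.029710'
        '  -85.028150  -85.027730\n',
    ]
    parsed = parse_cp_text_output(data, {})
    assert parsed['etot'] == [-85.029710]
    return parsed
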
def parse_neb_text_output(data, input_dict={}):
    """
    Parses the text output of QE Neb.

    :param data: a string, the file as read by read()
    :param input_dict: dictionary with the input parameters
    :return parsed_data: dictionary with key values, referring to quantities
        at the last step.
    :return iteration_data: key,values referring to intermediate iterations.
        Empty dictionary if no value is present.
    :return critical_messages: a list with critical messages. If any is found
        in parsed_data['warnings'], the calculation is FAILED!
    """
    from aiida.parsers.plugins.quantumespresso.raw_parser_pw import \
        parse_QE_errors
    from collections import defaultdict

    # TODO: find a more exhaustive list of the common errors of neb

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'scf convergence NOT achieved on image':
            'SCF did not converge for a given image',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        'reached the maximum number of steps':
            'Maximum number of iterations reached in the image optimization',
        '%%%%%%%%%%%%%%': None,
    }
    minor_warnings = {
        'Warning:': None,
    }
    all_warnings = dict(critical_warnings.items() + minor_warnings.items())

    parsed_data = {}
    parsed_data['warnings'] = []
    iteration_data = defaultdict(list)

    # parse time, starting from the end
    # apparently, the time is written multiple times
    for line in reversed(data.split('\n')):
        if 'NEB' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0].strip()
                parsed_data['wall_time'] = time
            except Exception:
                parsed_data['warnings'].append(
                    'Error while parsing wall time.')
            else:
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(parsed_data['wall_time'])
                except ValueError:
                    raise QEOutputParsingError(
                        "Unable to convert wall_time in seconds.")
            break

    # set by default the calculation as not converged.
    parsed_data['converged'] = [False, 0]

    for count, line in enumerate(data.split('\n')):
        if 'initial path length' in line:
            initial_path_length = float(line.split('=')[1].split('bohr')[0])
            parsed_data['initial_path_length'] = \
                initial_path_length * bohr_to_ang
        elif 'initial inter-image distance' in line:
            initial_image_dist = float(line.split('=')[1].split('bohr')[0])
            parsed_data['initial_image_dist'] = \
                initial_image_dist * bohr_to_ang
        elif 'string_method' in line:
            parsed_data['string_method'] = line.split('=')[1].strip()
        elif 'restart_mode' in line:
            parsed_data['restart_mode'] = line.split('=')[1].strip()
        elif 'opt_scheme' in line:
            parsed_data['opt_scheme'] = line.split('=')[1].strip()
        elif 'num_of_images' in line:
            parsed_data['num_of_images'] = int(line.split('=')[1])
        elif 'nstep_path' in line:
            parsed_data['nstep_path'] = int(line.split('=')[1])
        elif 'CI_scheme' in line:
            parsed_data['ci_scheme'] = line.split('=')[1].strip()
        elif 'first_last_opt' in line:
            parsed_data['first_last_opt'] = \
                True if line.split('=')[1] == 'T' else False
        elif 'use_freezing' in line:
            parsed_data['use_freezing'] = \
                True if line.split('=')[1] == 'T' else False
        elif ' ds ' in line:
            parsed_data['ds_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif ' k_max' in line:
            parsed_data['k_max'] = float(line.split('=')[1].split('a.u.')[0])
        elif ' k_min_au' in line:
            parsed_data['k_min_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_max' in line:
            parsed_data['suggested_k_max_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_min' in line:
            parsed_data['suggested_k_min_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'path_thr' in line:
            parsed_data['path_thr'] = float(
                line.split('=')[1].split('eV')[0])
        elif 'list of climbing images' in line:
            parsed_data['climbing_images_manual'] = [
                int(_) for _ in line.split(':')[1].split(',')[:-1]]
        elif 'neb: convergence achieved in' in line:
            parsed_data['converged'] = [
                True, int(line.split('iteration')[0].split()[-1])]
        elif any(i in line for i in all_warnings):
            message = [all_warnings[i] for i in all_warnings.keys()
                       if i in line][0]
            if message is None:
                message = line

            if '%%%%%%%%%%%%%%' in line:
                message = None
                messages = parse_QE_errors(data.split('\n'), count,
                                           parsed_data['warnings'])

                # if it found something, add to log
                try:
                    parsed_data['warnings'].extend(messages)
                except UnboundLocalError:
                    pass

            if message is not None:
                parsed_data['warnings'].append(message)

    try:
        num_images = parsed_data['num_of_images']
    except KeyError:
        try:
            num_images = input_dict['PATH']['num_of_images']
        except KeyError:
            raise QEOutputParsingError(
                "No information on the number of images available "
                "(neither in input nor in output)")

    iteration_lines = data.split('-- iteration')[1:]
    iteration_lines = [i.split('\n') for i in iteration_lines]

    for iteration in iteration_lines:
        for count, line in enumerate(iteration):
            if 'activation energy (->)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['forward_activation_energy'].append(
                    activ_energy)
            elif 'activation energy (<-)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['backward_activation_energy'].append(
                    activ_energy)
            elif 'image energy (eV) error (eV/A) frozen' in line:
                energies = []
                forces = []
                frozen = []
                try:
                    for i in range(num_images):
                        split_line = iteration[count + 2 + i].split()[1:]
                        energies.append(float(split_line[0]))
                        forces.append(float(split_line[1]))
                        frozen.append(True if split_line[2] == 'T'
                                      else False)
                    iteration_data['image_energies'].append(energies)
                    iteration_data['image_forces'].append(forces)
                    iteration_data['image_frozen'].append(frozen)
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing the image energies and forces.')
            elif 'climbing image' in line:
                iteration_data['climbing_image_auto'].append(
                    [int(_) for _ in line.split('=')[1].split(',')])
            elif 'path length' in line:
                path_length = float(line.split('=')[1].split('bohr')[0])
                iteration_data['path_length'].append(
                    path_length * bohr_to_ang)
            elif 'inter-image distance' in line:
                image_dist = float(line.split('=')[1].split('bohr')[0])
                iteration_data['image_dist'].append(
                    image_dist * bohr_to_ang)

    return parsed_data, dict(iteration_data), critical_warnings.values()

def parse_pw_text_output(data, xml_data=None, structure_data=None,
                         input_dict=None):
    """
    Parses the text output of QE-PWscf.

    :param data: a string, the file as read by read()
    :param xml_data: the dictionary with the keys read from xml.
    :param structure_data: dictionary, coming from the xml, with info on the
        structure
    :return parsed_data: dictionary with key values, referring to quantities
        at the last scf step.
    :return trajectory_data: key,values referring to intermediate scf steps,
        as in the case of vc-relax. Empty dictionary if no value is present.
    :return critical_messages: a list with critical messages. If any is found
        in parsed_data['warnings'], the calculation is FAILED!
    """
    parsed_data = {}
    parsed_data['warnings'] = []
    vdw_correction = False
    trajectory_data = {}

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'The maximum number of steps has been reached.':
            "The maximum step of the ionic/electronic relaxation has been "
            "reached.",
        'convergence NOT achieved after':
            "The scf cycle did not reach convergence.",
        # 'eigenvalues not converged': None,  # special treatment
        'iterations completed, stopping':
            'Maximum number of iterations reached in Wentzcovitch Damped '
            'Dynamics.',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        '%%%%%%%%%%%%%%': None,
    }
    minor_warnings = {
        'Warning:': None,
        'DEPRECATED:': None,
        'incommensurate with FFT grid':
            'The FFT is incommensurate: some symmetries may be lost.',
        'SCF correction compared to forces is too large, reduce conv_thr':
            "Forces are inaccurate (SCF correction is large): reduce "
            "conv_thr.",
    }
    all_warnings = dict(critical_warnings.items() + minor_warnings.items())

    # Find some useful quantities.
    try:
        for line in data.split('\n'):
            if 'lattice parameter (alat)' in line:
                alat = float(line.split('=')[1].split('a.u')[0])
            elif 'number of atoms/cell' in line:
                nat = int(line.split('=')[1])
            elif 'number of atomic types' in line:
                ntyp = int(line.split('=')[1])
            elif 'unit-cell volume' in line:
                volume = float(line.split('=')[1].split('(a.u.)^3')[0])
            elif 'number of Kohn-Sham states' in line:
                nbnd = int(line.split('=')[1])
                break
        alat *= bohr_to_ang
        volume *= bohr_to_ang ** 3
        parsed_data['number_of_bands'] = nbnd
    except NameError:
        # nat or other variables were not found, and thus not initialized;
        # try to get some error message
        for count, line in enumerate(data.split('\n')):
            if any(i in line for i in all_warnings):
                messages = [all_warnings[i] if all_warnings[i] is not None
                            else line
                            for i in all_warnings.keys() if i in line]

                if '%%%%%%%%%%%%%%' in line:
                    messages = parse_QE_errors(data.split('\n'), count,
                                               parsed_data['warnings'])

                # if it found something, add to log
                if len(messages) > 0:
                    parsed_data['warnings'].extend(messages)

        if len(parsed_data['warnings']) > 0:
            return parsed_data, trajectory_data, critical_warnings.values()
        else:
            # did not find any error message -> raise an Error and do not
            # return anything
            raise QEOutputParsingError("Parser can't load basic info.")

    # Save these two quantities in the parsed_data, because they will be
    # useful for queries (maybe), and structure_data will not be stored as
    # a ParameterData
    parsed_data['number_of_atoms'] = nat
    parsed_data['number_of_species'] = ntyp
    parsed_data['volume'] = volume

    c_bands_error = False

    # now grep quantities that can be considered isolated pieces of
    # information.
    for count, line in enumerate(data.split('\n')):
        # special parsing of c_bands error
        if 'c_bands' in line and 'eigenvalues not converged' in line:
            c_bands_error = True
        elif "iteration #" in line and c_bands_error:
            # if there is another iteration, c_bands is not necessarily a
            # problem. I put a warning only if the c_bands error appears in
            # the last iteration
            c_bands_error = False

        # Parsing of errors
        elif any(i in line for i in all_warnings):
            message = [all_warnings[i] for i in all_warnings.keys()
                       if i in line][0]
            if message is None:
                message = line

            # if the run is a molecular dynamics, I ignore that I reached
            # the last iteration step.
            if ('The maximum number of steps has been reached.' in line and
                    'md' in input_dict['CONTROL']['calculation']):
                message = None

            if 'iterations completed, stopping' in line:
                value = message
                message = None
                if 'Wentzcovitch Damped Dynamics:' in line:
                    dynamic_iterations = int(line.split()[3])
                    # NOTE: max_dynamic_iterations must be defined earlier
                    # in the full parser (from the input parameters); it is
                    # not set in this excerpt
                    if max_dynamic_iterations == dynamic_iterations:
                        message = value

            if '%%%%%%%%%%%%%%' in line:
                message = None
                messages = parse_QE_errors(data.split('\n'), count,
                                           parsed_data['warnings'])

                # if it found something, add to log
                try:
                    parsed_data['warnings'].extend(messages)
                except UnboundLocalError:
                    pass

            if message is not None:
                parsed_data['warnings'].append(message)

    if c_bands_error:
        parsed_data['warnings'].append(
            "c_bands: at least 1 eigenvalues not converged")

    # I split the output text in the atomic SCF calculations.
    # The initial part should be things already contained in the xml
    # (cell, initial positions, kpoints, ...) and I skip them.
    # In case, parse for them before this point.
    # Put everything in a trajectory_data dictionary
    relax_steps = data.split('Self-consistent Calculation')[1:]
    relax_steps = [i.split('\n') for i in relax_steps]

    # now I create a bunch of arrays for every step.
    for data_step in relax_steps:
        for count, line in enumerate(data_step):
            # NOTE: in the above, the chemical symbols are not those of
            # AiiDA since the AiiDA structure is different. So, I assume now
            # that the order of atoms is the same as in the input atomic
            # structure.

            # Computed dipole correction in slab geometries.
            # save dipole in debye units, only at last iteration of scf
            # cycle

            # grep energy and, eventually, magnetization
            if '!' in line:
                if 'makov-payne' in line.lower():
                    try:
                        for key in ['total', 'envir']:
                            if key in line.lower():
                                En = float(line.split('=')[1]
                                           .split('Ry')[0]) * ry_to_ev
                                try:
                                    trajectory_data[
                                        key + '_makov-payne'].append(En)
                                except KeyError:
                                    trajectory_data[
                                        key + '_makov-payne'] = [En]
                                parsed_data[key + '_makov-payne' +
                                            units_suffix] = \
                                    default_energy_units
                    except Exception:
                        parsed_data['warnings'].append(
                            'Error while parsing the energy')
                else:
                    try:
                        for key in ['energy', 'energy_accuracy']:
                            if key not in trajectory_data:
                                trajectory_data[key] = []
                        En = float(line.split('=')[1]
                                   .split('Ry')[0]) * ry_to_ev
                        E_acc = float(data_step[count + 2].split('<')[1]
                                      .split('Ry')[0]) * ry_to_ev
                        for key, value in [['energy', En],
                                           ['energy_accuracy', E_acc]]:
                            trajectory_data[key].append(value)
                            parsed_data[key + units_suffix] = \
                                default_energy_units
                    except Exception:
                        parsed_data['warnings'].append(
                            'Error while parsing the energy')

            elif 'the Fermi energy is' in line:
                try:
                    # convert to float: the raw split leaves a string
                    value = float(line.split('is')[1].split('ev')[0])
                    try:
                        trajectory_data['fermi_energy'].append(value)
                    except KeyError:
                        trajectory_data['fermi_energy'] = [value]
                    parsed_data['fermi_energy' + units_suffix] = \
                        default_energy_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing Fermi energy from the output '
                        'file.')

            elif 'Forces acting on atoms (Ry/au):' in line:
                try:
                    forces = []
                    j = 0
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom ' in line2:
                            line2 = line2.split('=')[1].split()
                            # CONVERT FORCES IN eV/Ang
                            vec = [float(s) * ry_to_ev / bohr_to_ang
                                   for s in line2]
                            forces.append(vec)
                        if len(forces) == nat:
                            break
                    try:
                        trajectory_data['forces'].append(forces)
                    except KeyError:
                        trajectory_data['forces'] = [forces]
                    parsed_data['forces' + units_suffix] = \
                        default_force_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing forces.')
            # TODO: add parsing support for the decomposition of the forces

            elif 'Total force =' in line:
                try:
                    # note that I can't check the units: not written in
                    # output!
                    value = float(line.split('=')[1].split('Total')[0]) \
                        * ry_to_ev / bohr_to_ang
                    try:
                        trajectory_data['total_force'].append(value)
                    except KeyError:
                        trajectory_data['total_force'] = [value]
                    parsed_data['total_force' + units_suffix] = \
                        default_force_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing total force.')

            elif 'entering subroutine stress ...' in line:
                try:
                    stress = []
                    for k in range(10):
                        if "P=" in data_step[count + k + 1]:
                            count2 = count + k + 1
                    if '(Ry/bohr**3)' not in data_step[count2]:
                        raise QEOutputParsingError(
                            'Error while parsing stress: unexpected units.')
                    for k in range(3):
                        line2 = data_step[count2 + k + 1].split()
                        vec = [float(s) * 10 ** (-9) * ry_si /
                               (bohr_si) ** 3 for s in line2[0:3]]
                        stress.append(vec)
                    try:
                        trajectory_data['stress'].append(stress)
                    except KeyError:
                        trajectory_data['stress'] = [stress]
                    parsed_data['stress' + units_suffix] = \
                        default_stress_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing stress tensor.')

    return parsed_data, trajectory_data, critical_warnings.values()

def parse_raw_out_basic(out_file, calc_name):
    """
    A very simple parser for the standard out, usually aiida.out.
    Currently only parses basic warnings and the walltime.

    :param out_file: the standard out to be parsed, as a list of lines
    :param calc_name: the name of the calculation, e.g. PROJWFC
    :return: parsed_data
    """

    # read file
    parsed_data = {}
    parsed_data['warnings'] = []
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
                         '%%%%%%%%%%%%%%': None,
                         }
    minor_warnings = {'Warning:': None,
                      'DEPRECATED:': None,
                      }
    all_warnings = dict(critical_warnings.items() + minor_warnings.items())
    for count, line in enumerate(out_file):
        # parse the global file, for information that is written only once
        if calc_name in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0].strip()
                cpu_time = line.split(':')[1].split('CPU')[0].strip()
                parsed_data['wall_time'] = time
                parsed_data['cpu_time'] = cpu_time
            except (IndexError, ValueError):
                parsed_data['warnings'].append('Error while parsing wall time.')
            else:
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(time)
                    parsed_data['cpu_time_seconds'] = \
                        convert_qe_time_to_sec(cpu_time)
                except ValueError:
                    raise QEOutputParsingError(
                        "Unable to convert wall_time in seconds.")
        # Parsing of errors
        elif any(i in line for i in all_warnings):
            message = [all_warnings[i] for i in all_warnings.keys()
                       if i in line][0]
            if message is None:
                message = line

            messages = []
            if '%%%%%%%%%%%%%%' in line:
                message = None
                messages = parse_QE_errors(out_file, count,
                                           parsed_data['warnings'])

            # if the %%% block parser found something, add it to the log
            parsed_data['warnings'].extend(messages)
            if message is not None:
                parsed_data['warnings'].append(message)

        elif 'Fermi energy' in line and '=' in line:
            parsed_data['fermi_energy'] = float(
                line.split('=')[1].split('eV')[0])
            parsed_data['fermi_energy_units'] = 'eV'

        elif 'Drude plasma frequency' in line:
            # the tensor components are printed one per line, e.g.
            # 'Drude plasma frequency (xx) =  ... eV'
            for comp in ['xx', 'yy', 'zz', 'xy', 'xz', 'yz']:
                if '({})'.format(comp) in line:
                    parsed_data['drude_plasma_frequency_' + comp] = float(
                        line.split('=')[1].split('eV')[0])
                    parsed_data['drude_plasma_frequency_units'] = 'eV'

    return parsed_data
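
# Illustrative sketch (not part of the original parser): the real
# ``convert_qe_time_to_sec`` helper is defined elsewhere in this module.
# QE prints wall/CPU times in forms such as '1h23m' or '12m34.56s'; a
# minimal converter for that style of string, shown here only to clarify
# what the calls above expect (the exact accepted formats are an
# assumption of this example), might read:
def _example_qe_time_to_sec(timestr):
    """Convert a Quantum ESPRESSO time string like '1h23m' to seconds."""
    import re
    units = {'d': 86400., 'h': 3600., 'm': 60., 's': 1.}
    total = 0.
    for value, unit in re.findall(r'([0-9.]+)([dhms])', timestr):
        total += float(value) * units[unit]
    return total

# Example: _example_qe_time_to_sec('1h23m') returns 4980.0.
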
def _parse_bands_and_projections(self, out_info_dict):
    """
    Function that parses the standard output into bands and projection
    data.

    :param out_info_dict: used to pass technical internal variables
                          to helper functions in compact form; must
                          contain the standard output as a list of
                          lines under the key "out_file"
    :return: append_nodes_list a list containing BandsData and
             ProjectionData parsed from the standard output
    """
    out_file = out_info_dict["out_file"]
    out_info_dict["k_lines"] = []
    out_info_dict["e_lines"] = []
    out_info_dict["psi_lines"] = []
    out_info_dict["wfc_lines"] = []
    append_nodes_list = []

    for i in range(len(out_file)):
        if "k =" in out_file[i]:
            out_info_dict["k_lines"].append(i)
        if "==== e(" in out_file[i]:
            out_info_dict["e_lines"].append(i)
        if "|psi|^2" in out_file[i]:
            out_info_dict["psi_lines"].append(i)
        if "state #" in out_file[i]:
            out_info_dict["wfc_lines"].append(i)

    # Basic consistency checks
    if len(out_info_dict["e_lines"]) != len(out_info_dict["psi_lines"]):
        raise QEOutputParsingError("Not formatted in a manner "
                                   "that can be handled")
    if len(out_info_dict["psi_lines"]) % len(out_info_dict["k_lines"]) != 0:
        raise QEOutputParsingError("Band energy points are not "
                                   "a multiple of kpoints")
    # calculates the number of bands
    out_info_dict["num_bands"] = len(
        out_info_dict["psi_lines"]) / len(out_info_dict["k_lines"])

    # Uses the parent input parameters, and checks if the parent used
    # spin calculations. Try to replace with a query, if possible.
    parent_remote = self._calc.get_inputs_dict()['parent_calc_folder']
    parent_calc = parent_remote.get_inputs_dict()['remote_folder']
    out_info_dict["parent_calc"] = parent_calc
    parent_param = parent_calc.get_outputs_dict()['output_parameters']
    try:
        structure = parent_calc.get_inputs_dict()['structure']
    except KeyError:
        raise ValueError("The parent had no structure! "
                         "Cannot parse from this!")
    try:
        nspin = parent_param.get_dict()['number_of_spin_components']
        spin = (nspin != 1)
    except KeyError:
        spin = False
    out_info_dict["spin"] = spin

    # changes k-numbers to match spin,
    # because if spin is on, k points double for up and down
    out_info_dict["k_states"] = len(out_info_dict["k_lines"])
    if spin:
        if out_info_dict["k_states"] % 2 != 0:
            raise ValueError("Internal formatting error regarding spin")
        out_info_dict["k_states"] = out_info_dict["k_states"] / 2

    # adds in the k-vector for each kpoint
    k_vect = [out_file[out_info_dict["k_lines"][i]].split()[2:]
              for i in range(out_info_dict["k_states"])]
    out_info_dict["k_vect"] = np.array(k_vect)
    out_info_dict["structure"] = structure
    out_info_dict["orbitals"] = find_orbitals_from_statelines(out_info_dict)

    if spin:
        # I had to guess what the ordering of the spin is, because
        # the projwfc.x documentation doesn't say, but looking at the
        # source code I found:
        #
        # DO is=1,nspin
        #   IF (nspin==2) THEN
        #     IF (is==1) filename=trim(filproj)//'.up'
        #     IF (is==2) filename=trim(filproj)//'.down'
        #
        # Which would say that it is reasonable to assume that the
        # spin up states are written first, then spin down
        #
        out_info_dict["spin_down"] = False
        bands_data1, projection_data1 = spin_dependent_subparcer(
            out_info_dict)
        append_nodes_list += [("projections_up", projection_data1),
                              ("bands_up", bands_data1)]
        out_info_dict["spin_down"] = True
        bands_data2, projection_data2 = spin_dependent_subparcer(
            out_info_dict)
        append_nodes_list += [("projections_down", projection_data2),
                              ("bands_down", bands_data2)]
    else:
        out_info_dict["spin_down"] = False
        bands_data, projection_data = spin_dependent_subparcer(
            out_info_dict)
        append_nodes_list += [("projections", projection_data),
                              ("bands", bands_data)]

    return append_nodes_list
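
# Illustrative sketch (not part of the original parser): with nspin == 2,
# projwfc.x writes every k-point block twice, first all spin-up blocks
# and then all spin-down blocks, which is why ``k_states`` is halved
# above. Splitting a list of k-line indices accordingly could be written
# as:
def _example_split_spin_k_lines(k_lines):
    """Split k-point line indices into (spin_up, spin_down) halves."""
    if len(k_lines) % 2 != 0:
        raise ValueError("Odd number of k-point blocks in a spin run")
    half = len(k_lines) // 2
    return k_lines[:half], k_lines[half:]

# Example: _example_split_spin_k_lines([10, 20, 30, 40])
# returns ([10, 20], [30, 40]).
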
def parse_raw_output_neb(out_file, input_dict, parser_opts=None):
    """
    Parses the output of a neb calculation.
    Receives as input the path to the output file.

    :param out_file: path to neb std output
    :param input_dict: dictionary with the neb input parameters
    :param parser_opts: not used

    :return parameter_data: a dictionary with parsed parameters
    :return iteration_data: a dictionary with arrays (for relax & md calcs.)
    :return job_successful: a boolean that is False in case of failed calculations

    :raises QEOutputParsingError: for errors in the parsing
    :raises AssertionError: if two keys in the parsed dicts are found to be equal

    Two keys to check in output: parser_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first is expected to be empty unless QE failed or the job is unfinished.
    """
    import copy

    job_successful = True
    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)

    # load the NEB output file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:
        # non existing output file -> job crashed
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    if not out_lines:
        # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines.split('\n')[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break
    if not finished_run:
        # the job has not finished: warn
        warning = 'QE neb run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse the text output of the neb calculation
    try:
        out_data, iteration_data, critical_messages = parse_neb_text_output(
            out_lines, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append(
                'Error while parsing the output file')
            out_data = {}
            iteration_data = {}
            critical_messages = []
        else:  # if it finished and I still got an error, it's a bug in the parser
            raise QEOutputParsingError('Error while parsing NEB output')

    # I add to out_data the last element of each iteration_data value.
    # I leave the possibility to skip some large arrays (none for the time
    # being).
    skip_keys = []
    tmp_iteration_data = copy.copy(iteration_data)
    for x in tmp_iteration_data.iteritems():
        if x[0] in skip_keys:
            continue
        out_data[x[0]] = x[1][-1]

    # if there is a severe error, the calculation is FAILED
    if any([x in out_data.get('warnings', []) for x in critical_messages]):
        job_successful = False

    parameter_data = dict(out_data.items() + parser_info.items())

    # return various data.
    # parameter data will be mapped in ParameterData
    # iteration_data in ArrayData
    return parameter_data, iteration_data, job_successful
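
# Illustrative sketch (not part of the original parser): both this NEB
# parser and ``parse_raw_output`` below copy the last element of every
# per-iteration array into the scalar output dictionary. The pattern,
# isolated into a small function, is simply:
def _example_last_iteration_values(iteration_data, skip_keys=()):
    """Return {key: last_value} for every array not listed in skip_keys."""
    return dict((key, values[-1])
                for key, values in iteration_data.items()
                if key not in skip_keys)

# Example: _example_last_iteration_values({'energy': [1.0, 0.5, 0.2]})
# returns {'energy': 0.2}.
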
def parse_raw_output(out_file, input_dict, parser_opts=None, xml_file=None,
                     dir_with_bands=None):
    """
    Parses the output of a calculation.
    Receives as input the paths to the output file and the xml file.

    :param out_file: path to pw std output
    :param input_dict: not used
    :param parser_opts: not used
    :param dir_with_bands: path to directory with all k-points (Kxxxxx) folders
    :param xml_file: path to QE data-file.xml

    :return parameter_data: a dictionary with parsed data
    :return trajectory_data: a dictionary with arrays (for relax & md calcs.)
    :return structure_data: a dictionary with data for the output structure
    :return job_successful: a boolean that is False in case of failed calculations

    :raises aiida.parsers.plugins.quantumespresso.QEOutputParsingError: for
        errors in the parsing
    :raises AssertionError: if two keys in the parsed dicts are found to be equal

    Three keys to check in output: parser_warnings, xml_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first two are expected to be empty unless QE failed or the job is
    unfinished.
    """
    import copy
    # TODO: a lot of ifs could be cleaned out
    # TODO: input_dict should be used as well

    job_successful = True
    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Basic Parser v{}'.format(
        parser_version)

    # if xml_file is not given in input, skip its parsing
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()  # Note: read() and not readlines()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml file: {}.".format(xml_file))
        # Note: the xml file should always be consistent.
        xml_data, structure_data = parse_pw_xml_output(xml_lines,
                                                       dir_with_bands)
    else:
        parser_info['parser_warnings'].append(
            'Skipping the parsing of the xml file.')
        xml_data = {}
        bands_data = {}
        structure_data = {}

    # load QE out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:
        # non existing output file -> job crashed
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    if not out_lines:
        # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = False
    for line in out_lines.split('\n')[::-1]:
        if 'JOB DONE' in line:
            finished_run = True
            break
    if not finished_run:
        # the job has not finished: warn
        warning = 'QE pw run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse
    try:
        out_data, trajectory_data, critical_messages = parse_pw_text_output(
            out_lines, xml_data, structure_data, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append(
                'Error while parsing the output file')
            out_data = {}
            trajectory_data = {}
            critical_messages = []
        else:  # if it finished and I still got an error, it's a bug in the parser
            raise QEOutputParsingError('Error while parsing QE output')

    # I add to out_data the last element of each trajectory_data value,
    # skipping some large arrays that I will likely never query.
    skip_keys = ['forces', 'lattice_vectors_relax',
                 'atomic_positions_relax', 'atomic_species_name']
    tmp_trajectory_data = copy.copy(trajectory_data)
    for x in tmp_trajectory_data.iteritems():
        if x[0] in skip_keys:
            continue
        out_data[x[0]] = x[1][-1]
        if len(x[1]) == 1:
            # drop keys that are not really arrays (single scf cycles)
            trajectory_data.pop(x[0])

    # As the k points form a rather large array, and they are mainly an
    # info contained in the input file that I am not likely to query,
    # I move them to the trajectory data.
    # note: if an array is empty, there will be a KeyError
    for key in ['k_points', 'k_points_weights']:
        try:
            trajectory_data[key] = xml_data.pop(key)
        except KeyError:
            pass

    # if there is a severe error, the calculation is FAILED
    if any([x in out_data.get('warnings', []) for x in critical_messages]):
        job_successful = False

    for key in out_data.keys():
        if key in xml_data.keys():
            if key == 'fermi_energy' or key == 'fermi_energy_units':
                # an exception for the (only?) key that may be found in both
                del out_data[key]
            else:
                # this shouldn't happen!
                raise AssertionError('{} found in both dictionaries, '
                                     'values: {} vs. {}'.format(
                                         key, out_data[key], xml_data[key]))
    # out_data keys take precedence and overwrite xml_data keys,
    # if the same key name is shared by both
    # dictionaries (but this should not happen!)
    parameter_data = dict(xml_data.items() + out_data.items() +
                          parser_info.items())

    # return various data.
    # parameter data will be mapped in ParameterData
    # trajectory_data in ArrayData
    # structure_data in a Structure
    # bands_data should probably be merged in ArrayData
    return parameter_data, trajectory_data, structure_data, job_successful
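
# Illustrative sketch (not part of the original parser): in the Python 2
# idiom ``dict(a.items() + b.items())`` used above, keys of ``b``
# overwrite keys of ``a`` because later pairs win when the dict is
# built. For instance (hypothetical values, list() added so the snippet
# also runs on Python 3):
def _example_merge_precedence():
    xml_like = {'fermi_energy': 1.0, 'volume': 270.0}
    out_like = {'fermi_energy': 1.1}
    merged = dict(list(xml_like.items()) + list(out_like.items()))
    return merged  # {'volume': 270.0, 'fermi_energy': 1.1}
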
def spin_dependent_subparcer(out_info_dict):
    """
    Finds the projection and bands arrays from the out_file and
    out_info_dict. Used to handle the different possible spin-cases in
    a convenient manner.

    :param out_info_dict: contains various technical internals useful in parsing
    :return: ProjectionData, BandsData parsed from out_file
    """
    out_file = out_info_dict["out_file"]
    spin_down = out_info_dict["spin_down"]
    od = out_info_dict  # using a shorter name for convenience

    # regular expressions needed for later parsing
    WaveFraction1_re = re.compile(r"\=(.*?)\*")       # state composition 1
    WaveFractionremain_re = re.compile(r"\+(.*?)\*")  # state comp 2
    FunctionId_re = re.compile(r"\#(.*?)\]")          # state identity

    # primes arrays for the later parsing
    num_wfc = len(od["wfc_lines"])
    bands = np.zeros([od["k_states"], od["num_bands"]])
    projection_arrays = np.zeros([od["k_states"], od["num_bands"], num_wfc])

    try:
        for i in range(od["k_states"]):
            if spin_down:
                i += od["k_states"]
            # grabs band energy
            for j in range(i * od["num_bands"], (i + 1) * od["num_bands"], 1):
                out_ind = od["e_lines"][j]
                val = float(out_file[out_ind].split()[4])
                bands[i % od["k_states"]][j % od["num_bands"]] = val
                # subloop grabs pdos
                wave_fraction = []
                wave_id = []
                for k in range(od["e_lines"][j] + 1, od["psi_lines"][j], 1):
                    out_line = out_file[k]
                    wave_fraction += WaveFraction1_re.findall(out_line)
                    wave_fraction += WaveFractionremain_re.findall(out_line)
                    wave_id += FunctionId_re.findall(out_line)
                if len(wave_id) != len(wave_fraction):
                    raise IndexError
                for l in range(len(wave_id)):
                    wave_id[l] = int(wave_id[l])
                    wave_fraction[l] = float(wave_fraction[l])
                    # sets relevant values in pdos_array
                    projection_arrays[i % od["k_states"]][
                        j % od["num_bands"]][wave_id[l] - 1] = wave_fraction[l]
    except IndexError:
        raise QEOutputParsingError("the standard out file does not "
                                   "comply with the official "
                                   "documentation.")

    bands_data = BandsData()
    try:
        # attempts to retrieve the kpoints from the parent calc
        parent_calc = out_info_dict["parent_calc"]
        parent_kpoints = parent_calc.get_inputs_dict()['kpoints']
        if len(od['k_vect']) != len(parent_kpoints.get_kpoints()):
            raise AttributeError
        bands_data.set_kpointsdata(parent_kpoints)
    except AttributeError:
        bands_data.set_kpoints(od['k_vect'].astype(float))

    bands_data.set_bands(bands, units='eV')

    orbitals = out_info_dict["orbitals"]
    if len(orbitals) != np.shape(projection_arrays[0, 0, :])[0]:
        raise QEOutputParsingError("There was an internal parsing error: "
                                   "the projection array shape does not "
                                   "agree with the number of orbitals")
    projection_data = ProjectionData()
    projection_data.set_reference_bandsdata(bands_data)
    projections = [projection_arrays[:, :, i] for i in range(len(orbitals))]

    # do the bands_check manually here
    for projection in projections:
        if np.shape(projection) != np.shape(bands):
            raise AttributeError("Projections not the same shape as the bands")

    # insert here some logic to assign pdos to the orbitals
    pdos_arrays = spin_dependent_pdos_subparcer(out_info_dict)
    energy_arrays = [out_info_dict["energy"]] * len(orbitals)
    projection_data.set_projectiondata(orbitals,
                                       list_of_projections=projections,
                                       list_of_energy=energy_arrays,
                                       list_of_pdos=pdos_arrays,
                                       bands_check=False)
    return bands_data, projection_data
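
# Illustrative sketch (not part of the original parser): applied to a
# projwfc.x composition line of the (assumed) form
# ``psi = 0.500*[#   1]+0.250*[#   2]+``, the three regular expressions
# above pick out the leading fraction, the remaining fractions and the
# state indices respectively:
def _example_projwfc_regexes():
    import re
    sample = "psi = 0.500*[#   1]+0.250*[#   2]+"
    fractions = (re.compile(r"\=(.*?)\*").findall(sample) +
                 re.compile(r"\+(.*?)\*").findall(sample))
    state_ids = re.compile(r"\#(.*?)\]").findall(sample)
    # fractions == [' 0.500', '0.250'], state_ids == ['   1', '   2']
    return [float(f) for f in fractions], [int(s) for s in state_ids]
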
def _flat_floats_to_matrix(flat_list):
    """
    Helper factored out of the repeated conversions in
    parse_cp_xml_output below: reshape a flat list of floats into a
    list of 3-component rows (an Nx3 matrix).
    """
    return [flat_list[i:i + 3] for i in range(0, len(flat_list), 3)]


def parse_cp_xml_output(data):
    """
    Parse xml data.
    data must be a single string, as returned by file.read()
    (notice the difference with parse_text_output!)
    On output, a dictionary with parsed values.
    Democratically, we have decided to use picoseconds as units of time,
    eV for energies, Angstrom for lengths.
    """
    import copy

    dom = parseString(data)

    parsed_data = {}

    # CARD HEADER
    parsed_data = copy.deepcopy(xml_card_header(parsed_data, dom))

    # CARD CONTROL
    cardname = 'CONTROL'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'PP_CHECK_FLAG'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    # CARD STATUS
    cardname = 'STATUS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'STEP'
    attrname = 'ITERATION'
    parsed_data[(tagname + '_' + attrname).lower()] = int(
        parse_xml_child_attribute_str(tagname, attrname, target_tags))

    tagname = 'TIME'
    attrname = 'UNITS'
    value = parse_xml_child_float(tagname, target_tags)
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['pico-seconds']:
        raise QEOutputParsingError(
            "Units {} are not supported by parser".format(units))
    parsed_data[tagname.lower()] = value

    tagname = 'TITLE'
    parsed_data[tagname.lower()] = parse_xml_child_str(tagname, target_tags)

    # CARD CELL
    parsed_data, lattice_vectors, volume = copy.deepcopy(
        xml_card_cell(parsed_data, dom))

    # CARD IONS
    parsed_data = copy.deepcopy(
        xml_card_ions(parsed_data, dom, lattice_vectors, volume))

    # CARD PLANE WAVES
    parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data, dom, 'cp'))

    # CARD SPIN
    parsed_data = copy.deepcopy(xml_card_spin(parsed_data, dom))

    # CARD EXCHANGE_CORRELATION
    parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data, dom))

    # TODO: CARD OCCUPATIONS

    # CARD BRILLOUIN ZONE
    # TODO: k points are saved for CP... Why?
    cardname = 'BRILLOUIN_ZONE'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_K-POINTS'
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'UNITS_FOR_K-POINTS'
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['2 pi / a']:
        raise QEOutputParsingError(
            'Error parsing attribute {}, tag {} inside {}, '
            'units unknown'.format(attrname, tagname, target_tags.tagName))
    parsed_data[tagname.replace('-', '_').lower()] = metric

    # TODO: check what happens if one does not use the monkhorst pack in the code
    tagname = 'MONKHORST_PACK_GRID'
    try:
        a = target_tags.getElementsByTagName(tagname)[0]
        value = [int(a.getAttribute('nk' + str(i + 1))) for i in range(3)]
        parsed_data[tagname.replace('-', '_').lower()] = value
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))

    tagname = 'MONKHORST_PACK_OFFSET'
    try:
        a = target_tags.getElementsByTagName(tagname)[0]
        value = [int(a.getAttribute('k' + str(i + 1))) for i in range(3)]
        parsed_data[tagname.replace('-', '_').lower()] = value
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))

    try:
        kpoints = []
        for i in range(parsed_data['number_of_k_points']):
            tagname = 'K-POINT.' + str(i + 1)
            a = target_tags.getElementsByTagName(tagname)[0]
            b = a.getAttribute('XYZ').replace('\n', '').rsplit()
            value = [float(s) for s in b]

            metric = parsed_data['units_for_k_points']
            if metric == '2 pi / a':
                value = [float(s) / parsed_data['lattice_parameter']
                         for s in value]
            weight = float(a.getAttribute('WEIGHT'))
            kpoints.append([value, weight])
        parsed_data['k_point'] = kpoints
    except Exception:
        raise QEOutputParsingError(
            'Error parsing tag K-POINT.# inside {}.'.format(
                target_tags.tagName))

    tagname = 'NORM-OF-Q'
    # TODO: decide whether this parameter should be saved
    parsed_data[tagname.replace('-', '_').lower()] = parse_xml_child_float(
        tagname, target_tags)

    # CARD PARALLELISM
    # can be optional
    try:
        cardname = 'PARALLELISM'
        target_tags = read_xml_card(dom, cardname)
        for tagname in ['GRANULARITY_OF_K-POINTS_DISTRIBUTION',
                        'NUMBER_OF_PROCESSORS',
                        'NUMBER_OF_PROCESSORS_PER_POOL',
                        'NUMBER_OF_PROCESSORS_PER_IMAGE',
                        'NUMBER_OF_PROCESSORS_PER_TASKGROUP',
                        'NUMBER_OF_PROCESSORS_PER_POT',
                        'NUMBER_OF_PROCESSORS_PER_BAND_GROUP',
                        'NUMBER_OF_PROCESSORS_PER_DIAGONALIZATION']:
            parsed_data[tagname.lower().replace('-', '_')] = \
                parse_xml_child_integer(tagname, target_tags)
    except Exception:
        pass

    # CARD TIMESTEPS
    cardname = 'TIMESTEPS'
    target_tags = read_xml_card(dom, cardname)
    for tagname in ['STEP0', 'STEPM']:
        try:
            tag = target_tags.getElementsByTagName(tagname)[0]

            try:
                second_tagname = 'ACCUMULATORS'
                second_tag = tag.getElementsByTagName(second_tagname)[0]
                data = second_tag.childNodes[0].data.rstrip().split()
                parsed_data[second_tagname.replace('-', '_').lower()] = \
                    [float(i) for i in data]
            except Exception:
                pass

            second_tagname = 'IONS_POSITIONS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            # positions and velocities are stored as flat lists of 3N
            # floats; reshape them into Nx3 matrices
            for third_tagname in ['stau', 'svel']:
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                list_data = [float(i) for i in
                             third_tag.childNodes[0].data.rstrip().split()]
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = _flat_floats_to_matrix(list_data)
            try:
                third_tagname = 'taui'
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                list_data = [float(i) for i in
                             third_tag.childNodes[0].data.rstrip().split()]
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = _flat_floats_to_matrix(list_data)
            except Exception:
                pass
            try:
                third_tagname = 'cdmi'
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                list_data = third_tag.childNodes[0].data.rstrip().split()
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = [float(i) for i in list_data]
            except Exception:
                pass
            try:
                third_tagname = 'force'
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                list_data = [float(i) for i in
                             third_tag.childNodes[0].data.rstrip().split()]
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = _flat_floats_to_matrix(list_data)
            except Exception:
                pass

            second_tagname = 'IONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['nhpcl', 'nhpdim', 'xnhp']:
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = float(third_tag.childNodes[0].data)
            try:
                third_tagname = 'vnhp'
                third_tag = second_tag.getElementsByTagName(third_tagname)[0]
                parsed_data[(second_tagname + '_' + third_tagname).replace(
                    '-', '_').lower()] = float(third_tag.childNodes[0].data)
            except Exception:
                pass

            try:
                second_tagname = 'ekincm'
                second_tag = tag.getElementsByTagName(second_tagname)[0]
                parsed_data[second_tagname.replace('-', '_').lower()] = float(
                    second_tag.childNodes[0].data)
            except Exception:
                pass

            second_tagname = 'ELECTRONS_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhe', 'vnhe']:
                try:
                    third_tag = second_tag.getElementsByTagName(
                        third_tagname)[0]
                    parsed_data[(second_tagname + '_' +
                                 third_tagname).replace('-', '_').lower()] = \
                        float(third_tag.childNodes[0].data)
                except Exception:
                    pass

            second_tagname = 'CELL_PARAMETERS'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['ht', 'htvel', 'gvel']:
                try:
                    third_tag = second_tag.getElementsByTagName(
                        third_tagname)[0]
                    list_data = [float(i) for i in third_tag.childNodes[0]
                                 .data.rstrip().split()]
                    parsed_data[(second_tagname + '_' +
                                 third_tagname).replace('-', '_').lower()] = \
                        _flat_floats_to_matrix(list_data)
                except Exception:
                    pass

            second_tagname = 'CELL_NOSE'
            second_tag = tag.getElementsByTagName(second_tagname)[0]
            for third_tagname in ['xnhh', 'vnhh']:
                try:
                    third_tag = second_tag.getElementsByTagName(
                        third_tagname)[0]
                    list_data = [float(i) for i in third_tag.childNodes[0]
                                 .data.rstrip().split()]
                    parsed_data[(second_tagname + '_' +
                                 third_tagname).replace('-', '_').lower()] = \
                        _flat_floats_to_matrix(list_data)
                except Exception:
                    pass
        except Exception:
            raise QEOutputParsingError(
                'Error parsing CARD {}'.format(cardname))

    # CARD BAND_STRUCTURE_INFO
    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_ATOMIC_WFC'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[tagname.lower().replace('-', '_')] = int(
        parse_xml_child_float(tagname, target_tags))

    tagname = 'NUMBER_OF_BANDS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'NUMBER_OF_SPIN_COMPONENTS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    # TODO:
    # - EIGENVALUES (that actually just contains occupations)
    #   Why should I be interested in that, if CP works for insulators only?
    # - EIGENVECTORS
    # - other TODOs are noted in the function

    return parsed_data
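
# Illustrative usage (not part of the original parser) of the
# ``_flat_floats_to_matrix`` helper factored out above: CP stores 3N
# coordinates (stau, svel, force, ht, ...) as one flat whitespace-
# separated list, which the helper chunks into rows of three.
def _example_reshape_stau():
    flat = [0.0, 0.0, 0.0, 1.3, 1.3, 1.3]  # two atoms, assumed sample data
    return _flat_floats_to_matrix(flat)    # [[0.0, 0.0, 0.0], [1.3, 1.3, 1.3]]
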