Exemplo n.º 1
0
def parse_raw_dos(dos_file, array_names, array_units):
    """
    Parse the lines of a dos file into labelled numpy arrays.

    The first line of ``dos_file`` is stored verbatim as the header; the
    numeric content is loaded with ``np.genfromtxt``. The number of columns
    decides whether the data is treated as spin-polarized (4 columns) or
    not (3 columns).

    :param dos_file: dos file lines in the form of a list
    :type dos_file: list
    :param array_names: list of all array names, note that array_names[0]
                        is for the case with non spin-polarized calculations
                        and array_names[1] is for the case with spin-polarized
                        calculation
    :type array_names: list
    :param array_units: list of all array units, note that array_units[0] is
                        for the case with non spin-polarized calculations and
                        array_units[1] is for the case with spin-polarized
                        calculation
    :type array_units: list

    :return array_data: dict mapping ``'header'``, each array name and each
                        ``<name>_units`` key to a numpy array
    :return spin: boolean, indicates whether the parsed results are spin
                  polarized
    :raise QEOutputParsingError: if the file is empty, cannot be loaded,
                                 contains non-numeric entries or does not
                                 have 3 or 4 columns
    """
    if len(dos_file) == 0:
        raise QEOutputParsingError("Dos file is empty.")

    dos_header = dos_file[0]
    try:
        dos_data = np.genfromtxt(dos_file)
    except ValueError:
        raise QEOutputParsingError('dosfile could not be loaded '
                                   ' using genfromtxt')
    dos_data = np.atleast_1d(dos_data)
    if dos_data.size == 0:
        raise QEOutputParsingError("Dos file is empty.")
    if np.isnan(dos_data).any():
        raise QEOutputParsingError("Dos file contains non-numeric elements.")

    if dos_data.ndim == 1:
        # genfromtxt squeezes both "one row" and "one column" inputs to 1-D.
        # Count the lines that carry data (ignoring blank and '#'-comment
        # lines) to restore the right 2-D orientation.
        data_rows = sum(1 for line in dos_file
                        if line.split('#', 1)[0].strip())
        if data_rows == 1:
            dos_data = dos_data.reshape(1, -1)
        else:
            dos_data = dos_data.reshape(-1, 1)

    # Checks the number of columns, essentially to see whether spin was used
    num_columns = dos_data.shape[1]
    if num_columns == 3:
        # spin is not used
        array_names = array_names[0]
        array_units = array_units[0]
        spin = False
    elif num_columns == 4:
        # spin is used
        array_names = array_names[1]
        array_units = array_units[1]
        spin = True
    else:
        raise QEOutputParsingError("Dos file in format that the parser is not "
                                   "designed to handle.")

    array_data = {'header': np.array(dos_header)}
    for column, name in enumerate(array_names):
        array_data[name] = dos_data[:, column]
        array_data[name + '_units'] = np.array(array_units[column])
    return array_data, spin
Exemplo n.º 2
0
def str2bool(string):
    """
    Convert a textual flag into a boolean.

    The input is lower-cased and stripped before being compared with the
    accepted spellings: "f", "0", "false", "no" give False and
    "t", "1", "true", "yes" give True.

    :param string: the text to convert
    :return: the corresponding boolean
    :raise QEOutputParsingError: if the input cannot be normalised as a
                                 string, or is not one of the accepted
                                 spellings
    """
    false_items = ("f", "0", "false", "no")
    true_items = ("t", "1", "true", "yes")

    # Only the normalisation is guarded, so that the more specific error
    # raised below is not replaced by the generic one.
    try:
        normalized = str(string.lower().strip())
    except Exception:
        raise QEOutputParsingError('Error converting string to boolean.')

    if normalized in false_items:
        return False
    if normalized in true_items:
        return True
    raise QEOutputParsingError('Error converting string '
                               '{} to boolean value.'.format(normalized))
Exemplo n.º 3
0
def parse_ph_tensor(data):
    """
    Parse the xml tensor file of QE v5.0.3

    data must be read from the file with the .read() function (avoid readlines)

    :param data: the xml content as a single string
    :return: a dictionary with the keys ``xml_warnings`` (always an empty
             list here), ``done_electric_field``, ``done_effective_charge_eu``
             and, when the corresponding flag is true, ``dielectric_constant``
             and ``effective_charges_eu``
    :raise QEOutputParsingError: if a tensor flagged as done cannot be parsed
    """
    dom = parseString(data)

    parsed_data = {}
    parsed_data['xml_warnings'] = []

    # card EF_TENSORS
    cardname = 'EF_TENSORS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'DONE_ELECTRIC_FIELD'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    if parsed_data[tagname.lower()]:
        try:
            second_tagname = 'DIELECTRIC_CONSTANT'
            parsed_data[second_tagname.lower()] = parse_xml_matrices(
                second_tagname, target_tags)
        except Exception:
            raise QEOutputParsingError('Failed to parse Dielectric constant')

    tagname = 'DONE_EFFECTIVE_CHARGE_EU'
    parsed_data[tagname.lower()] = parse_xml_child_bool(tagname, target_tags)

    if parsed_data[tagname.lower()]:
        try:
            second_tagname = 'EFFECTIVE_CHARGES_EU'
            flat_rows = parse_xml_matrices(second_tagname, target_tags)
            # Regroup the flat sequence of rows into consecutive groups of
            # three rows; a trailing incomplete group is dropped.
            grouped = []
            current = []
            for row in flat_rows:
                current.append(row)
                if len(current) == 3:
                    grouped.append(current)
                    current = []

            parsed_data[second_tagname.lower()] = grouped
        except Exception:
            raise QEOutputParsingError('Failed to parse effective charges eu')

    return parsed_data
Exemplo n.º 4
0
def parse_xml_child_integer(tagname, target_tags):
    """
    Return, as an int, the text of the first direct child of ``target_tags``
    whose node name is ``tagname``.

    :param tagname: node name of the child to read
    :param target_tags: the parent xml element
    :raise QEOutputParsingError: if the child is missing or its content
                                 cannot be converted
    """
    try:
        matching = (child for child in target_tags.childNodes
                    if child.nodeName == tagname)
        text_node = next(matching).childNodes[0]
        return int(text_node.data)
    except Exception:
        message = 'Error parsing tag {} inside {}'.format(
            tagname, target_tags.tagName)
        raise QEOutputParsingError(message)
Exemplo n.º 5
0
def parse_xml_child_str(tagname, target_tags):
    """
    Return, as a string, the text of the first direct child of
    ``target_tags`` whose node name is ``tagname``.

    Trailing whitespace is stripped and any remaining newline is removed.

    :param tagname: node name of the child to read
    :param target_tags: the parent xml element
    :raise QEOutputParsingError: if the child is missing or its content
                                 cannot be read
    """
    try:
        matching = (child for child in target_tags.childNodes
                    if child.nodeName == tagname)
        raw_text = str(next(matching).childNodes[0].data)
        return raw_text.rstrip().replace('\n', '')
    except Exception:
        message = 'Error parsing tag {} inside {}'.format(
            tagname, target_tags.tagName)
        raise QEOutputParsingError(message)
Exemplo n.º 6
0
def parse_raw_out_basic(out_file, calc_name):
    """
    A very simple parser for the standard out, usually aiida.out. Currently
    only parses basic warnings, the number of optimal basis vectors and the
    cpu/wall time.

    :param out_file: the standard out to be parsed, as a list of lines
    :param calc_name: the name of the calculation, e.g. PROJWFC
    :return: parsed_data, a dictionary that always contains a ``warnings``
             list and, when found, ``number_optimal_basis_vectors``,
             ``wall_time``, ``cpu_time``, ``wall_time_seconds`` and
             ``cpu_time_seconds``
    :raise QEOutputParsingError: if the times cannot be converted to seconds
    """
    parsed_data = {'warnings': []}

    error_marker = '%%%%%%%%%%%%%%'
    # (text to look for, message to store); a message of None means "store
    # the line itself".
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = [
        ('Maximum CPU time exceeded', 'Maximum CPU time exceeded'),
        (error_marker, None),
    ]
    minor_warnings = [
        ('Warning:', None),
        ('DEPRECATED:', None),
    ]
    # Critical entries come first so that they win when a line matches more
    # than one marker.
    all_warnings = critical_warnings + minor_warnings

    for count, line in enumerate(out_file):
        # parse the global file, for informations that are written only once
        if 'TOTAL NUMBER OF OPTIMAL BASIS VECTORS :' in line:
            parsed_data['number_optimal_basis_vectors'] = int(
                line.split(':')[-1])

        if calc_name in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0]
                cpu_time = line.split(':')[1].split('CPU')[0]
            except IndexError:
                # The line lacks the 'CPU' or ':' separator: nothing to
                # convert, record the problem and move on.
                parsed_data['warnings'].append(
                    'Error while parsing wall time.')
                continue
            parsed_data['wall_time'] = wall_time
            parsed_data['cpu_time'] = cpu_time
            try:
                parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(
                    wall_time)
                parsed_data['cpu_time_seconds'] = convert_qe_time_to_sec(
                    cpu_time)
            except ValueError:
                raise QEOutputParsingError(
                    "Unable to convert wall_time in seconds.")
            continue

        # Parsing of errors
        matched = [message for marker, message in all_warnings
                   if marker in line]
        if not matched:
            continue

        if error_marker in line:
            # Delegate to the QE error parser; only the messages found for
            # this very line are added (no carry-over between iterations).
            parsed_data['warnings'].extend(
                parse_QE_errors(out_file, count, parsed_data['warnings']))
        else:
            message = matched[0]
            parsed_data['warnings'].append(
                line if message is None else message)

    return parsed_data
Exemplo n.º 7
0
def find_orbitals_from_statelines(out_info_dict):
    """
    This function reads in all the state_lines, that is, the lines describing
    which atomic states, taken from the pseudopotential, are used for the
    projection. Then it converts these state_lines into a set of orbitals

    :param out_info_dict: contains various technical internals useful in
        parsing; the keys read here are ``out_file`` (list of lines),
        ``wfc_lines`` (indices into ``out_file``) and ``structure``
    :return: orbitals, a list of orbitals suitable for setting ProjectionData
    :raise QEOutputParsingError: if a state line does not contain the
        expected fields or they are not integers
    """
    out_file = out_info_dict["out_file"]
    atomnum_re = re.compile(r"atom (.*?)\(")
    element_re = re.compile(r"\((.*?)\)")
    lnum_re = re.compile(r"l=(.*?)m=")
    mnum_re = re.compile(r"m=(.*?)\)")
    state_lines = [out_file[wfc_line]
                   for wfc_line in out_info_dict["wfc_lines"]]

    state_dicts = []
    for state_line in state_lines:
        try:
            state_dict = {}
            state_dict["atomnum"] = int(atomnum_re.findall(state_line)[0])
            state_dict["atomnum"] -= 1  # to keep with orbital indexing
            state_dict["kind_name"] = element_re.findall(
                state_line)[0].strip()
            state_dict["angular_momentum"] = int(
                lnum_re.findall(state_line)[0])
            state_dict["magnetic_number"] = int(
                mnum_re.findall(state_line)[0])
            state_dict["magnetic_number"] -= 1  # to keep with orbital indexing
        except (ValueError, IndexError):
            # IndexError: a pattern did not match at all;
            # ValueError: it matched something that is not an integer.
            raise QEOutputParsingError("State lines are not formatted "
                                       "in a standard way.")
        state_dicts.append(state_dict)

    # radial_nodes is the number of identical states (same atom, kind,
    # angular momentum and magnetic number) that appeared earlier in the list
    seen_counts = {}
    for state_dict in state_dicts:
        signature = (state_dict["atomnum"], state_dict["kind_name"],
                     state_dict["angular_momentum"],
                     state_dict["magnetic_number"])
        earlier = seen_counts.get(signature, 0)
        state_dict["radial_nodes"] = earlier
        seen_counts[signature] = earlier + 1

    # here is some logic to assign positions based on the atom_index
    structure = out_info_dict["structure"]
    for state_dict in state_dicts:
        site_index = state_dict.pop("atomnum")
        state_dict["position"] = structure.sites[site_index].position

    # here we set the resulting state_dicts to a new set of orbitals
    orbitals = []
    realh = OrbitalFactory("realhydrogen")
    for state_dict in state_dicts:
        this_orb = realh()
        this_orb.set_orbital_dict(state_dict)
        orbitals.append(this_orb)
    return orbitals
Exemplo n.º 8
0
 def __init__(self, calculation):
     """
     Set up a ProjwfcParser for the given calculation.

     :param calculation: the calculation whose output will be parsed
     :raise QEOutputParsingError: if ``calculation`` is not a
         ProjwfcCalculation
     """
     is_projwfc_calc = isinstance(calculation, ProjwfcCalculation)
     if not is_projwfc_calc:
         raise QEOutputParsingError("Input calc must be a ProjwfcCalculation")

     # store the calculation, then let the parent class initialise itself
     self._calc = calculation
     super(ProjwfcParser, self).__init__(calculation)
Exemplo n.º 9
0
def parse_xml_child_attribute_int(tagname, attributename, target_tags):
    """
    Return, as an int, the attribute ``attributename`` of the first direct
    child of ``target_tags`` whose node name is ``tagname``.

    :param tagname: node name of the child to read
    :param attributename: name of the attribute to convert
    :param target_tags: the parent xml element
    :raise QEOutputParsingError: if the child is missing or the attribute
                                 cannot be converted
    """
    try:
        matching = (child for child in target_tags.childNodes
                    if child.nodeName == tagname)
        return int(next(matching).getAttribute(attributename))
    except Exception:
        message = 'Error parsing attribute {}, tag {} inside {}'.format(
            attributename, tagname, target_tags.tagName)
        raise QEOutputParsingError(message)
Exemplo n.º 10
0
    def __init__(self, calculation):
        """
        Initialize the instance of SimpleParser

        :param calculation: the calculation whose output will be parsed
        :raise QEOutputParsingError: if ``calculation`` is not a
            SimpleCalculation
        """
        # check for valid input
        if not isinstance(calculation, SimpleCalculation):
            # name the class that is actually checked
            raise QEOutputParsingError(
                "Input calc must be a SimpleCalculation")

        self._calc = calculation

        super(SimpleParser, self).__init__(calculation)
Exemplo n.º 11
0
def read_xml_card(dom, cardname):
    """
    Return the card (element) called ``cardname`` from a parsed xml document.

    The card is searched among the direct children of the top-level element
    named "Root".

    :param dom: the parsed xml document
    :param cardname: node name of the card to return
    :return: the first matching card element
    :raise QEOutputParsingError: if the "Root" element or the card cannot be
                                 found
    """
    try:
        root_node = [
            _ for _ in dom.childNodes
            if isinstance(_, xml.dom.minidom.Element) and _.nodeName == "Root"
        ][0]
        the_card = [_ for _ in root_node.childNodes
                    if _.nodeName == cardname][0]
        return the_card
    except Exception as e:
        # Function-call form so that the module is valid on both
        # Python 2 and Python 3.
        print(e)
        raise QEOutputParsingError('Error parsing tag {}'.format(cardname))
Exemplo n.º 12
0
def _cp_xml_key(*tagnames):
    """Build a parsed_data key: names joined by '_', '-' -> '_', lowercased."""
    return '_'.join(tagnames).replace('-', '_').lower()


def _cp_xml_scalar(parent, tagname):
    """Return the text of the first ``tagname`` descendant as a float."""
    node = parent.getElementsByTagName(tagname)[0]
    return float(node.childNodes[0].data)


def _cp_xml_vector(parent, tagname):
    """Return the whitespace-separated floats of the first ``tagname`` descendant."""
    node = parent.getElementsByTagName(tagname)[0]
    return [float(i) for i in node.childNodes[0].data.rstrip().split()]


def _cp_xml_matrix(parent, tagname):
    """Return the floats of ``tagname`` as rows of three; leftovers are dropped."""
    flat = _cp_xml_vector(parent, tagname)
    usable = len(flat) - len(flat) % 3
    return [flat[i:i + 3] for i in range(0, usable, 3)]


def _cp_xml_store(parsed_data, key, reader, parent, tagname, optional):
    """Store ``reader(parent, tagname)`` under ``key``; skip failures if optional."""
    try:
        value = reader(parent, tagname)
    except Exception:
        if optional:
            return
        raise
    parsed_data[key] = value


def _parse_cp_xml_timestep(tag, parsed_data):
    """Copy the quantities of one STEP0/STEPM element into ``parsed_data``."""
    scalar, vector, matrix = _cp_xml_scalar, _cp_xml_vector, _cp_xml_matrix
    # (section tag or None for "directly under the step",
    #  ((child tag, reader, optional), ...)).
    # A named section must exist even when all of its children are optional.
    layout = (
        (None, (('ACCUMULATORS', vector, True),)),
        ('IONS_POSITIONS', (('stau', matrix, False),
                            ('svel', matrix, False),
                            ('taui', matrix, True),
                            ('cdmi', vector, True),
                            ('force', matrix, True))),
        ('IONS_NOSE', (('nhpcl', scalar, False),
                       ('nhpdim', scalar, False),
                       ('xnhp', scalar, False),
                       ('vnhp', scalar, True))),
        (None, (('ekincm', scalar, True),)),
        ('ELECTRONS_NOSE', (('xnhe', scalar, True),
                            ('vnhe', scalar, True))),
        ('CELL_PARAMETERS', (('ht', matrix, True),
                             ('htvel', matrix, True),
                             ('gvel', matrix, True))),
        ('CELL_NOSE', (('xnhh', matrix, True),
                       ('vnhh', matrix, True))),
    )
    for section, children in layout:
        if section is None:
            parent = tag
            prefix = ()
        else:
            parent = tag.getElementsByTagName(section)[0]
            prefix = (section,)
        for child, reader, optional in children:
            key = _cp_xml_key(*(prefix + (child,)))
            _cp_xml_store(parsed_data, key, reader, parent, child, optional)


def parse_cp_xml_output(data):
    """
    Parse xml data
    data must be a single string, as returned by file.read() (notice the
    difference with parse_text_output!)
    On output, a dictionary with parsed values.
    Democratically, we have decided to use picoseconds as units of time, eV for energies, Angstrom for lengths.

    The cards read are STATUS, CELL, IONS, TIMESTEPS and BAND_STRUCTURE_INFO.
    In TIMESTEPS both STEP0 and STEPM are read into the same keys, so the
    values of STEPM overwrite those of STEP0.

    :param data: the xml content as a single string
    :return: parsed_data, the dictionary of parsed values
    :raise QEOutputParsingError: if the TIME units are not 'pico-seconds' or
        a mandatory tag of the TIMESTEPS card cannot be parsed
    """
    import copy

    dom = parseString(data)

    parsed_data = {}

    # CARD STATUS

    cardname = 'STATUS'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'STEP'
    attrname = 'ITERATION'
    parsed_data[(tagname + '_' + attrname).lower()] = int(
        parse_xml_child_attribute_str(tagname, attrname, target_tags))

    tagname = 'TIME'
    attrname = 'UNITS'
    value = parse_xml_child_float(tagname, target_tags)
    units = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if units not in ['pico-seconds']:
        raise QEOutputParsingError(
            "Units {} are not supported by parser".format(units))
    parsed_data[tagname.lower()] = value

    tagname = 'TITLE'
    parsed_data[tagname.lower()] = parse_xml_child_str(tagname, target_tags)

    # CARD CELL
    parsed_data, lattice_vectors, volume = copy.deepcopy(
        xml_card_cell(parsed_data, dom))

    # CARD IONS
    parsed_data = copy.deepcopy(
        xml_card_ions(parsed_data, dom, lattice_vectors, volume))

    # CARD TIMESTEPS

    cardname = 'TIMESTEPS'
    target_tags = read_xml_card(dom, cardname)

    for tagname in ['STEP0', 'STEPM']:
        try:
            step_tag = target_tags.getElementsByTagName(tagname)[0]
            _parse_cp_xml_timestep(step_tag, parsed_data)
        except Exception:
            raise QEOutputParsingError(
                'Error parsing CARD {}'.format(cardname))

    # CARD BAND_STRUCTURE_INFO

    cardname = 'BAND_STRUCTURE_INFO'
    target_tags = read_xml_card(dom, cardname)

    tagname = 'NUMBER_OF_ATOMIC_WFC'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[tagname.lower().replace('-', '_')] = int(
        parse_xml_child_float(tagname, target_tags))

    tagname = 'NUMBER_OF_BANDS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    tagname = 'NUMBER_OF_SPIN_COMPONENTS'
    parsed_data[tagname.lower().replace('-', '_')] = parse_xml_child_integer(
        tagname, target_tags)

    return parsed_data
Exemplo n.º 13
0
def parse_cp_raw_output(out_file, xml_file=None, xml_counter_file=None):
    """
    Parse the output files of a cp calculation and merge the results.

    :param out_file: path of the standard output file
    :param xml_file: path of the xml file; if None, its parsing is skipped
    :param xml_counter_file: path of the xml counter file, which keeps info
        on the steps; if None, its parsing is skipped
    :return: a tuple ``(final_data, job_successful)`` where ``final_data``
        merges the xml, standard-output and counter dictionaries and
        ``job_successful`` is True if 'JOB DONE' appears in the standard
        output
    :raise QEOutputParsingError: if one of the given files cannot be opened
    :raise AssertionError: if a key of the standard-output data is also
        present in the xml or counter data
    """
    parser_version = '0.1'
    # NOTE(review): parser_info is filled in but never returned or merged
    # into the result by this function - confirm whether that is intended.
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)

    # analyze the xml
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml file: {}.".format(xml_file))
        # TODO: this function should probably be the same of pw.
        # after all, the parser was fault-tolerant
        xml_data = parse_cp_xml_output(xml_lines)
    else:
        parser_info['parser_warnings'].append(
            'Skipping the parsing of the xml file.')
        xml_data = {}

    # analyze the counter file, which keeps info on the steps
    if xml_counter_file is not None:
        try:
            with open(xml_counter_file, 'r') as f:
                xml_counter_lines = f.read()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml counter file: {}.".format(
                    xml_counter_file))
        xml_counter_data = parse_cp_xml_counter_output(xml_counter_lines)
    else:
        xml_counter_data = {}

    # analyze the standard output
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        raise QEOutputParsingError("Failed to open output file: %s." %
                                   out_file)

    # understand if the job ended smoothly
    job_successful = any('JOB DONE' in line for line in reversed(out_lines))

    out_data = parse_cp_text_output(out_lines, xml_data)

    for key in out_data:
        if key in xml_data:
            raise AssertionError('%s found in both dictionaries' % key)
        if key in xml_counter_data:
            raise AssertionError('%s found in both dictionaries' % key)

    # Later updates take precedence on shared keys (xml < out < counter);
    # the check above already rules out clashes involving out_data.
    final_data = {}
    final_data.update(xml_data)
    final_data.update(out_data)
    final_data.update(xml_counter_data)

    # TODO: parse the trajectory and save them in a reasonable format

    return final_data, job_successful
Exemplo n.º 14
0
def parse_ph_dynmat(data,lattice_parameter=None,also_eigenvectors=False,
                    parse_header=False):
    """
    parses frequencies and eigenvectors of a single dynamical matrix
    :param data: the text read with the function readlines()
    :param lattice_parameter: the lattice_parameter ('alat' in QE jargon). If 
        None, q_point is kept in 2pi/a coordinates as in the dynmat file.
    :param also_eigenvectors: if True, return an additional 'eigenvectors' 
        array in output, containing also the eigenvectors. This will be
        a list of lists, that when converted to a numpy array has 4 indices,
        with shape Neigenstates x Natoms x 3(xyz) x 2 (re,im)
        To convert to a complex numpy array, you can use::

          ev = np.array(parsed_data['eigenvectors'])
          ev = ev[:,:,:,0] + 1j * ev[:,:,:,1]
    :param parse_header: if True, return additional keys in the returned
        parsed_data dictionary, including information from the header

    :return parsed_data: a dictionary with parsed values and units
    
    """
    parsed_data = {}
    parsed_data['warnings'] = []
    
    if 'Dynamical matrix file' not in data[0]:
        raise QEOutputParsingError('Dynamical matrix is not in the expected format') 
        
    frequencies = []
    eigenvectors = []

    starting_line = 1
    if parse_header:
        header_dict = {"warnings": []}
        try:
            pieces = data[2].split()
            if len(pieces) != 9:
                raise QEOutputParsingError("Wrong # of elements on line 3")
            try:
                num_species = int(pieces[0])
                num_atoms = int(pieces[1])
                header_dict['ibrav'] = int(pieces[2])
                header_dict['celldm'] = [float(i) for i in pieces[3:]]
                # In angstrom
                alat = header_dict['celldm'][0] * bohr_to_ang
                if abs(alat) < 1.e-5:
                    raise QEOutputParsingError(
                        "Lattice constant=0! Probably you are using an "
                        "old Quantum ESPRESSO version?")
                header_dict["alat"] = alat
                header_dict["alat_units"] = "angstrom"
            except ValueError:
                raise QEOutputParsingError("Wrong data on line 3")

            starting_line = 3
            if header_dict['ibrav'] == 0:
                if 'Basis vectors' not in data[3]:
                    raise QEOutputParsingError(
                        "Wrong format (no 'Basis vectors' line)")
                try:
                    v1 = [float(_)*alat for _ in data[4].split()]
                    v2 = [float(_)*alat for _ in data[5].split()]
                    v3 = [float(_)*alat for _ in data[6].split()]
                    if len(v1) != 3 or len(v2) != 3 or len(v3) != 3:
                        raise QEOutputParsingError(
                            "Wrong length for basis vectors")
                    header_dict['lattice_vectors'] = [v1,v2,v3]
                    header_dict['lattice_vectors_units'] = "angstrom"
                except ValueError:
                    raise QEOutputParsingError("Wrong data for basis vectors")
                starting_line += 4

            species_info = {}
            species = []
            for idx, sp_line in enumerate(
                data[starting_line:starting_line + num_species],
                start=1):
                pieces = sp_line.split("'")
                if len(pieces) != 3:
                    raise QEOutputParsingError(
                        "Wrong # of elements for one of the species")
                try:
                    if int(pieces[0]) != idx:
                        raise QEOutputParsingError(
                            "Error with the indices of the species")
                    species.append([pieces[1].strip(),
                                    float(pieces[2])/amu_Ry])
                except ValueError:
                    raise QEOutputParsingError("Error parsing the species")
            
            masses = dict(species)
            header_dict['masses'] = masses

            atoms_coords = []
            atoms_labels = []
            starting_line += num_species
            for idx, atom_line in enumerate(
                data[starting_line:starting_line + num_atoms],
                start=1):
                pieces = atom_line.split()
                if len(pieces) != 5:
                    raise QEOutputParsingError(
                        "Wrong # of elements for one of the atoms: {}, "
                        "line {}: {}".format(
                            len(pieces), starting_line+idx, pieces))
                try:
                    if int(pieces[0]) != idx:
                        raise QEOutputParsingError(
                            "Error with the indices of the atoms: "
                            "{} vs {}".format(int(pieces[0]), idx))
                    sp_idx = int(pieces[1])
                    if sp_idx > len(species):
                        raise QEOutputParsingError("Wrong index for the species: "
                                            "{}, but max={}".format(
                                sp_idx, len(species)))
                    atoms_labels.append(species[sp_idx-1][0])
                    atoms_coords.append([float(pieces[2])*alat,
                                         float(pieces[3])*alat,
                                         float(pieces[4])*alat])
                except ValueError:
                    raise QEOutputParsingError("Error parsing the atoms")
                except IndexError:
                    raise QEOutputParsingError(
                        "Error with the indices in the atoms section")
            header_dict['atoms_labels'] = atoms_labels
            header_dict['atoms_coords'] = atoms_coords
            header_dict['atoms_coords_units'] = "angstrom"
            
            starting_line += num_atoms
            
            starting_line += 1 # Got to the next line to check
            if 'Dynamical' not in data[starting_line]:
                raise QEOutputParsingError(
                    "Wrong format (no 'Dynamical  Matrix' line)")            
            
            ## Here I finish the header parsing

        except QEOutputParsingError as e:
            parsed_data['warnings'].append(
                "Problem parsing the header of the matdyn file! (msg: {}). "
                "Storing only the information I managed to retrieve".format(
                    e.message))
            header_dict['warnings'].append(
                "There was some parsing error and this dictionary is "
                "not complete, see the warnings of the top parsed_data dict")

        # I store what I got
        parsed_data['header'] = header_dict
    
    for line_counter,line in enumerate(data[starting_line:],
                                       start=starting_line):
        if 'q = ' in line:
            # q point is written several times, because it can also be rotated.
            # I consider only the first point, which is the one computed
            if 'q_point' not in parsed_data:
                q_point = [ float(i) for i in line.split('(')[1].split(')')[0].split() ]
                if lattice_parameter:
                    parsed_data['q_point'] = [ e*2*numpy.pi/lattice_parameter for e in q_point]
                    parsed_data['q_point_units'] = 'angstrom-1'
                else:
                    parsed_data['q_point'] = q_point
                    parsed_data['q_point_units'] = '2pi/lattice_parameter'
        
        if 'freq' in line or 'omega' in line:
            this_freq = line.split('[cm-1]')[0].split('=')[-1]
            
            # exception for bad fortran coding: *** could be written instead of the number
            if '*' in this_freq:
                frequencies.append(None)
                parsed_data['warnings'].append('Wrong fortran formatting found while parsing frequencies')
            else:
                frequencies.append( float(this_freq) )
            
            this_eigenvectors = []
            for new_line in data[line_counter+1:]:
                if ('freq' in new_line or 'omega' in new_line or
                    '************************************************'
                    in new_line):
                    break
                this_things = new_line.split('(')[1].split(')')[0].split()
                try:
                    this_flatlist = [float(i) for i in this_things]
                except ValueError:
                    parsed_data['warnings'].append('Wrong fortran formatting found while parsing eigenvectors')
                    # then save the three (xyz) complex numbers as [None,None]
                    this_eigenvectors.append([[None,None]]*3)
                    continue
                
                list_tuples = zip(*[iter(this_flatlist)]*2)
                # I save every complex number as a list of two numbers
                this_eigenvectors.append( [ [i[0],i[1]] for i in list_tuples ] )
                
            eigenvectors.append(this_eigenvectors)
            
    parsed_data['frequencies'] = frequencies
    parsed_data['frequencies_units'] = 'cm-1'
    # TODO: the eigenvectors should be written in the database according to a parser_opts.
    # for now, we don't store them, otherwise we get too much stuff
    # We implement anyway the possibility to get it with an optional parameter
    if also_eigenvectors:
        parsed_data['eigenvectors'] = eigenvectors
    
    return parsed_data
Exemplo n.º 15
0
def parse_ph_text_output(lines):
    """
    Parses the stdout of QE-PH.
    
    :param lines: list of strings, the file as read by readlines()
    
    :return parsed_data: dictionary with parsed values. It always contains the
                         key 'warnings' (a list of strings); the keys
                         'wall_time', 'wall_time_seconds', 'number_of_qpoints',
                         'number_of_atoms' and
                         'number_of_irr_representations_for_each_q' are present
                         only if the corresponding lines are found.
    :return critical_messages: a list with critical messages. If any is found in
                               parsed_data['warnings'], the calculation is FAILED!

    :raise QEOutputParsingError: if the wall time is found but cannot be
                                 converted to seconds.
    """
    parsed_data = {}
    parsed_data['warnings'] = []

    # parse time, starting from the end
    # apparently, the time is written multiple times
    for line in reversed(lines):
        if 'PHONON' in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0]
            except IndexError:
                parsed_data['warnings'].append('Error while parsing wall time.')
            else:
                parsed_data['wall_time'] = wall_time
                # Convert only when the string was actually extracted: looking
                # up a missing 'wall_time' key would raise a KeyError.
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(wall_time)
                except ValueError:
                    raise QEOutputParsingError("Unable to convert wall_time in seconds.")
            break

    # parse number of q-points and number of atoms
    for line in lines:
        if 'q-points for this run' in line or 'q-points)' in line:
            try:
                if 'q-points for this run' in line:
                    qpoints_text = line.split('/')[1].split('q-points')[0]
                else:
                    # case of a 'only_wfc' calculation
                    qpoints_text = line.split('q-points')[0].split('(')[1]
                num_qpoints = int(qpoints_text)
            except (IndexError, ValueError):
                parsed_data['warnings'].append("Error while parsing number of "
                                               "q points.")
            else:
                if ('number_of_qpoints' in parsed_data and
                        num_qpoints != parsed_data['number_of_qpoints']):
                    parsed_data['warnings'].append("Number q-points found "
                                                   "several times with different"
                                                   " values")
                else:
                    parsed_data['number_of_qpoints'] = num_qpoints

        elif "number of atoms/cell" in line:
            try:
                parsed_data['number_of_atoms'] = int(line.split('=')[1])
            except (IndexError, ValueError):
                parsed_data['warnings'].append("Error while parsing number of "
                                               "atoms.")

        elif "irreducible representations" in line:
            # The list is created as soon as a matching line is seen, even if
            # the number itself cannot be extracted from it.
            irr_repr = parsed_data.setdefault(
                'number_of_irr_representations_for_each_q', [])
            try:
                irr_repr.append(
                    int(line.split('irreducible')[0].split('are')[1]))
            except (IndexError, ValueError):
                # Best effort: lines mentioning irreducible representations
                # without a "... are N irreducible ..." count are ignored.
                pass

    # TODO: find a more exhaustive list of the common errors of ph

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {'No convergence has been achieved':
                         'Phonon did not reach end of self consistency',
                         'Maximum CPU time exceeded':'Maximum CPU time exceeded',
                         '%%%%%%%%%%%%%%':None,
                         }

    minor_warnings = {'Warning:':None,
                      }

    # Merge without concatenating .items(), which only works on Python 2.
    all_warnings = dict(critical_warnings)
    all_warnings.update(minor_warnings)

    for count, line in enumerate(lines):
        matched_keys = [key for key in all_warnings if key in line]
        if not matched_keys:
            continue

        if '%%%%%%%%%%%%%%' in line:
            # Imported only when a QE error block is actually met.
            from aiida.parsers.plugins.quantumespresso.raw_parser_pw import parse_QE_errors
            messages = parse_QE_errors(lines, count, parsed_data['warnings'])
        else:
            # A None message means "store the offending line itself".
            messages = [all_warnings[key] if all_warnings[key] is not None
                        else line for key in matched_keys]

        # if it found something, add to log
        if messages:
            parsed_data['warnings'].extend(messages)

    return parsed_data, list(critical_warnings.values())
Exemplo n.º 16
0
def parse_raw_ph_output(out_file, tensor_file=None, dynmat_files=None):
    """
    Parses the output of a ph calculation.
    Receives in input the paths to the standard output file and, optionally,
    to the tensor file and to the dynamical matrix files.
    
    Args: 
        out_file 
            path to ph std output
        tensor_file
            optional path to the tensor file; it is skipped if not given
        dynmat_files
            optional list of paths to the dynamical matrix files. Empty files
            and files whose first line contains only numbers are skipped.
    
    Returns:
        out_dict
            a dictionary with parsed data
        successful
            a boolean that is False in case of failed calculations
            
    Raises:
        QEOutputParsingError
            if the output file cannot be opened
        AssertionError
            if the same key is produced by two of the partial parsers

    2 different keys to check in output: parser_warnings and warnings.
    On an upper level, these flags MUST be checked.
    Both are expected to be empty unless QE failures or unfinished jobs.
    """
    job_successful = True

    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE-PH Parser v{}'.format(parser_version)

    # load QE out file (read once, reused for every check below)
    try:
        with open(out_file, 'r') as f:
            out_lines = f.readlines()
    except IOError:
        # if the file cannot be open, the error is severe.
        raise QEOutputParsingError("Failed to open output file: {}.".format(out_file))

    # in case of executable failures, check if there is any output at all
    if not out_lines:
        job_successful = False

    # check if the job has finished (that doesn't mean without errors)
    finished_run = any('JOB DONE' in line for line in out_lines)
    if not finished_run:
        warning = 'QE ph run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse tensors, if present
    tensor_data = {}
    if tensor_file:
        with open(tensor_file, 'r') as f:
            tensor_lines = f.read()
        try:
            tensor_data = parse_ph_tensor(tensor_lines)
        except QEOutputParsingError:
            parser_info['parser_warnings'].append('Error while parsing the tensor files')

    # parse ph output
    out_data, critical_messages = parse_ph_text_output(out_lines)

    # if there is a severe error, the calculation is FAILED
    if any(x in out_data['warnings'] for x in critical_messages):
        job_successful = False

    # parse dynamical matrices if present
    dynmat_data = {}
    for dynmat_counter, this_dynmat in enumerate(dynmat_files or []):
        # read it
        with open(this_dynmat, 'r') as f:
            lines = f.readlines()

        if not lines:
            continue

        # check if the file contains frequencies (i.e. is useful) or not:
        # a first line made only of numbers marks a file to be skipped
        try:
            [float(i) for i in lines[0].split()]
        except ValueError:
            pass
        else:
            continue

        # parse it and store it under a key numbered as its position in
        # dynmat_files (skipped files leave gaps in the numbering)
        dynmat_data['dynamical_matrix_%s' % dynmat_counter] = parse_ph_dynmat(lines)
        # TODO: use the bands format?

    # join dictionaries, there should not be any twice repeated key
    for key in out_data:
        if key in tensor_data:
            raise AssertionError('{} found in two dictionaries'.format(key))
    for key in out_data:
        if key in dynmat_data:
            if key == 'warnings': # this key can be found in both, but is not a problem
                out_data['warnings'] += dynmat_data['warnings']
                del dynmat_data['warnings']
            else:
                raise AssertionError('{} found in two dictionaries'.format(key))

    # I don't check the dynmat_data and parser_info keys.
    # Later dictionaries take precedence, as in the original concatenation.
    final_data = {}
    for partial_data in (dynmat_data, out_data, tensor_data, parser_info):
        final_data.update(partial_data)

    return final_data, job_successful
Exemplo n.º 17
0
def xml_card_ions(parsed_data, dom, lattice_vectors, volume):
    """
    Parse the IONS card of the XML file and store its content in parsed_data.

    The following keys are written: 'number_of_atoms', 'number_of_species',
    'units_for_atomic_masses', 'species' (a dictionary of parallel lists
    'index', 'type', 'mass' and 'pseudo'), 'units_for_atomic_positions',
    'atoms' (list of [chemical symbol, position]), 'atoms_index_list',
    'atoms_if_pos_list' and 'cell' (lattice vectors, volume, atoms and the
    numeric tags found in the species names).

    :param parsed_data: dictionary to be filled; 'lattice_parameter_xml' must
                        already be present if the positions are in 'alat' units
    :param dom: the xml document, passed to read_xml_card
    :param lattice_vectors: stored unchanged in parsed_data['cell']
    :param volume: stored unchanged in parsed_data['cell']

    :return parsed_data: the same dictionary received in input

    :raise QEOutputParsingError: if the species or the atoms cannot be parsed
    """
    cardname = 'IONS'
    target_tags = read_xml_card(dom, cardname)

    for tagname in ['NUMBER_OF_ATOMS', 'NUMBER_OF_SPECIES']:
        parsed_data[tagname.lower()] = parse_xml_child_integer(
            tagname, target_tags)

    tagname = 'UNITS_FOR_ATOMIC_MASSES'
    attrname = 'UNITS'
    parsed_data[tagname.lower()] = parse_xml_child_attribute_str(
        tagname, attrname, target_tags)

    try:
        species = {'index': [], 'type': [], 'mass': [], 'pseudo': []}
        parsed_data['species'] = species
        for i in range(parsed_data['number_of_species']):
            tagname = 'SPECIE.' + str(i + 1)
            species['index'].append(i + 1)

            # only direct children are considered, not all the descendants
            a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]

            species['type'].append(parse_xml_child_str('ATOM_TYPE', a))
            species['mass'].append(parse_xml_child_float('MASS', a))
            species['pseudo'].append(parse_xml_child_str('PSEUDO', a))

        tagname = 'UNITS_FOR_ATOMIC_POSITIONS'
        attrname = 'UNITS'
        parsed_data[tagname.lower()] = parse_xml_child_attribute_str(
            tagname, attrname, target_tags)
    except Exception:
        # 'except Exception' rather than a bare 'except', so that
        # KeyboardInterrupt and SystemExit are not turned into parsing errors
        raise QEOutputParsingError('Error parsing tag SPECIE.# inside %s.' %
                                   (target_tags.tagName))
    # TODO convert the units
    # if parsed_data['units_for_atomic_positions'] not in ['alat','bohr','angstrom']:

    try:
        atomlist = []
        atoms_index_list = []
        atoms_if_pos_list = []
        tagslist = []
        for i in range(parsed_data['number_of_atoms']):
            tagname = 'ATOM.' + str(i + 1)
            # only direct children are considered, not all the descendants
            a = [_ for _ in target_tags.childNodes if _.nodeName == tagname][0]

            atoms_index_list.append(int(a.getAttribute('INDEX')))

            chem_symbol = str(a.getAttribute('SPECIES')).rstrip().replace(
                "\n", "")
            # I check if it is a subspecie
            chem_symbol_digits = "".join(
                [c for c in chem_symbol if c in string.digits])
            try:
                tagslist.append(int(chem_symbol_digits))
            except ValueError:
                # If I can't parse the digit, it is probably not there: I add a None to the tagslist
                tagslist.append(None)
            # I remove the digits from the symbol (without str.translate, whose
            # two-argument form only exists for Python 2 byte strings)
            chem_symbol = "".join(
                [c for c in chem_symbol if c not in string.digits])

            b = a.getAttribute('tau')
            tau = [float(s) for s in b.rstrip().replace("\n", "").split()]
            metric = parsed_data['units_for_atomic_positions']
            if metric not in ['alat', 'bohr',
                              'angstrom']:  # REMEMBER TO CONVERT AT THE END
                raise QEOutputParsingError('Error parsing tag %s inside %s' %
                                           (tagname, target_tags.tagName))
            if metric == 'alat':
                # NOTE(review): 'lattice_parameter_xml' looks like the value as
                # written in the XML, before any unit conversion - confirm that
                # this is the intended scaling factor.
                tau = [
                    parsed_data['lattice_parameter_xml'] * float(s)
                    for s in tau
                ]
            elif metric == 'bohr':
                tau = [bohr_to_ang * float(s) for s in tau]
            atomlist.append([chem_symbol, tau])

            b = a.getAttribute('if_pos')
            if_pos = [int(s) for s in b.rstrip().replace("\n", "").split()]
            atoms_if_pos_list.append(if_pos)
        parsed_data['atoms'] = atomlist
        parsed_data['atoms_index_list'] = atoms_index_list
        parsed_data['atoms_if_pos_list'] = atoms_if_pos_list
        # saving data together with cell parameters. Did so for better compatibility with ASE.
        cell = {}
        cell['lattice_vectors'] = lattice_vectors
        cell['volume'] = volume
        cell['atoms'] = atomlist
        cell['tagslist'] = tagslist
        parsed_data['cell'] = cell
    except Exception:
        raise QEOutputParsingError('Error parsing tag ATOM.# inside %s.' %
                                   (target_tags.tagName))

    # correct some units that have been converted in
    parsed_data['atomic_positions' + units_suffix] = default_length_units
    parsed_data['direct_lattice_vectors' + units_suffix] = default_length_units

    return parsed_data
Exemplo n.º 18
0
def grep_energy_from_line(line):
    """
    Return the number found between the first '=' and the following 'Ry'
    of the line, multiplied by ry_to_ev.

    :raise QEOutputParsingError: if the value cannot be extracted or converted
    """
    try:
        after_equal = line.split('=')[1]
        value_text = after_equal.split('Ry')[0]
        energy = float(value_text) * ry_to_ev
    except Exception:
        raise QEOutputParsingError('Error while parsing energy')
    return energy
Exemplo n.º 19
0
def xml_card_cell(parsed_data, dom):
    """
    Parse the CELL card of the XML file and store its content in parsed_data.

    :param parsed_data: dictionary to be filled with the parsed values
    :param dom: the xml document, passed to read_xml_card

    :return: the tuple (parsed_data, lattice_vectors, volume); the direct
             lattice vectors are not stored in parsed_data by this function

    :raise QEOutputParsingError: if a tag cannot be parsed or its units are
                                 not among the accepted ones
    """
    cardname = 'CELL'
    target_tags = read_xml_card(dom, cardname)

    def direct_child(node, name):
        # First direct child of node with the given name (IndexError if absent)
        return [child for child in node.childNodes
                if child.nodeName == name][0]

    def floats_of(node):
        # Numbers contained in the first child of node, newlines removed
        text = node.childNodes[0].data
        return [float(token) for token in text.replace('\n', '').split()]

    def key_of(name):
        # Dictionary key associated to a tag name
        return name.replace('-', '_').lower()

    for tagname in ['NON-PERIODIC_CELL_CORRECTION', 'BRAVAIS_LATTICE']:
        parsed_data[key_of(tagname)] = parse_xml_child_str(
            tagname, target_tags)

    # Lattice parameter: stored both as read from the file ('_xml' suffix)
    # and after the conversion applied when the units are bohr
    tagname = 'LATTICE_PARAMETER'
    value = parse_xml_child_float(tagname, target_tags)
    parsed_data[key_of(tagname) + '_xml'] = value
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, target_tags)
    if metric not in ['bohr', 'angstrom']:
        raise QEOutputParsingError(
            'Error parsing attribute {}, tag {} inside {}, units not found'.
            format(attrname, tagname, target_tags.tagName))
    if metric == 'bohr':
        value *= bohr_to_ang
    parsed_data[key_of(tagname)] = value

    tagname = 'CELL_DIMENSIONS'
    try:
        parsed_data[key_of(tagname)] = floats_of(
            direct_child(target_tags, tagname))
    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))

    tagname = 'DIRECT_LATTICE_VECTORS'
    lattice_vectors = []
    try:
        units_tagname = 'UNITS_FOR_DIRECT_LATTICE_VECTORS'
        vectors_node = direct_child(target_tags, tagname)
        units_node = vectors_node.getElementsByTagName(
            'UNITS_FOR_DIRECT_LATTICE_VECTORS')[0]
        metric = str(units_node.getAttribute('UNITS')).lower()
        parsed_data[key_of(units_tagname)] = metric

        # REMEMBER TO CHECK THE UNITS AT THE END OF THE FUNCTION
        if metric not in ['bohr', 'angstroms']:
            raise QEOutputParsingError(
                'Error parsing tag {} inside {}: units not supported: {}'.
                format(tagname, target_tags.tagName, metric))

        for vector_name in ['a1', 'a2', 'a3']:
            vector = floats_of(direct_child(vectors_node, vector_name))
            if metric == 'bohr':
                vector = [bohr_to_ang * float(s) for s in vector]
            lattice_vectors.append(vector)

        volume = cell_volume(lattice_vectors[0], lattice_vectors[1],
                             lattice_vectors[2])

    except Exception:
        raise QEOutputParsingError(
            'Error parsing tag {} inside {} inside {}.'.format(
                tagname, target_tags.tagName, cardname))
    # NOTE: lattice_vectors will be saved later together with card IONS.atom

    tagname = 'RECIPROCAL_LATTICE_VECTORS'
    try:
        vectors_node = direct_child(target_tags, tagname)

        units_tagname = 'UNITS_FOR_RECIPROCAL_LATTICE_VECTORS'
        units_node = vectors_node.getElementsByTagName(units_tagname)[0]
        metric = str(units_node.getAttribute('UNITS')).lower()
        parsed_data[key_of(units_tagname)] = metric

        # NOTE: output is given in 2 pi / a [ang ^ -1]
        if metric not in ['2 pi / a']:
            raise QEOutputParsingError(
                'Error parsing tag {} inside {}: units {} not supported'.
                format(tagname, target_tags.tagName, metric))

        # reciprocal_lattice_vectors: '2 pi / a' is the only unit accepted
        # above, so every component is divided by the lattice parameter
        reciprocal_vectors = []
        for vector_name in ['b1', 'b2', 'b3']:
            vector_node = vectors_node.getElementsByTagName(vector_name)[0]
            components = floats_of(vector_node)
            reciprocal_vectors.append([
                float(s) / parsed_data['lattice_parameter']
                for s in components
            ])
        parsed_data['reciprocal_lattice_vectors'] = reciprocal_vectors

    except Exception:
        raise QEOutputParsingError('Error parsing tag {} inside {}.'.format(
            tagname, target_tags.tagName))
    return parsed_data, lattice_vectors, volume
Exemplo n.º 20
0
def parse_cp_text_output(data, xml_data):
    """
    Parses the text output of QE-CP.

    :param data: a list of strings, one for each line, as returned by
                 readlines()
    :param xml_data: not used by this function

    :return parsed_data: a dictionary with the parsed values. It always
                         contains the key 'warnings'; 'wall_time' and the
                         quantities read below the 'nfi ekinc ...' header
                         (each stored as a one-element list) are present only
                         if found.

    :raise QEOutputParsingError: if a wall time line cannot be parsed
    """
    # TODO: uniform readlines() and read() usage for passing input to the parser

    parsed_data = {}
    parsed_data['warnings'] = []

    for line in data:
        if 'warning' in line.lower():
            parsed_data['warnings'].append(line)
        elif 'bananas' in line:
            parsed_data['warnings'].append('Bananas from the ortho.')
        elif 'CP' in line and 'WALL' in line:
            try:
                time = line.split('CPU')[1].split('WALL')[0]
                parsed_data['wall_time'] = time
            except Exception:
                raise QEOutputParsingError('Error while parsing wall time.')

    # (key, column) pairs: position of each quantity in the line of values
    # that follows the header
    columns = (
        ('ekinc', 1),
        ('temph', 2),
        ('tempp', 3),
        ('etot', 4),
        ('enthal', 5),
        ('econs', 6),
        ('econt', 7),
        ('vnhh', 8),
        ('xnhh0', 9),
        ('vnhp', 10),
        ('xnhp0', 11),
    )

    # NOTE(review): the file is scanned backwards and every header is
    # processed, so the values following the earliest header are the ones
    # finally kept - confirm whether the last header was intended instead.
    num_lines = len(data)
    for count, line in enumerate(reversed(data)):
        if not ('nfi' in line and 'ekinc' in line and 'econs' in line):
            continue

        # The header is data[num_lines - 1 - count]: the values are on the
        # following line, which does not exist if the header is the last line
        values_index = num_lines - count
        if values_index >= num_lines:
            continue
        values = data[values_index].split()

        for key, column in columns:
            try:
                parsed_data[key] = [float(values[column])]
            except (ValueError, IndexError):
                # non numeric or missing column: the quantity is skipped
                pass

    return parsed_data
Exemplo n.º 21
0
def parse_neb_text_output(data, input_dict=None):
    """
    Parses the text output of QE Neb.
    
    :param data: a string, the file as read by read()
    :param input_dict: dictionary with the input parameters; it is used only
                       to read ['PATH']['num_of_images'] when the number of
                       images is not found in the output
    
    :return parsed_data: dictionary with key values, referring to quantities 
                         at the last step.
    :return iteration_data: key,values referring to intermediate iterations.
                             Empty dictionary if no value is present.
    :return critical_messages: a list with critical messages. If any is found in
                               parsed_data['warnings'], the calculation is FAILED!

    :raise QEOutputParsingError: if the wall time cannot be converted to
                                 seconds, or if the number of images is found
                                 neither in the output nor in input_dict
    """
    from collections import defaultdict

    if input_dict is None:
        input_dict = {}

    # TODO: find a more exhaustive list of the common errors of neb

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'scf convergence NOT achieved on image':
        'SCF did not converge for a given image',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        'reached the maximum number of steps':
        'Maximum number of iterations reached in the image optimization',
        '%%%%%%%%%%%%%%': None,
    }

    minor_warnings = {
        'Warning:': None,
    }

    # Merge without concatenating .items(), which only works on Python 2.
    all_warnings = dict(critical_warnings)
    all_warnings.update(minor_warnings)

    parsed_data = {}
    parsed_data['warnings'] = []
    iteration_data = defaultdict(list)

    # split once, the same list is reused by all the loops below
    lines = data.split('\n')

    # parse time, starting from the end
    # apparently, the time is written multiple times
    for line in reversed(lines):
        if 'NEB' in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0].strip()
            except IndexError:
                parsed_data['warnings'].append(
                    'Error while parsing wall time.')
            else:
                parsed_data['wall_time'] = wall_time
                # Convert only when the string was actually extracted: looking
                # up a missing 'wall_time' key would raise a KeyError.
                try:
                    parsed_data['wall_time_seconds'] = \
                        convert_qe_time_to_sec(wall_time)
                except ValueError:
                    raise QEOutputParsingError(
                        "Unable to convert wall_time in seconds.")
            break

    # set by default the calculation as not converged.
    parsed_data['converged'] = [False, 0]

    for count, line in enumerate(lines):
        if 'initial path length' in line:
            initial_path_length = float(line.split('=')[1].split('bohr')[0])
            parsed_data[
                'initial_path_length'] = initial_path_length * bohr_to_ang
        elif 'initial inter-image distance' in line:
            initial_image_dist = float(line.split('=')[1].split('bohr')[0])
            parsed_data[
                'initial_image_dist'] = initial_image_dist * bohr_to_ang
        elif 'string_method' in line:
            parsed_data['string_method'] = line.split('=')[1].strip()
        elif 'restart_mode' in line:
            parsed_data['restart_mode'] = line.split('=')[1].strip()
        elif 'opt_scheme' in line:
            parsed_data['opt_scheme'] = line.split('=')[1].strip()
        elif 'num_of_images' in line:
            parsed_data['num_of_images'] = int(line.split('=')[1])
        elif 'nstep_path' in line:
            parsed_data['nstep_path'] = int(line.split('=')[1])
        elif 'CI_scheme' in line:
            parsed_data['ci_scheme'] = line.split('=')[1].strip()
        elif 'first_last_opt' in line:
            # the value must be stripped, as done for the other string
            # fields: with the surrounding blanks it never equals 'T'
            parsed_data['first_last_opt'] = (
                line.split('=')[1].strip() == 'T')
        elif 'use_freezing' in line:
            parsed_data['use_freezing'] = (
                line.split('=')[1].strip() == 'T')
        elif ' ds ' in line:
            parsed_data['ds_au'] = float(line.split('=')[1].split('a.u.')[0])
        elif '   k_max' in line:
            parsed_data['k_max'] = float(line.split('=')[1].split('a.u.')[0])
        elif '   k_min_au' in line:
            # NOTE(review): the pattern searched here is 'k_min_au', while the
            # branch above searches 'k_max' - confirm against a real output.
            parsed_data['k_min_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_max' in line:
            parsed_data['suggested_k_max_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'suggested k_min' in line:
            parsed_data['suggested_k_min_au'] = float(
                line.split('=')[1].split('a.u.')[0])
        elif 'path_thr' in line:
            parsed_data['path_thr'] = float(line.split('=')[1].split('eV')[0])
        elif 'list of climbing images' in line:
            parsed_data['climbing_images_manual'] = [
                int(_) for _ in line.split(':')[1].split(',')[:-1]
            ]
        elif 'neb: convergence achieved in' in line:
            parsed_data['converged'] = [
                True, int(line.split('iteration')[0].split()[-1])
            ]
        elif any(i in line for i in all_warnings):
            message = [
                all_warnings[i] for i in all_warnings if i in line
            ][0]
            # A None message means "store the offending line itself"
            if message is None:
                message = line

            # Reset for every line: otherwise the messages of a previous
            # error block would be appended again at every later warning
            messages = []
            if '%%%%%%%%%%%%%%' in line:
                # Imported only when a QE error block is actually met.
                from aiida.parsers.plugins.quantumespresso.raw_parser_pw import parse_QE_errors
                message = None
                messages = parse_QE_errors(lines, count,
                                           parsed_data['warnings'])

            # if it found something, add to log
            parsed_data['warnings'].extend(messages)
            if message is not None:
                parsed_data['warnings'].append(message)

    try:
        num_images = parsed_data['num_of_images']
    except KeyError:
        try:
            num_images = input_dict['PATH']['num_of_images']
        except KeyError:
            raise QEOutputParsingError(
                "No information on the number "
                "of images available (neither in input nor in output")

    # one list of lines for each iteration block
    iteration_lines = data.split('-- iteration')[1:]
    iteration_lines = [i.split('\n') for i in iteration_lines]

    for iteration in iteration_lines:
        for count, line in enumerate(iteration):
            if 'activation energy (->)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['forward_activation_energy'].append(
                    activ_energy)
            elif 'activation energy (<-)' in line:
                activ_energy = float(line.split('=')[1].split('eV')[0])
                iteration_data['backward_activation_energy'].append(
                    activ_energy)
            elif 'image        energy (eV)        error (eV/A)        frozen' in line:
                energies = []
                forces = []
                frozen = []
                try:
                    # the rows of the table start two lines below the header;
                    # the first column of each row is skipped
                    for i in range(num_images):
                        split_line = iteration[count + 2 + i].split()[1:]
                        energies.append(float(split_line[0]))
                        forces.append(float(split_line[1]))
                        frozen.append(split_line[2] == 'T')
                    iteration_data['image_energies'].append(energies)
                    iteration_data['image_forces'].append(forces)
                    iteration_data['image_frozen'].append(frozen)
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing the image energies and forces.')
            elif 'climbing image' in line:
                iteration_data['climbing_image_auto'].append(
                    [int(_) for _ in line.split('=')[1].split(',')])
            elif 'path length' in line:
                path_length = float(line.split('=')[1].split('bohr')[0])
                iteration_data['path_length'].append(path_length * bohr_to_ang)
            elif 'inter-image distance' in line:
                image_dist = float(line.split('=')[1].split('bohr')[0])
                iteration_data['image_dist'].append(image_dist * bohr_to_ang)

    return parsed_data, dict(iteration_data), list(critical_warnings.values())
Exemplo n.º 22
0
def parse_pw_text_output(data,
                         xml_data=None,
                         structure_data=None,
                         input_dict=None):
    """
    Parses the text output of QE-PWscf.

    :param data: a string, the file as read by read()
    :param xml_data: the dictionary with the keys read from xml
                     (accepted for interface compatibility, not read here).
    :param structure_data: dictionary, coming from the xml, with info on the
                           structure (accepted for interface compatibility,
                           not read here).
    :param input_dict: dictionary with the input parameters. Only
                       input_dict['CONTROL']['calculation'] is read, to
                       ignore the "maximum number of steps" message in
                       molecular dynamics runs. May be None.

    :return parsed_data: dictionary with key values, referring to quantities
                         at the last scf step.
    :return trajectory_data: key,values referring to intermediate scf steps,
                             as in the case of vc-relax. Empty dictionary if no
                             value is present.
    :return critical_messages: a list with critical messages. If any is found in
                               parsed_data['warnings'], the calculation is FAILED!

    :raises QEOutputParsingError: if the header quantities cannot be found
                                  and the output contains no known warning.
    """
    qe_error_marker = '%%%%%%%%%%%%%%'

    parsed_data = {}
    parsed_data['warnings'] = []
    trajectory_data = {}

    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {
        'The maximum number of steps has been reached.':
        "The maximum step of the ionic/electronic relaxation has been reached.",
        'convergence NOT achieved after':
        "The scf cycle did not reach convergence.",
        # 'eigenvalues not converged':None, # special treatment
        'iterations completed, stopping':
        'Maximum number of iterations reached in Wentzcovitch Damped Dynamics.',
        'Maximum CPU time exceeded': 'Maximum CPU time exceeded',
        qe_error_marker: None,
    }

    minor_warnings = {
        'Warning:':
        None,
        'DEPRECATED:':
        None,
        'incommensurate with FFT grid':
        'The FFT is incommensurate: some symmetries may be lost.',
        'SCF correction compared to forces is too large, reduce conv_thr':
        "Forces are inaccurate (SCF correction is large): reduce conv_thr.",
    }

    # list(...) keeps the concatenation valid when items() returns a view.
    all_warnings = dict(
        list(critical_warnings.items()) + list(minor_warnings.items()))
    critical_messages = list(critical_warnings.values())

    lines = data.split('\n')

    # Find some useful quantities. Every one of them must be present in the
    # header, otherwise the output is treated as unparsable.
    alat = nat = ntyp = volume = nbnd = None
    for line in lines:
        if 'lattice parameter (alat)' in line:
            alat = float(line.split('=')[1].split('a.u')[0])
        elif 'number of atoms/cell' in line:
            nat = int(line.split('=')[1])
        elif 'number of atomic types' in line:
            ntyp = int(line.split('=')[1])
        elif 'unit-cell volume' in line:
            volume = float(line.split('=')[1].split('(a.u.)^3')[0])
        elif 'number of Kohn-Sham states' in line:
            nbnd = int(line.split('=')[1])
            break

    if any(item is None for item in (alat, nat, ntyp, volume, nbnd)):
        # Basic info is missing: try to get some error message instead.
        for count, line in enumerate(lines):
            matched = [key for key in all_warnings if key in line]
            if not matched:
                continue
            messages = [
                all_warnings[key] if all_warnings[key] is not None else line
                for key in matched
            ]
            if qe_error_marker in line:
                messages = parse_QE_errors(lines, count,
                                           parsed_data['warnings'])
            # if it found something, add to log
            if len(messages) > 0:
                parsed_data['warnings'].extend(messages)

        if parsed_data['warnings']:
            return parsed_data, trajectory_data, critical_messages
        # did not find any error message -> raise an Error and do not
        # return anything
        raise QEOutputParsingError("Parser can't load basic info.")

    # Save these quantities in the parsed_data, because they will be
    # useful for queries (maybe), and structure_data will not be stored as a ParameterData
    parsed_data['number_of_bands'] = nbnd
    parsed_data['number_of_atoms'] = nat
    parsed_data['number_of_species'] = ntyp
    parsed_data['volume'] = volume * bohr_to_ang**3

    # input_dict defaults to None and may lack the CONTROL namelist; in
    # both cases the run is simply not considered a molecular dynamics.
    try:
        is_md_run = 'md' in input_dict['CONTROL']['calculation']
    except (KeyError, TypeError):
        is_md_run = False

    c_bands_error = False

    # now grep quantities that can be considered isolated informations.
    for count, line in enumerate(lines):

        # special parsing of c_bands error
        if 'c_bands' in line and 'eigenvalues not converged' in line:
            c_bands_error = True
        elif "iteration #" in line and c_bands_error:
            # if there is another iteration, c_bands is not necessarily a problem
            # I put a warning only if c_bands error appears in the last iteration
            c_bands_error = False

        # Parsing of errors
        elif any(key in line for key in all_warnings):
            message = [
                all_warnings[key] for key in all_warnings if key in line
            ][0]
            if message is None:
                message = line

            # if the run is a molecular dynamics, I ignore that I reached the
            # last iteration step.
            if ('The maximum number of steps has been reached.' in line
                    and is_md_run):
                message = None

            if 'iterations completed, stopping' in line:
                value = message
                message = None
                if 'Wentzcovitch Damped Dynamics:' in line:
                    dynamic_iterations = int(line.split()[3])
                    # NOTE(review): max_dynamic_iterations is not assigned
                    # anywhere in this function; it has to be available at
                    # module level -- TODO confirm.
                    if max_dynamic_iterations == dynamic_iterations:
                        message = value

            # Start from an empty list on every line, so that the messages
            # of a previous QE error block are never appended a second time.
            messages = []
            if qe_error_marker in line:
                message = None
                messages = parse_QE_errors(lines, count,
                                           parsed_data['warnings'])

            # if it found something, add to log
            parsed_data['warnings'].extend(messages)
            if message is not None:
                parsed_data['warnings'].append(message)

    if c_bands_error:
        parsed_data['warnings'].append(
            "c_bands: at least 1 eigenvalues not converged")

    # I split the output text in the atomic SCF calculations.
    # the initial part should be things already contained in the xml.
    # (cell, initial positions, kpoints, ...) and I skip them.
    # In case, parse for them before this point.
    # Put everything in a trajectory_data dictionary
    relax_steps = [
        step.split('\n')
        for step in data.split('Self-consistent Calculation')[1:]
    ]

    # now I create a bunch of arrays for every step.
    for data_step in relax_steps:
        for count, line in enumerate(data_step):

            # NOTE: the chemical symbols are not those of AiiDA
            # since the AiiDA structure is different. So, I assume now that the
            # order of atoms is the same of the input atomic structure.

            # grep energy and eventually, magnetization
            if '!' in line:
                lowered = line.lower()
                if 'makov-payne' in lowered:
                    try:
                        for key in ['total', 'envir']:
                            if key in lowered:
                                En = float(line.split('=')[1].split('Ry')
                                           [0]) * ry_to_ev
                                name = key + '_makov-payne'
                                trajectory_data.setdefault(name,
                                                           []).append(En)
                                parsed_data[
                                    name +
                                    units_suffix] = default_energy_units
                    except Exception:
                        parsed_data['warnings'].append(
                            'Error while parsing the energy')
                else:
                    try:
                        # Parse both numbers before touching
                        # trajectory_data, so that a failure does not leave
                        # empty arrays behind.
                        En = float(
                            line.split('=')[1].split('Ry')[0]) * ry_to_ev
                        E_acc = float(data_step[count + 2].split('<')[1].split(
                            'Ry')[0]) * ry_to_ev

                        for key, value in [['energy', En],
                                           ['energy_accuracy', E_acc]]:
                            trajectory_data.setdefault(key, []).append(value)
                            parsed_data[key +
                                        units_suffix] = default_energy_units
                    except Exception:
                        parsed_data['warnings'].append(
                            'Error while parsing the energy')

            elif 'the Fermi energy is' in line:
                try:
                    # NOTE(review): the value is stored as the raw substring
                    # (not converted to float), unlike the other quantities
                    # -- confirm whether this is intended.
                    value = line.split('is')[1].split('ev')[0]
                    trajectory_data.setdefault('fermi_energy',
                                               []).append(value)
                    parsed_data['fermi_energy' +
                                units_suffix] = default_energy_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing Fermi energy from the output file.'
                    )

            elif 'Forces acting on atoms (Ry/au):' in line:
                try:
                    forces = []
                    j = 0
                    # Read the following lines until one force vector per
                    # atom was collected; running past the end of the step
                    # raises IndexError, reported as a warning below.
                    while True:
                        j += 1
                        line2 = data_step[count + j]
                        if 'atom ' in line2:
                            line2 = line2.split('=')[1].split()
                            # CONVERT FORCES IN eV/Ang
                            vec = [float(s) * ry_to_ev / \
                                   bohr_to_ang for s in line2]
                            forces.append(vec)
                        if len(forces) == nat:
                            break
                    trajectory_data.setdefault('forces', []).append(forces)
                    parsed_data['forces' + units_suffix] = default_force_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing forces.')

            # TODO: adding the parsing support for the decomposition of the forces

            elif 'Total force =' in line:
                try:  # note that I can't check the units: not written in output!
                    value = float(line.split('=')[1].split('Total')
                                  [0]) * ry_to_ev / bohr_to_ang
                    trajectory_data.setdefault('total_force',
                                               []).append(value)
                    parsed_data['total_force' +
                                units_suffix] = default_force_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing total force.')

            elif 'entering subroutine stress ...' in line:
                try:
                    # Look for the "P=" header in the next ten lines (the
                    # last match wins). The index is reset for every stress
                    # block, so a position found in a previous step is never
                    # reused.
                    header_index = None
                    for offset in range(1, 11):
                        if "P=" in data_step[count + offset]:
                            header_index = count + offset
                    if header_index is None:
                        raise QEOutputParsingError(
                            'Error while parsing stress: pressure line '
                            'not found.')
                    if '(Ry/bohr**3)' not in data_step[header_index]:
                        raise QEOutputParsingError(
                            'Error while parsing stress: unexpected units.')
                    stress = []
                    for row in range(1, 4):
                        line2 = data_step[header_index + row].split()
                        vec = [
                            float(s) * 10**(-9) * ry_si / (bohr_si)**3
                            for s in line2[0:3]
                        ]
                        stress.append(vec)
                    trajectory_data.setdefault('stress', []).append(stress)
                    parsed_data['stress' + units_suffix] = default_stress_units
                except Exception:
                    parsed_data['warnings'].append(
                        'Error while parsing stress tensor.')

    return parsed_data, trajectory_data, critical_messages
Exemplo n.º 23
0
def parse_raw_out_basic(out_file, calc_name):
    """
    A very simple parser for the standard out, usually aiida.out. Parses
    basic warnings, the wall/cpu time, the Fermi energy and the Drude plasma
    frequencies.

    :param out_file: the standard out to be parsed, as a list of lines
    :param calc_name: the name of the calculation, e.g. PROJWFC
    :return: parsed_data, a dictionary; parsed_data['warnings'] is the list
             of collected warning messages
    :raises QEOutputParsingError: if the parsed wall/cpu time cannot be
                                  converted to seconds
    """
    qe_error_marker = '%%%%%%%%%%%%%%'

    parsed_data = {}
    parsed_data['warnings'] = []
    # critical warnings: if any is found, the calculation status is FAILED
    critical_warnings = {'Maximum CPU time exceeded':'Maximum CPU time exceeded',
                         qe_error_marker:None,
                         }

    minor_warnings = {'Warning:':None,
                      'DEPRECATED:':None,
                      }
    # list(...) keeps the concatenation valid when items() returns a view.
    all_warnings = dict(
        list(critical_warnings.items()) + list(minor_warnings.items()))

    # Tensor components, in the order in which they are tested.
    drude_components = ('xx', 'yy', 'zz', 'xy', 'xz', 'yz')

    for count, line in enumerate(out_file):
        # parse the global file, for informations that are written only once
        if calc_name in line and 'WALL' in line:
            try:
                wall_time = line.split('CPU')[1].split('WALL')[0]
                cpu_time = line.split(':')[1].split('CPU')[0]
            except (IndexError, ValueError):
                # A missing 'CPU' or ':' makes the [1] lookup fail with
                # IndexError: report it and skip the conversion, which
                # would otherwise work on undefined values.
                parsed_data['warnings'].append('Error while parsing wall time.')
            else:
                parsed_data['wall_time'] = wall_time
                parsed_data['cpu_time'] = cpu_time
                try:
                    parsed_data['wall_time_seconds'] = convert_qe_time_to_sec(wall_time)
                    parsed_data['cpu_time_seconds'] = convert_qe_time_to_sec(cpu_time)
                except ValueError:
                    raise QEOutputParsingError("Unable to convert wall_time in seconds.")
        # Parsing of errors
        elif any(key in line for key in all_warnings):
            message = [all_warnings[key] for key in all_warnings if key in line][0]
            if message is None:
                message = line
            # Start from an empty list on every line, so that the messages
            # of a previous QE error block are never appended a second time.
            messages = []
            if qe_error_marker in line:
                message = None
                messages = parse_QE_errors(out_file, count, parsed_data['warnings'])
            # if it found something, add to log
            parsed_data['warnings'].extend(messages)
            if message is not None:
                parsed_data['warnings'].append(message)
        elif 'Fermi energy' in line and '=' in line:
            # the value is stored as the raw substring between '=' and 'eV'
            parsed_data['fermi_energy'] = line.split('=')[1].split('eV')[0]
            parsed_data['fermi_energy_units'] = 'eV'
        else:
            for component in drude_components:
                if 'Drude plasma frequency ({})'.format(component) in line:
                    parsed_data['drude_plasma_frequency_' + component] = (
                        line.split('=')[1].split('eV')[0])
                    # the units key is written together with the xx
                    # component only
                    if component == 'xx':
                        parsed_data['drude_plasma_frequency_units'] = 'eV'
                    break

    return parsed_data
Exemplo n.º 24
0
    def _parse_bands_and_projections(self, out_info_dict):
        """
        Function that parsers the standard out into bands and projection
        data.

        :param out_info_dict: used to pass technical internal variables
                              to helper functions in compact form. It must
                              contain the standard out as a list of lines
                              under the key "out_file"; the line indices,
                              band/k-point counts, spin flag, k-vectors,
                              structure and orbitals found here are stored
                              back into it.

        :return: append_nodes_list, a list of (label, data) tuples built
                 from the results of spin_dependent_subparcer
        :raises QEOutputParsingError: if the output has an unexpected layout
        :raises ValueError: if the parent calculation has no structure, or
                            the number of k-points is odd for a spin
                            polarized parent
        """
        out_file = out_info_dict["out_file"]
        k_lines = []
        e_lines = []
        psi_lines = []
        wfc_lines = []
        append_nodes_list = []

        # Record the index of every line that starts a section of interest.
        for index, line in enumerate(out_file):
            if "k =" in line:
                k_lines.append(index)
            if "==== e(" in line:
                e_lines.append(index)
            if "|psi|^2" in line:
                psi_lines.append(index)
            if "state #" in line:
                wfc_lines.append(index)

        out_info_dict["k_lines"] = k_lines
        out_info_dict["e_lines"] = e_lines
        out_info_dict["psi_lines"] = psi_lines
        out_info_dict["wfc_lines"] = wfc_lines

        #Basic check
        if len(e_lines) != len(psi_lines):
            raise QEOutputParsingError("Not formatted in a manner "
            " that can be handled")
        if not k_lines:
            # without this check the modulo below fails with
            # ZeroDivisionError
            raise QEOutputParsingError("No k-points found in the output")
        if len(psi_lines) % len(k_lines) != 0:
            raise QEOutputParsingError("Band Energy Points is not "
            " a multiple of kpoints")
        #calculates the number of bands (floor division: it must stay an
        #integer, since it is used as an array dimension)
        out_info_dict["num_bands"] = len(psi_lines) // len(k_lines)

        # Uses the parent input parameters, and checks if the parent used
        # spin calculations try to replace with a query, if possible.
        parent_remote =  self._calc.get_inputs_dict()['parent_calc_folder']
        parent_calc = parent_remote.get_inputs_dict()['remote_folder']
        out_info_dict["parent_calc"] = parent_calc
        parent_param = parent_calc.get_outputs_dict()['output_parameters']
        try:
            structure = parent_calc.get_inputs_dict()['structure']
        except KeyError:
            raise ValueError("The parent had no structure! Cannot parse"
                             "from this!")
        try:
            nspin = parent_param.get_dict()['number_of_spin_components']
        except KeyError:
            # no information on the spin: treat as not spin polarized
            spin = False
        else:
            spin = nspin != 1
        out_info_dict["spin"] = spin

        #changes k-numbers to match spin
        #because if spin is on, k points double for up and down
        k_states = len(k_lines)
        if spin:
            if k_states % 2 != 0:
                raise ValueError("Internal formatting error regarding spin")
            k_states = k_states // 2
        out_info_dict["k_states"] = k_states

        #   adds in the k-vector for each kpoint
        k_vect = [out_file[k_lines[i]].split()[2:] for i in range(k_states)]
        out_info_dict["k_vect"] = np.array(k_vect)
        out_info_dict["structure"] = structure
        out_info_dict["orbitals"] = find_orbitals_from_statelines(out_info_dict)

        if spin:
            # I had to guess what the ordering of the spin is, because
            # the projwfc.x documentation doesn't say, but looking at the
            # source code I found:
            #
            # DO is=1,nspin
            #   IF (nspin==2) THEN
            #       IF (is==1) filename=trim(filproj)//'.up'
            #       IF (is==2) filename=trim(filproj)//'.down'
            #
            # Which would say that it is reasonable to assume that the
            # spin up states are written first, then spin down
            #
            out_info_dict["spin_down"] = False
            bands_up, projections_up = spin_dependent_subparcer(
                out_info_dict)
            append_nodes_list += [("projections_up", projections_up),
                                  ("bands_up", bands_up)]
            out_info_dict["spin_down"] = True
            bands_down, projections_down = spin_dependent_subparcer(
                out_info_dict)
            append_nodes_list += [("projections_down", projections_down),
                                  ("bands_down", bands_down)]
        else:
            out_info_dict["spin_down"] = False
            bands_data, projection_data = spin_dependent_subparcer(
                out_info_dict)
            append_nodes_list += [("projections", projection_data),
                                  ("bands", bands_data)]

        return append_nodes_list
Exemplo n.º 25
0
def parse_raw_output_neb(out_file, input_dict, parser_opts=None):
    """
    Parses the output of a neb calculation
    Receives in input the paths to the output file.

    :param out_file: path to neb std output
    :param input_dict: dictionary with the neb input parameters
    :param parser_opts: not used

    :return parameter_data: a dictionary with parsed parameters
    :return iteration_data: a dictionary with arrays, one entry per iteration
    :return job_successful: a boolean that is False in case of failed calculations

    :raises QEOutputParsingError: if the output file cannot be opened, or if
        the text parser fails on a run that reached its end

    2 different keys to check in output: parser_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first is expected to be empty unless QE failures or unfinished jobs.
    """
    job_successful = True

    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Parser v{}'.format(parser_version)

    # load NEB out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:  # non existing output file -> job crashed
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    if not out_lines:  # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors).
    # The marker contains no newline, so searching the whole text is the
    # same as searching line by line.
    finished_run = 'JOB DONE' in out_lines
    if not finished_run:  # error if the job has not finished
        warning = 'QE neb run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse the text output of the neb calculation
    try:
        out_data, iteration_data, critical_messages = parse_neb_text_output(
            out_lines, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append(
                'Error while parsing the output file')
            out_data = {}
            iteration_data = {}
            critical_messages = []
        else:  # if it was finished and I got error, it's a mistake of the parser
            raise QEOutputParsingError('Error while parsing NEB output')

    # I add in the out_data all the last elements of iteration_data values.
    # I leave the possibility to skip some large arrays (None for the time being).
    skip_keys = []
    # items() instead of iteritems(): the latter only exists on Python 2
    for key, values in iteration_data.items():
        if key in skip_keys:
            continue
        out_data[key] = values[-1]

    # if there is a severe error, the calculation is FAILED
    if any(message in out_data['warnings'] for message in critical_messages):
        job_successful = False

    # merge the two dictionaries; parser_info keys take precedence
    parameter_data = dict(out_data)
    parameter_data.update(parser_info)

    # return various data.
    # parameter data will be mapped in ParameterData
    # iteration_data in ArrayData
    return parameter_data, iteration_data, job_successful
Exemplo n.º 26
0
def parse_raw_output(out_file,
                     input_dict,
                     parser_opts=None,
                     xml_file=None,
                     dir_with_bands=None):
    """
    Parses the output of a calculation
    Receives in input the paths to the output file and the xml file.

    :param out_file: path to pw std output
    :param input_dict: dictionary with the input parameters, forwarded to
                       parse_pw_text_output
    :param parser_opts: not used
    :param dir_with_bands: path to directory with all k-points (Kxxxxx) folders
    :param xml_file: path to QE data-file.xml; if None the xml parsing is
                     skipped

    :return parameter_data: a dictionary with parsed data
    :return trajectory_data: a dictionary with the arrays of the
                             intermediate steps (and the k-points, if found
                             in the xml data)
    :return structure_data: a dictionary with the structure read from the
                            xml file (empty if no xml file is given)
    :return job_successful: a boolean that is False in case of failed calculations

    :raises aiida.parsers.plugins.quantumespresso.QEOutputParsingError: for errors in the parsing,
    :raises AssertionError: if two keys in the parsed dicts are found to be qual

    3 different keys to check in output: parser_warnings, xml_warnings and warnings.
    On an upper level, these flags MUST be checked.
    The first two are expected to be empty unless QE failures or unfinished jobs.
    """
    # TODO: a lot of ifs could be cleaned out

    job_successful = True

    parser_version = '0.1'
    parser_info = {}
    parser_info['parser_warnings'] = []
    parser_info['parser_info'] = 'AiiDA QE Basic Parser v{}'.format(
        parser_version)

    # if xml_file is not given in input, skip its parsing
    if xml_file is not None:
        try:
            with open(xml_file, 'r') as f:
                xml_lines = f.read()  # Note: read() and not readlines()
        except IOError:
            raise QEOutputParsingError(
                "Failed to open xml file: {}.".format(xml_file))

        xml_data, structure_data = parse_pw_xml_output(xml_lines,
                                                       dir_with_bands)
        # Note the xml file should always be consistent.
    else:
        parser_info['parser_warnings'].append(
            'Skipping the parsing of the xml file.')
        xml_data = {}
        structure_data = {}

    # load QE out file
    try:
        with open(out_file, 'r') as f:
            out_lines = f.read()
    except IOError:  # non existing output file -> job crashed
        raise QEOutputParsingError(
            "Failed to open output file: {}.".format(out_file))

    if not out_lines:  # there is an output file, but it's empty -> crash
        job_successful = False

    # check if the job has finished (that doesn't mean without errors).
    # The marker contains no newline, so searching the whole text is the
    # same as searching line by line.
    finished_run = 'JOB DONE' in out_lines
    if not finished_run:  # error if the job has not finished
        warning = 'QE pw run did not reach the end of the execution.'
        parser_info['parser_warnings'].append(warning)
        job_successful = False

    # parse
    try:
        out_data, trajectory_data, critical_messages = parse_pw_text_output(
            out_lines, xml_data, structure_data, input_dict)
    except QEOutputParsingError:
        if not finished_run:  # I try to parse it as much as possible
            parser_info['parser_warnings'].append(
                'Error while parsing the output file')
            out_data = {}
            trajectory_data = {}
            critical_messages = []
        else:  # if it was finished and I got error, it's a mistake of the parser
            raise QEOutputParsingError('Error while parsing QE output')

    # I add in the out_data all the last elements of trajectory_data values.
    # Safe for some large arrays, that I will likely never query.
    skip_keys = [
        'forces', 'lattice_vectors_relax', 'atomic_positions_relax',
        'atomic_species_name'
    ]
    # Iterate over a snapshot, because entries are popped inside the loop.
    for key, values in list(trajectory_data.items()):
        if key in skip_keys:
            continue
        # note: an empty array raises IndexError here
        out_data[key] = values[-1]
        if len(values) == 1:
            # delete eventual keys that are not arrays (scf cycles)
            trajectory_data.pop(key)

    # As the k points are an array that is rather large, and again it's not something I'm going to parse likely
    # since it's an info mainly contained in the input file, I move it to the trajectory data
    for key in ['k_points', 'k_points_weights']:
        try:
            trajectory_data[key] = xml_data.pop(key)
        except KeyError:
            pass

    # if there is a severe error, the calculation is FAILED
    if any(message in out_data['warnings'] for message in critical_messages):
        job_successful = False

    # Iterate over a copy of the keys, because entries of out_data are
    # deleted inside the loop.
    for key in list(out_data):
        if key not in xml_data:
            continue
        if key in ('fermi_energy', 'fermi_energy_units'):
            # an exception for the (only?) key that may be found on both:
            # the value from the xml is kept
            del out_data[key]
        else:
            raise AssertionError(
                '{} found in both dictionaries, '
                'values: {} vs. {}'.format(
                    key, out_data[key],
                    xml_data[key]))  # this shouldn't happen!

    # Merge the dictionaries: later updates take precedence.
    parameter_data = dict(xml_data)
    parameter_data.update(out_data)
    parameter_data.update(parser_info)

    # return various data.
    # parameter data will be mapped in ParameterData
    # trajectory_data in ArrayData
    # structure_data in a Structure
    return parameter_data, trajectory_data, structure_data, job_successful
Exemplo n.º 27
0
def spin_dependent_subparcer(out_info_dict):
    """
    Extract the band energies and the orbital projections of one spin
    channel from the output-file lines stored in out_info_dict.

    For every (k-point, band) state the band energy is the fifth
    whitespace-separated token of the line indexed by ``e_lines``. The lines
    between that one and the matching ``psi_lines`` entry are scanned: the
    text between '=' or '+' and the next '*' is taken as a projection
    weight, the text between '#' and the next ']' as the (1-based) id of the
    wavefunction the weight belongs to. When ``spin_down`` is true, the
    states are read from the block that follows the first ``k_states``
    k-points.

    :param out_info_dict: contains various technical internals useful in
        parsing; the keys read here are "out_file", "spin_down",
        "wfc_lines", "k_states", "num_bands", "e_lines", "psi_lines",
        "parent_calc", "k_vect", "orbitals" and "energy"
    :return: tuple (bands_data, projection_data), i.e. the BandsData first
        and the ProjectionData second
    :raises QEOutputParsingError: if an IndexError occurs while reading the
        states (including a mismatch between the number of ids and weights),
        or if the number of orbitals differs from the number of projections
    :raises AttributeError: if a projection does not have the shape of the
        bands array
    """

    info = out_info_dict  # short alias, the dictionary is read many times
    stdout_lines = info["out_file"]
    is_spin_down = info["spin_down"]

    # patterns used to pick the state composition out of the output lines
    leading_weight_re = re.compile(r"\=(.*?)\*")  # first term of a state
    following_weight_re = re.compile(r"\+(.*?)\*")  # remaining terms
    state_id_re = re.compile(r"\#(.*?)\]")  # id of the wavefunction

    # containers filled while scanning the output
    n_wfc = len(info["wfc_lines"])
    n_kpts = info["k_states"]
    n_bands = info["num_bands"]
    bands = np.zeros([n_kpts, n_bands])
    projection_arrays = np.zeros([n_kpts, n_bands, n_wfc])

    try:
        for kpt in range(n_kpts):
            # the spin-down states are listed after the first n_kpts blocks
            block = kpt + n_kpts if is_spin_down else kpt
            for band in range(n_bands):
                state = block * n_bands + band
                energy_line = info["e_lines"][state]
                # the energy is stored as read; numpy converts it to float
                bands[kpt, band] = stdout_lines[energy_line].split()[4]

                # collect the composition listed between the energy line
                # and the psi line of this state
                weights = []
                state_ids = []
                for line_no in range(energy_line + 1,
                                     info["psi_lines"][state]):
                    text = stdout_lines[line_no]
                    weights.extend(leading_weight_re.findall(text))
                    weights.extend(following_weight_re.findall(text))
                    state_ids.extend(state_id_re.findall(text))
                if len(state_ids) != len(weights):
                    raise IndexError

                for raw_id, raw_weight in zip(state_ids, weights):
                    wfc_number = int(raw_id)
                    weight = float(raw_weight)
                    # ids in the output are 1-based
                    projection_arrays[kpt, band, wfc_number - 1] = weight
    except IndexError:
        raise QEOutputParsingError("the standard out file does not "
                                   "comply with the official "
                                   "documentation.")

    bands_data = BandsData()
    try:
        # Prefer the kpoints node of the parent calculation, provided it
        # has as many k-points as were found in the output
        parent_kpoints = info["parent_calc"].get_inputs_dict()['kpoints']
        if len(info['k_vect']) != len(parent_kpoints.get_kpoints()):
            raise AttributeError
        bands_data.set_kpointsdata(parent_kpoints)
    except AttributeError:
        # fall back to the k-vectors parsed from the output
        bands_data.set_kpoints(info['k_vect'].astype(float))

    bands_data.set_bands(bands, units='eV')

    orbitals = info["orbitals"]
    n_orbitals = len(orbitals)
    if n_orbitals != np.shape(projection_arrays[0, 0, :])[0]:
        raise QEOutputParsingError("There was an internal parsing error, "
                                   " the projection array shape does not agree"
                                   " with the number of orbitals")
    projection_data = ProjectionData()
    projection_data.set_reference_bandsdata(bands_data)
    projections = [projection_arrays[:, :, idx] for idx in range(n_orbitals)]

    # The bands check is done by hand here, since bands_check=False is
    # passed to set_projectiondata below
    bands_shape = np.shape(bands)
    for projection in projections:
        if np.shape(projection) != bands_shape:
            raise AttributeError("Projections not the same shape as the bands")

    # the pdos of every orbital comes from the dedicated sub-parser; the
    # same energy array is repeated once per orbital
    pdos_arrays = spin_dependent_pdos_subparcer(out_info_dict)
    energy_arrays = [info["energy"]] * n_orbitals
    projection_data.set_projectiondata(orbitals,
                                       list_of_projections=projections,
                                       list_of_energy=energy_arrays,
                                       list_of_pdos=pdos_arrays,
                                       bands_check=False)
    return bands_data, projection_data
Exemplo n.º 28
0
def _cp_xml_key(*tagnames):
    """Join tag names with '_' and normalise them into a parsed_data key."""
    return '_'.join(tagnames).replace('-', '_').lower()


def _cp_xml_first_child(parent_tag, child_name):
    """Return the first descendant element of parent_tag named child_name."""
    return parent_tag.getElementsByTagName(child_name)[0]


def _cp_xml_child_scalar(parent_tag, child_name):
    """Return the text of the first child_name element as a float."""
    return float(_cp_xml_first_child(parent_tag, child_name).childNodes[0].data)


def _cp_xml_child_vector(parent_tag, child_name):
    """Return the whitespace-separated text of child_name as a list of floats."""
    text = _cp_xml_first_child(parent_tag, child_name).childNodes[0].data
    return [float(token) for token in text.rstrip().split()]


def _cp_xml_child_matrix(parent_tag, child_name):
    """Return the floats of child_name grouped in rows of three values."""
    flat = _cp_xml_child_vector(parent_tag, child_name)
    # a trailing, incomplete row is dropped
    complete = len(flat) - len(flat) % 3
    return [flat[start:start + 3] for start in range(0, complete, 3)]


def _cp_xml_store(parsed_data, parent_tag, key_parts, reader, optional=False):
    """Read the element key_parts[-1] under parent_tag and store its value."""
    try:
        value = reader(parent_tag, key_parts[-1])
    except Exception:
        if optional:
            # best effort: an optional quantity is skipped when it is
            # missing or cannot be converted
            return
        raise
    parsed_data[_cp_xml_key(*key_parts)] = value


def _cp_xml_parse_timestep(step_tag, parsed_data):
    """Store in parsed_data the quantities found under one STEP0/STEPM tag."""
    _cp_xml_store(parsed_data, step_tag, ('ACCUMULATORS',),
                  _cp_xml_child_vector, optional=True)

    group = 'IONS_POSITIONS'
    group_tag = _cp_xml_first_child(step_tag, group)
    _cp_xml_store(parsed_data, group_tag, (group, 'stau'),
                  _cp_xml_child_matrix)
    _cp_xml_store(parsed_data, group_tag, (group, 'svel'),
                  _cp_xml_child_matrix)
    _cp_xml_store(parsed_data, group_tag, (group, 'taui'),
                  _cp_xml_child_matrix, optional=True)
    _cp_xml_store(parsed_data, group_tag, (group, 'cdmi'),
                  _cp_xml_child_vector, optional=True)
    _cp_xml_store(parsed_data, group_tag, (group, 'force'),
                  _cp_xml_child_matrix, optional=True)

    group = 'IONS_NOSE'
    group_tag = _cp_xml_first_child(step_tag, group)
    for name in ('nhpcl', 'nhpdim', 'xnhp'):
        _cp_xml_store(parsed_data, group_tag, (group, name),
                      _cp_xml_child_scalar)
    _cp_xml_store(parsed_data, group_tag, (group, 'vnhp'),
                  _cp_xml_child_scalar, optional=True)

    _cp_xml_store(parsed_data, step_tag, ('ekincm',),
                  _cp_xml_child_scalar, optional=True)

    # the remaining groups must be present, but all their children are
    # optional
    group = 'ELECTRONS_NOSE'
    group_tag = _cp_xml_first_child(step_tag, group)
    for name in ('xnhe', 'vnhe'):
        _cp_xml_store(parsed_data, group_tag, (group, name),
                      _cp_xml_child_scalar, optional=True)

    group = 'CELL_PARAMETERS'
    group_tag = _cp_xml_first_child(step_tag, group)
    for name in ('ht', 'htvel', 'gvel'):
        _cp_xml_store(parsed_data, group_tag, (group, name),
                      _cp_xml_child_matrix, optional=True)

    group = 'CELL_NOSE'
    group_tag = _cp_xml_first_child(step_tag, group)
    for name in ('xnhh', 'vnhh'):
        _cp_xml_store(parsed_data, group_tag, (group, name),
                      _cp_xml_child_matrix, optional=True)


def parse_cp_xml_output(data):
    """
    Parse xml data
    data must be a single string, as returned by file.read() (notice the
    difference with parse_text_output!)
    On output, a dictionary with parsed values.
    Democratically, we have decided to use picoseconds as units of time, eV for energies, Angstrom for lengths.

    The cards are read in this order: HEADER, CONTROL, STATUS, CELL, IONS,
    PLANE WAVES, SPIN, EXCHANGE_CORRELATION, BRILLOUIN_ZONE, PARALLELISM
    (optional as a whole), TIMESTEPS and BAND_STRUCTURE_INFO. In the
    TIMESTEPS card both STEP0 and STEPM are stored under the same keys, so
    a value found in STEPM replaces the one found in STEP0.

    :param data: the content of the xml file, as a single string
    :return: a dictionary with the parsed values
    :raises QEOutputParsingError: if the TIME units are not 'pico-seconds',
        if the k-points units are not '2 pi / a', or if the Monkhorst-Pack
        tags, the K-POINT.# tags or the TIMESTEPS card cannot be parsed
    """
    import copy

    dom = parseString(data)

    parsed_data = {}

    # CARD HEADER
    parsed_data = copy.deepcopy(xml_card_header(parsed_data, dom))

    # CARD CONTROL
    control_card = read_xml_card(dom, 'CONTROL')

    tagname = 'PP_CHECK_FLAG'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_bool(
        tagname, control_card)

    # CARD STATUS
    status_card = read_xml_card(dom, 'STATUS')

    tagname = 'STEP'
    attrname = 'ITERATION'
    parsed_data[_cp_xml_key(tagname, attrname)] = int(
        parse_xml_child_attribute_str(tagname, attrname, status_card))

    tagname = 'TIME'
    elapsed = parse_xml_child_float(tagname, status_card)
    units = parse_xml_child_attribute_str(tagname, 'UNITS', status_card)
    if units not in ['pico-seconds']:
        raise QEOutputParsingError(
            "Units {} are not supported by parser".format(units))
    parsed_data[_cp_xml_key(tagname)] = elapsed

    tagname = 'TITLE'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_str(
        tagname, status_card)

    # CARD CELL
    parsed_data, lattice_vectors, volume = copy.deepcopy(
        xml_card_cell(parsed_data, dom))

    # CARD IONS
    parsed_data = copy.deepcopy(
        xml_card_ions(parsed_data, dom, lattice_vectors, volume))

    # CARD PLANE WAVES
    parsed_data = copy.deepcopy(xml_card_planewaves(parsed_data, dom, 'cp'))

    # CARD SPIN
    parsed_data = copy.deepcopy(xml_card_spin(parsed_data, dom))

    # CARD EXCHANGE_CORRELATION
    parsed_data = copy.deepcopy(xml_card_exchangecorrelation(parsed_data, dom))

    # TODO CARD OCCUPATIONS

    # CARD BRILLOUIN ZONE
    # TODO: k points are saved for CP... Why?
    bz_card = read_xml_card(dom, 'BRILLOUIN_ZONE')

    tagname = 'NUMBER_OF_K-POINTS'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_integer(
        tagname, bz_card)

    tagname = 'UNITS_FOR_K-POINTS'
    attrname = 'UNITS'
    metric = parse_xml_child_attribute_str(tagname, attrname, bz_card)
    if metric not in ['2 pi / a']:
        raise QEOutputParsingError(
            'Error parsing attribute %s, tag %s inside %s, units unknown' %
            (attrname, tagname, bz_card.tagName))
    parsed_data[_cp_xml_key(tagname)] = metric

    # TODO: check what happens if one does not use the monkhorst pack in the code
    # the grid is stored in the attributes nk1..nk3, the offset in k1..k3
    for tagname, attr_prefix in (('MONKHORST_PACK_GRID', 'nk'),
                                 ('MONKHORST_PACK_OFFSET', 'k')):
        try:
            mp_tag = bz_card.getElementsByTagName(tagname)[0]
            parsed_data[_cp_xml_key(tagname)] = [
                int(mp_tag.getAttribute(attr_prefix + str(axis + 1)))
                for axis in range(3)
            ]
        except Exception:
            raise QEOutputParsingError('Error parsing tag %s inside %s.' %
                                       (tagname, bz_card.tagName))

    try:
        kpoints = []
        for index in range(parsed_data['number_of_k_points']):
            kpoint_tag = bz_card.getElementsByTagName(
                'K-POINT.' + str(index + 1))[0]
            raw_coords = kpoint_tag.getAttribute('XYZ').replace(
                '\n', '').rsplit()
            coords = [float(token) for token in raw_coords]

            # the units were validated above, so this is the only metric
            # that can be found here
            if parsed_data['units_for_k_points'] == '2 pi / a':
                coords = [
                    coord / parsed_data['lattice_parameter']
                    for coord in coords
                ]

                weight = float(kpoint_tag.getAttribute('WEIGHT'))

                kpoints.append([coords, weight])

        parsed_data['k_point'] = kpoints
    except Exception:
        raise QEOutputParsingError('Error parsing tag K-POINT.# inside %s.' %
                                   (bz_card.tagName))

    tagname = 'NORM-OF-Q'
    # TODO decide if save this parameter
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_float(
        tagname, bz_card)

    # CARD PARALLELISM
    # can be optional: parsing stops silently at the first tag that cannot
    # be read, keeping the values stored up to that point
    try:
        parallelism_card = read_xml_card(dom, 'PARALLELISM')
        for tagname in ('GRANULARITY_OF_K-POINTS_DISTRIBUTION',
                        'NUMBER_OF_PROCESSORS',
                        'NUMBER_OF_PROCESSORS_PER_POOL',
                        'NUMBER_OF_PROCESSORS_PER_IMAGE',
                        'NUMBER_OF_PROCESSORS_PER_TASKGROUP',
                        'NUMBER_OF_PROCESSORS_PER_POT',
                        'NUMBER_OF_PROCESSORS_PER_BAND_GROUP',
                        'NUMBER_OF_PROCESSORS_PER_DIAGONALIZATION'):
            parsed_data[_cp_xml_key(tagname)] = parse_xml_child_integer(
                tagname, parallelism_card)
    except Exception:
        pass

    # CARD TIMESTEPS
    cardname = 'TIMESTEPS'
    timesteps_card = read_xml_card(dom, cardname)

    for tagname in ['STEP0', 'STEPM']:
        try:
            step_tag = timesteps_card.getElementsByTagName(tagname)[0]
            _cp_xml_parse_timestep(step_tag, parsed_data)
        except Exception:
            raise QEOutputParsingError(
                'Error parsing CARD {}'.format(cardname))

    # CARD BAND_STRUCTURE_INFO
    band_card = read_xml_card(dom, 'BAND_STRUCTURE_INFO')

    tagname = 'NUMBER_OF_ATOMIC_WFC'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_integer(
        tagname, band_card)

    # the number of electrons is read as a float and truncated to an integer
    tagname = 'NUMBER_OF_ELECTRONS'
    parsed_data[_cp_xml_key(tagname)] = int(
        parse_xml_child_float(tagname, band_card))

    tagname = 'NUMBER_OF_BANDS'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_integer(
        tagname, band_card)

    tagname = 'NUMBER_OF_SPIN_COMPONENTS'
    parsed_data[_cp_xml_key(tagname)] = parse_xml_child_integer(
        tagname, band_card)

    # TODO
    # - EIGENVALUES (that actually just contains occupations)
    #   Why should I be interested in that, if CP works for insulators only?
    # - EIGENVECTORS
    # - others TODO are written in the function

    return parsed_data