def parse_bands_file(bands_lines):
    """
    Parses the returned bands.1 and bands.2 file and returns a complete
    bandsData object. bands.1 has the form: k value, energy

    NOTE: this parser is not finished. The lines are iterated but nothing is
    extracted from them yet, so the returned node is fed empty band values
    and empty labels.

    :param bands_lines: string of the read in bands file
    :return: the BandsData node built from the (currently empty) band values
    """
    # TODO not finished
    # read bands out of file:
    bands_values = []  # init an array of arrays nkpoint * ...
    bands_labels = []  # label for each row.

    # TODO fill and correct fermi energy.

    # TODO we need to get the cell from StructureData node
    # and KpointsData node from inpxml
    fleur_bands = BandsData()
    #fleur_bands.set_cell(cell)
    #fleur_bands.set_kpoints(kpoints, cartesian=True)
    fleur_bands.set_bands(bands=bands_values, units='eV', labels=bands_labels)

    # Placeholder for the real parsing loop: the input is walked but no value
    # is read from it yet.
    for line in bands_lines:
        pass
    return fleur_bands
def sub_create_bands_data(cls, user=None):
    """
    Create and store a small provenance chain ending in a BandsData node.

    The chain is: StructureData --(INPUT "S1")--> JobCalculation
    --(CREATE)--> BandsData, where the bands node references a stored
    KpointsData built from a cubic cell and ``set_kpoints_path()``.

    :param cls: object providing the ``computer`` used for the calculation
    :param user: optional user; when given, its ``_dbuser`` is set as the
        owner of every node before that node is stored
    :return: the stored BandsData node
    """
    from aiida.orm.data.array.kpoints import KpointsData
    from aiida.orm import JobCalculation
    from aiida.orm.data.structure import StructureData
    from aiida.common.links import LinkType
    from aiida.orm.data.array.bands import BandsData
    import numpy

    def _own_and_store(node):
        # Assign the requested owner (if any) right before storing the node.
        if user is not None:
            node.dbnode.user = user._dbuser
        node.store()

    # Input structure with a single mixed Ba/Ti site.
    structure = StructureData(cell=((2., 0., 0.), (0., 2., 0.), (0., 0., 2.)))
    structure.append_atom(
        position=(0., 0., 0.),
        symbols=['Ba', 'Ti'],
        weights=(1., 0.),
        name='mytype')
    _own_and_store(structure)

    # Calculation taking the structure as input.
    calculation = JobCalculation(
        computer=cls.computer,
        resources={'num_machines': 1, 'num_mpiprocs_per_machine': 1})
    _own_and_store(calculation)
    calculation.add_link_from(structure, "S1", LinkType.INPUT)
    calculation._set_state(calc_states.RETRIEVING)

    # define a cell
    alat = 4.
    cell = numpy.array([
        [alat, 0., 0.],
        [0., alat, 0.],
        [0., 0., alat],
    ])

    kpoints = KpointsData()
    kpoints.set_cell(cell)
    kpoints.set_kpoints_path()
    _own_and_store(kpoints)

    # One row of four identical band energies per k-point; row i holds i.
    bands_node = BandsData()
    bands_node.set_kpointsdata(kpoints)
    n_kpoints = kpoints.get_kpoints().shape[0]
    input_bands = numpy.array(
        [numpy.ones(4) * row for row in range(n_kpoints)])
    bands_node.set_bands(input_bands, units='eV')
    _own_and_store(bands_node)
    bands_node.add_link_from(calculation, link_type=LinkType.CREATE)

    return bands_node
def _get_output_nodes(self, output_path, bands_path):
    """
    Extracts output nodes from the standard output and standard error files.
    (And XML and JSON files)

    :param output_path: path of the output file, or None if it is missing
    :param bands_path: path of the bands file, or None if there is none
    :return: tuple ``(successful, result_list)`` where ``result_list`` holds
        ``(link_name, node)`` pairs: always the output parameters and, when
        ``bands_path`` is given, a BandsData node plus a ParameterData node
        with the k-point coordinates
    """
    result_list = []

    # Add errors
    successful = True
    if output_path is None:
        errors_list = ['WARNING: No aiida.out file...']
    else:
        successful, errors_list = self.get_errors_from_file(output_path)

    result_dict = {}
    result_dict["errors"] = errors_list

    # Add warnings
    # NOTE(review): the two helpers below are also called when output_path is
    # None; confirm that they cope with a missing file.
    warnings_list = self.get_warnings_from_file(output_path)
    result_dict["warnings"] = warnings_list

    # Add output data
    output_dict = self.get_output_from_file(output_path)
    result_dict.update(output_dict)

    # Add parser info dictionary
    parser_version = 'aiida-0.11.0--plugin-0.11.5'
    parser_info = {
        'parser_info': 'AiiDA Vibra Parser V. {}'.format(parser_version),
        'parser_warnings': [],
    }

    # Merge via copy + update: ``a.items() + b.items()`` only works when
    # items() returns lists (Python 2). Parser info wins on key clashes, as
    # it did before.
    parsed_dict = dict(result_dict)
    parsed_dict.update(parser_info)

    output_data = ParameterData(dict=parsed_dict)
    link_name = self.get_linkname_outparams()
    result_list.append((link_name, output_data))

    # Parse band-structure information if available
    if bands_path is not None:
        bands, coords = self.get_bands(bands_path)
        from aiida.orm.data.array.bands import BandsData
        arraybands = BandsData()
        arraybands.set_kpoints(
            self._calc.inp.bandskpoints.get_kpoints(cartesian=True))
        arraybands.set_bands(bands, units="eV")
        result_list.append((self.get_linkname_bandsarray(), arraybands))
        bandsparameters = ParameterData(dict={"kp_coordinates": coords})
        result_list.append(
            (self.get_linkname_bandsparameters(), bandsparameters))

    return successful, result_list
def connect_structure_bands(structure):
    """
    Build and store a minimal BandsData node together with its KpointsData.

    The k-points come from a 'G'-'M' path on a cubic cell of side 4.0 and the
    bands are a fixed 2x2 array.

    :param structure: accepted for interface reasons; not used by this code
    :return: the stored BandsData node
    """
    lattice_constant = 4.

    kpoints_node = KpointsData()
    kpoints_node.set_cell(np.array([
        [lattice_constant, 0., 0.],
        [0., lattice_constant, 0.],
        [0., 0., lattice_constant],
    ]))
    kpoints_node.set_kpoints_path([('G', 'M', 2)])

    bands_node = BandsData()
    bands_node.set_kpointsdata(kpoints_node)
    bands_node.set_bands([[1.0, 2.0], [3.0, 4.0]])

    # The k-points are stored first, then the bands referencing them.
    for node in (kpoints_node, bands_node):
        node.store()

    return bands_node
def parse_with_retrieved(self, retrieved):
    """
    Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
    Two nodes are expected: the default 'retrieved' FolderData node which will
    store the retrieved files permanently in the repository, and, when the
    calculation declares a list of temporary files to retrieve, the FolderData
    node with those temporary files, passed under the key
    'retrieved_temporary_folder_key' of the Parser class.

    :param retrieved: a dictionary of retrieved nodes
    :return: tuple ``(successful, new_nodes_list)`` where ``new_nodes_list``
        holds ``(link_name, node)`` pairs; ``(False, ())`` when a required
        folder or the standard output file is missing
    :raises QEOutputParsingError: if k-points were parsed but their units are
        not '2 pi / Angstrom'
    """
    import os
    import numpy

    successful = True

    # Load the input dictionary
    parameters = self._calc.inp.parameters.get_dict()

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    try:
        parser_opts = self._calc.inp.settings.get_dict()[self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
    if self._calc._get_retrieve_temporary_list():
        try:
            temporary_folder = retrieved[self.retrieved_temporary_folder_key]
            dir_with_bands = temporary_folder.get_abs_path('.')
        except KeyError:
            self.logger.error('the {} was not passed as an argument'.format(self.retrieved_temporary_folder_key))
            return False, ()
    else:
        dir_with_bands = None

    list_of_files = out_folder.get_folder_list()

    # The stdout is required for parsing
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("The standard output file '{}' was not found but is required".format(self._calc._OUTPUT_FILE_NAME))
        return False, ()

    out_folder_path = out_folder.get_abs_path('.')

    # The xml file is required for parsing: without it the calculation is
    # marked as failed, but the stdout is still parsed.
    if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
        self.logger.error("The xml output file '{}' was not found but is required".format(self._calc._DATAFILE_XML_BASENAME))
        successful = False
        xml_file = None
    else:
        xml_file = os.path.join(out_folder_path, self._calc._DATAFILE_XML_BASENAME)

    out_file = os.path.join(out_folder_path, self._calc._OUTPUT_FILE_NAME)

    # Call the raw parsing function
    parsing_args = [out_file, parameters, parser_opts, xml_file, dir_with_bands]
    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(*parsing_args)

    # If calculation was not considered failed already, use the new value
    successful = successful and raw_successful

    # The symmetry info has large arrays, that occupy most of the database.
    # turns out most of this is due to 64 matrices that are repeated over and over again.
    # therefore I map part of the results in a list of dictionaries wrote here once and for all
    # if the parser_opts has a key all_symmetries set to True, I don't reduce it
    all_symmetries = parser_opts.get('all_symmetries', False)
    if not all_symmetries:
        try:
            if 'symmetries' in out_dict:
                old_symmetries = out_dict['symmetries']
                new_symmetries = []
                for this_sym in old_symmetries:
                    name = this_sym['name']
                    index = None
                    # No early exit: the last matching entry wins.
                    for i, this in enumerate(self._possible_symmetries):
                        if name in this['name']:
                            index = i
                    if index is None:
                        self.logger.error("Symmetry {} not found".format(name))
                    new_dict = {}
                    # note: here I lose the information about equivalent
                    # ions and fractional_translation.
                    # They will be present with all_symmetries=True
                    new_dict['t_rev'] = this_sym['t_rev']
                    new_dict['symmetry_number'] = index
                    new_symmetries.append(new_dict)

                out_dict['symmetries'] = new_symmetries  # and overwrite the old one
        except KeyError:  # no symmetries were parsed (failed case, likely)
            self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = parameters['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data, input_structure=in_struc)
            new_nodes_list.append((self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:

        # Build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError('Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True, weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        if not bands_data:
            # Only emit the parsed k-points when the input node does not
            # provide an explicit list (get_kpoints raises AttributeError).
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list += [(self.get_linkname_out_kpoints(), kpoints_from_output)]

        # Converting bands into a BandsData object (including the kpoints)
        if bands_data:
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # Get the bands occupations and correct the occupations of QE:
            # If it computes only one component, it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(bands_data['occupations'][0])

            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(bands_energies,
                                     units=bands_data['bands_units'],
                                     occupations=the_occupations)

            new_nodes_list += [('output_band', the_bands_data)]
            out_dict['linknames_band'] = ['output_band']

    # Separate the atomic_occupations dictionary in its own node if it is present
    atomic_occupations = out_dict.get('atomic_occupations', {})
    if atomic_occupations:
        out_dict.pop('atomic_occupations')
        atomic_occupations_node = ParameterData(dict=atomic_occupations)
        new_nodes_list.append(('output_atomic_occupations', atomic_occupations_node))

    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        try:
            positions = numpy.array(trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(trajectory_data.pop('lattice_vectors_relax'))
            # if KeyError, the MD was at fixed cell
            except KeyError:
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array([str(i.kind_name) for i in in_struc.sites])
            stepids = numpy.arange(len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about time
            # printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            # items() instead of iteritems(): works on Python 2 and 3.
            for array_name, values in trajectory_data.items():
                traj.set_array(array_name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outtrajectory(), traj))

        except KeyError:
            # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
            # NOTE(review): this handler also catches a KeyError raised
            # anywhere else in the try body above, not only the missing
            # 'atomic_positions_relax' key.
            arraydata = ArrayData()
            for array_name, values in trajectory_data.items():
                arraydata.set_array(array_name, numpy.array(values))
            new_nodes_list.append((self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list
def _get_output_nodes(self, output_path, messages_path, xml_path,
                      json_path, bands_path):
    """
    Extracts output nodes from the standard output and standard error files.
    (And XML and JSON files)

    :param output_path: path of the standard output file (not read here)
    :param messages_path: path of the MESSAGES file, or None
    :param xml_path: path of the CML file (mandatory)
    :param json_path: path of the time.json file, or None
    :param bands_path: path of the bands file, or None
    :return: tuple ``(successful, result_list)`` where ``result_list`` holds
        ``(link_name, node)`` pairs
    :raises SiestaOutputParsingError: if ``xml_path`` is None
    :raises SiestaCMLParsingError: if the CML file cannot be parsed
    """
    parser_version = 'aiida-0.11.0--plugin-0.11.5'
    parser_info = {}
    parser_info['parser_info'] = 'AiiDA Siesta Parser V. {}'.format(
        parser_version)
    parser_info['parser_warnings'] = []

    result_list = []

    if xml_path is None:
        self.logger.error("Could not find a CML file to parse")
        # NOTE aiida.xml is not there?
        raise SiestaOutputParsingError(
            "Could not find a CML file to parse")

    # We get everything from the CML file

    xmldoc = get_parsed_xml_doc(xml_path)
    if xmldoc is None:
        self.logger.error("Malformed CML file: cannot parse")
        raise SiestaCMLParsingError("Malformed CML file: cannot parse")

    # These are examples of how we can access input items
    #
    # Structure (mandatory)
    #
    in_struc = self._calc.get_inputs_dict()['structure']
    #
    # Settings (optional); currently not used further down.
    #
    try:
        in_settings = self._calc.get_inputs_dict()['settings']
    except KeyError:
        in_settings = None

    result_dict = get_dict_from_xml_doc(xmldoc)

    # Add timing information

    if json_path is None:
        self.logger.info("Could not find a time.json file to parse")
    else:
        from json_time import get_timing_info
        global_time, timing_decomp = get_timing_info(json_path)
        if global_time is None:
            self.logger.info("Cannot fully parse the time.json file")
        else:
            result_dict["global_time"] = global_time
            result_dict["timing_decomposition"] = timing_decomp

    # Add warnings
    successful = True
    if messages_path is None:
        # Perhaps using an old version of Siesta
        warnings_list = ['WARNING: No MESSAGES file...']
    else:
        successful, warnings_list = self.get_warnings_from_file(
            messages_path)

    result_dict["warnings"] = warnings_list

    # Add parser info dictionary.
    # Merge via copy + update: ``a.items() + b.items()`` only works when
    # items() returns lists (Python 2). Parser info wins on key clashes, as
    # it did before.
    parsed_dict = dict(result_dict)
    parsed_dict.update(parser_info)

    output_data = ParameterData(dict=parsed_dict)

    link_name = self.get_linkname_outparams()
    result_list.append((link_name, output_data))

    # If the structure has changed, save it
    if is_variable_geometry(xmldoc):
        # Get the input structure to copy its site names,
        # as the CML file traditionally contained only the
        # atomic symbols.
        #
        struc = get_last_structure(xmldoc, in_struc)
        result_list.append((self.get_linkname_outstructure(), struc))

    # Save forces and stress in an ArrayData object
    forces, stress = get_final_forces_and_stress(xmldoc)

    if forces is not None and stress is not None:
        from aiida.orm.data.array import ArrayData
        arraydata = ArrayData()
        arraydata.set_array('forces', np.array(forces))
        arraydata.set_array('stress', np.array(stress))
        result_list.append((self.get_linkname_outarray(), arraydata))

    # Parse band-structure information if available
    if bands_path is not None:
        bands, coords = self.get_bands(bands_path)
        from aiida.orm.data.array.bands import BandsData
        arraybands = BandsData()
        arraybands.set_kpoints(
            self._calc.inp.bandskpoints.get_kpoints(cartesian=True))
        arraybands.set_bands(bands, units="eV")
        result_list.append((self.get_linkname_bandsarray(), arraybands))
        bandsparameters = ParameterData(dict={"kp_coordinates": coords})
        result_list.append(
            (self.get_linkname_bandsparameters(), bandsparameters))

    return successful, result_list
def parse_with_retrieved(self, retrieved):
    """
    Receives in input a dictionary of retrieved nodes.
    Does all the logic here.

    :param retrieved: a dictionary of retrieved nodes
    :return: tuple ``(successful, new_nodes_list)`` where ``new_nodes_list``
        holds ``(link_name, node)`` pairs; ``(False, ())`` when the retrieved
        folder or the standard output file is missing
    :raises QEOutputParsingError: if k-points were parsed but their units are
        not '2 pi / Angstrom'
    """
    import glob
    import os
    import numpy

    successful = True

    # NOTE: a check that the calculation is in the PARSING state used to be
    # performed here; it is disabled.

    # look for eventual flags of the parser
    try:
        parser_opts = self._calc.inp.settings.get_dict()[
            self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        parser_opts = {}

    # load the input dictionary
    # TODO: pass this input_dict to the parser. It might need it.
    input_dict = self._calc.inp.parameters.get_dict()

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # check what is inside the folder
    list_of_files = out_folder.get_folder_list()
    # at least the stdout should exist
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("Standard output not found")
        return False, ()

    # if there is something more, I note it down, so to call the raw parser
    # with the right options
    out_folder_path = out_folder.get_abs_path('.')

    # look for xml
    has_xml = self._calc._DATAFILE_XML_BASENAME in list_of_files

    # look for bands
    # Note: assuming format of kpoints subfolder is K*[0-9]
    has_bands = bool(glob.glob(os.path.join(out_folder_path, 'K*[0-9]')))
    # TODO: maybe it can be more general than bands only?

    out_file = os.path.join(out_folder_path, self._calc._OUTPUT_FILE_NAME)
    xml_file = os.path.join(out_folder_path,
                            self._calc._DATAFILE_XML_BASENAME)
    dir_with_bands = out_folder_path

    # call the raw parsing function; the xml file and the bands directory are
    # passed positionally, and only when they are available
    parsing_args = [out_file, input_dict, parser_opts]
    if has_xml:
        parsing_args.append(xml_file)
    if has_bands:
        if not has_xml:
            self.logger.warning("Cannot parse bands if xml file not "
                                "found")
        else:
            parsing_args.append(dir_with_bands)

    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
        *parsing_args)

    # if calculation was not considered failed already, use the new value
    successful = successful and raw_successful

    # The symmetry info has large arrays, that occupy most of the database.
    # turns out most of this is due to 64 matrices that are repeated over and over again.
    # therefore I map part of the results in a list of dictionaries wrote here once and for all
    # if the parser_opts has a key all_symmetries set to True, I don't reduce it
    all_symmetries = parser_opts.get('all_symmetries', False)
    if not all_symmetries:
        try:
            if 'symmetries' in out_dict:
                old_symmetries = out_dict['symmetries']
                new_symmetries = []
                for this_sym in old_symmetries:
                    name = this_sym['name']
                    index = None
                    # No early exit: the last matching entry wins.
                    for i, this in enumerate(self._possible_symmetries):
                        if name in this['name']:
                            index = i
                    if index is None:
                        self.logger.error(
                            "Symmetry {} not found".format(name))
                    new_dict = {}
                    # note: here I lose the information about equivalent
                    # ions and fractional_translation.
                    # They will be present with all_symmetries=True
                    new_dict['t_rev'] = this_sym['t_rev']
                    new_dict['symmetry_number'] = index
                    new_symmetries.append(new_dict)

                # and overwrite the old one
                out_dict['symmetries'] = new_symmetries
        except KeyError:  # no symmetries were parsed (failed case, likely)
            self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = input_dict['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data,
                                               input_structure=in_struc)
            new_nodes_list.append(
                (self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:

        # build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError(
                'Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list, cartesian=True,
                                        weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        if not bands_data:
            # Only emit the parsed k-points when the input node does not
            # provide an explicit list (get_kpoints raises AttributeError).
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list += [(self.get_linkname_out_kpoints(),
                                    kpoints_from_output)]

        if bands_data:
            # converting bands into a BandsData object (including the kpoints)
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # get the bands occupations.
            # correct the occupations of QE: if it computes only one component,
            # it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(
                    bands_data['occupations'][0])

            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(bands_energies,
                                     units=bands_data['bands_units'],
                                     occupations=the_occupations)

            new_nodes_list += [('output_band', the_bands_data)]
            out_dict['linknames_band'] = ['output_band']

    # convert the dictionary into an AiiDA object
    output_params = ParameterData(dict=out_dict)
    # return it to the execmanager
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        try:
            positions = numpy.array(
                trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(
                    trajectory_data.pop('lattice_vectors_relax'))
            # if KeyError, the MD was at fixed cell
            except KeyError:
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array(
                [str(i.kind_name) for i in in_struc.sites])
            # a growing integer per step
            stepids = numpy.arange(len(positions))
            # I will insert time parsing when they fix their issues about time
            # printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            # items() instead of iteritems(): works on Python 2 and 3.
            for array_name, values in trajectory_data.items():
                traj.set_array(array_name, numpy.array(values))
            # return it to the execmanager
            new_nodes_list.append(
                (self.get_linkname_outtrajectory(), traj))

        except KeyError:
            # forces, atomic charges and atomic mag. moments, in scf
            # calculation (when outputed)
            # NOTE(review): this handler also catches a KeyError raised
            # anywhere else in the try body above, not only the missing
            # 'atomic_positions_relax' key.
            arraydata = ArrayData()
            for array_name, values in trajectory_data.items():
                arraydata.set_array(array_name, numpy.array(values))
            # return it to the execmanager
            new_nodes_list.append(
                (self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list
def parse_with_retrieved(self, retrieved):
    """
    Parses the datafolder, stores results.
    This parser for this simple code does simply store in the DB a node
    representing the file of phonon frequencies

    :param retrieved: a dictionary of retrieved nodes
    :return: tuple ``(successful, new_nodes_list)``. On complete parsing the
        list holds the output parameters and the phonon BandsData node; it is
        empty when parsing stops early.
    """
    # suppose at the start that the job is successful
    successful = True
    new_nodes_list = []

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # check what is inside the folder
    list_of_files = out_folder.get_folder_list()
    # at least the stdout should exist
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error("Standard output not found")
        return False, ()

    # check that the file has finished (i.e. JOB DONE is inside the file)
    # An unfinished job is flagged as failed but parsing still goes on.
    filpath = out_folder.get_abs_path(self._calc._OUTPUT_FILE_NAME)
    with open(filpath, 'r') as fil:
        lines = fil.read()
    if "JOB DONE" not in lines:
        successful = False
        self.logger.error("Computation did not finish properly")

    # check that the phonon frequencies file is present
    try:
        # define phonon frequencies file name
        phonon_file = out_folder.get_abs_path(
            self._calc._PHONON_FREQUENCIES_NAME)
    except OSError:
        self.logger.error("File with phonon frequencies not found")
        return False, new_nodes_list

    # extract the kpoints from the input data and create the kpointsdata for bands
    kpointsdata = self._calc.inp.kpoints
    try:
        kpoints = kpointsdata.get_kpoints()
        kpointsdata_for_bands = kpointsdata.copy()
    except AttributeError:
        # No explicit list in the input node: expand the mesh to a list.
        kpoints = kpointsdata.get_kpoints_mesh(print_list=True)
        kpointsdata_for_bands = KpointsData()
        kpointsdata_for_bands.set_kpoints(kpoints)
    # find the number of kpoints
    num_kpoints = kpoints.shape[0]

    # call the raw parsing function
    parsed_data = parse_raw_matdyn_phonon_file(phonon_file)

    # extract number of kpoints read from the file (and take out from output
    # dictionary)
    try:
        this_num_kpoints = parsed_data.pop('num_kpoints')
    except KeyError:
        self.logger.error("Wrong number of kpoints")
        # warning message already in parsed_data
        return False, new_nodes_list

    # check that the number of kpoints from the file is the same as the one
    # in the input kpoints
    if num_kpoints != this_num_kpoints:
        successful = False
        self.logger.error("Number of kpoints different in input and in "
                          "phonon frequencies file")

    # extract phonon bands (and take out from output dictionary)
    phonon_bands = parsed_data.pop('phonon_bands')

    # save phonon branches into BandsData
    output_bands = BandsData()
    output_bands.set_kpointsdata(kpointsdata_for_bands)
    output_bands.set_bands(phonon_bands, units='THz')

    # convert the dictionary into an AiiDA object (here only warnings remain)
    output_params = ParameterData(dict=parsed_data)

    # A missing 'warnings' entry is treated as "no warnings" rather than
    # aborting after the output nodes have already been built.
    for message in parsed_data.get('warnings', []):
        self.logger.error(message)

    # prepare the list of output nodes to be returned
    new_nodes_list = [(self.get_linkname_outparams(), output_params),
                      (self.get_linkname_outbands(), output_bands)]

    return successful, new_nodes_list
def _aiida_bands_data(self, data, cell, kpoints_dict):
    """
    Convert parsed quasiparticle data into an AiiDA data node.

    :param data: dictionary keyed by k-point index; every value is a
        dictionary of observables. The fallback branch reads 'Band', 'Eo',
        'E-Eo', 'Sc|Eo', 'Z' and, if present, 'Spin_Pol'.
    :param cell: cell handed to ``BandsData.set_cell``
    :param kpoints_dict: maps the keys of ``data`` to k-point coordinates
    :return: ``False`` if ``data`` is empty; an ``ArrayData`` when some key
        of ``data`` is missing from ``kpoints_dict``; a ``BandsData``
        otherwise
    """
    if not data:
        return False
    kpt_idx = sorted(data.keys())  # list of kpoint indices
    try:
        k_list = [kpoints_dict[i]
                  for i in kpt_idx]  # list of k-point triplet
    except KeyError:
        # kpoint triplets are not present (true for .qp and so on, can not use BandsData)
        # We use the internal Yambo Format  [ [Eo_1, Eo_2,... ], ...[So_1,So_2,] ]
        #                                  QP_TABLE  [[ik_1,ib_1,isp_1] ,[ik_n,ib_n,isp_n]]
        # Each entry in DATA has corresponding legend in QP_TABLE that defines its details
        # like ik= kpoint index, ib= Band index, isp= spin polarization index.
        #  Eo_1 =>  at ik_1, ib_1, isp_1.
        pdata = ArrayData()
        QP_TABLE = []
        Eo = []
        E_minus_Eo = []
        So = []
        Z = []
        # (key in the parsed data, list collecting its values)
        columns = (
            ('Eo', Eo),
            ('E-Eo', E_minus_Eo),
            ('Sc|Eo', So),
            ('Z', Z),
        )
        for ky in data:  # ky == kpoint index as a string 1,2,..
            for ind in range(len(data[ky]['Band'])):
                # Observables absent from the parsed data are skipped.
                for source_key, values in columns:
                    try:
                        values.append(data[ky][source_key][ind])
                    except KeyError:
                        pass
                ik = int(ky)
                ib = data[ky]['Band'][ind]
                isp = 0
                if 'Spin_Pol' in data[ky]:
                    isp = data[ky]['Spin_Pol'][ind]
                QP_TABLE.append([ik, ib, isp])
        pdata.set_array('Eo', numpy.array(Eo))
        pdata.set_array('E_minus_Eo', numpy.array(E_minus_Eo))
        pdata.set_array('So', numpy.array(So))
        pdata.set_array('Z', numpy.array(Z))
        pdata.set_array('qp_table', numpy.array(QP_TABLE))
        return pdata

    quasiparticle_bands = BandsData()
    quasiparticle_bands.set_cell(cell)
    quasiparticle_bands.set_kpoints(k_list, cartesian=True)
    # labels will come from any of the keys in the nested kp_point data,
    # there is a uniform set of observables for each k-point, ie Band, Eo, ...
    # ***FIXME BUG does not seem to handle spin polarizes at all when constructing bandsdata***
    # next(iter(...)) picks the same "first" key that keys()[0] did, and
    # also works where keys() is not subscriptable (Python 3).
    first_entry = data[next(iter(data))]
    bands_labels = sorted(first_entry.keys())
    # One array per label, with one row per k-point in sorted index order.
    generalised_bands = [
        numpy.array([data[kp][label] for kp in kpt_idx])
        for label in bands_labels
    ]
    quasiparticle_bands.set_bands(
        bands=generalised_bands, units='eV', labels=bands_labels)
    return quasiparticle_bands
def spin_dependent_subparcer(out_info_dict):
    """
    Extract the band energies and the orbital projections from the parsed
    standard output, for one spin channel.

    :param out_info_dict: contains various technical internals useful in
        parsing. Keys read here: 'out_file', 'spin_down', 'wfc_lines',
        'k_states', 'num_bands', 'e_lines', 'psi_lines', 'parent_calc',
        'k_vect', 'orbitals' and 'energy'.
    :return: tuple ``(bands_data, projection_data)``, i.e. the BandsData
        followed by the ProjectionData that references it
    :raises QEOutputParsingError: if indexing into the output fails, or if
        the number of orbitals does not match the projection array
    :raises AttributeError: if a projection does not have the shape of the
        bands
    """
    out_file = out_info_dict["out_file"]
    spin_down = out_info_dict["spin_down"]
    info = out_info_dict  # shorter alias

    # regular expressions needed for later parsing
    first_fraction_re = re.compile(r"\=(.*?)\*")  # state composition 1
    other_fraction_re = re.compile(r"\+(.*?)\*")  # state comp 2
    orbital_id_re = re.compile(r"\#(.*?)\]")  # state identity

    # primes arrays for the later parsing
    num_wfc = len(info["wfc_lines"])
    n_kpoints = info["k_states"]
    n_bands = info["num_bands"]
    bands = np.zeros([n_kpoints, n_bands])
    projection_arrays = np.zeros([n_kpoints, n_bands, num_wfc])

    # With spin_down set, the line indices are read from the block that
    # starts k_states * num_bands entries into e_lines / psi_lines.
    k_offset = n_kpoints if spin_down else 0

    try:
        for k_idx in range(n_kpoints):
            first_state = (k_idx + k_offset) * n_bands
            for b_idx in range(n_bands):
                state = first_state + b_idx

                # grabs band energy (fifth whitespace-separated field)
                energy_line = info["e_lines"][state]
                bands[k_idx][b_idx] = out_file[energy_line].split()[4]

                # subloop grabs pdos
                fractions = []
                orbital_ids = []
                for line_no in range(energy_line + 1,
                                     info["psi_lines"][state]):
                    text = out_file[line_no]
                    fractions.extend(first_fraction_re.findall(text))
                    fractions.extend(other_fraction_re.findall(text))
                    orbital_ids.extend(orbital_id_re.findall(text))
                if len(orbital_ids) != len(fractions):
                    raise IndexError

                # sets relevant values in pdos_array
                for raw_id, raw_fraction in zip(orbital_ids, fractions):
                    orbital = int(raw_id)
                    weight = float(raw_fraction)
                    # the ids found in the file are shifted by one to index
                    # the array
                    projection_arrays[k_idx][b_idx][orbital - 1] = weight
    except IndexError:
        raise QEOutputParsingError("the standard out file does not "
                                   "comply with the official "
                                   "documentation.")

    bands_data = BandsData()
    try:
        # Attempts to retrieve the kpoints from the parent calc
        parent_calc = out_info_dict["parent_calc"]
        parent_kpoints = parent_calc.get_inputs_dict()['kpoints']
        if len(info['k_vect']) != len(parent_kpoints.get_kpoints()):
            raise AttributeError
        bands_data.set_kpointsdata(parent_kpoints)
    except AttributeError:
        # Fall back to the k-vectors stored in out_info_dict.
        bands_data.set_kpoints(info['k_vect'].astype(float))

    bands_data.set_bands(bands, units='eV')

    orbitals = out_info_dict["orbitals"]
    if len(orbitals) != projection_arrays[0, 0, :].shape[0]:
        raise QEOutputParsingError("There was an internal parsing error, "
                                   " the projection array shape does not agree"
                                   " with the number of orbitals")

    projection_data = ProjectionData()
    projection_data.set_reference_bandsdata(bands_data)
    projections = [projection_arrays[:, :, orbital_idx]
                   for orbital_idx in range(len(orbitals))]

    # Do the bands_check manually here
    if any(projection.shape != bands.shape for projection in projections):
        raise AttributeError("Projections not the same shape as the bands")

    # insert here some logic to assign pdos to the orbitals
    pdos_arrays = spin_dependent_pdos_subparcer(out_info_dict)
    energy_arrays = [out_info_dict["energy"]] * len(orbitals)
    projection_data.set_projectiondata(orbitals,
                                       list_of_projections=projections,
                                       list_of_energy=energy_arrays,
                                       list_of_pdos=pdos_arrays,
                                       bands_check=False)
    return bands_data, projection_data
def band_parser(band_dat_path, band_kpt_path, special_points, structure):
    """
    Parsers the bands output data, along with the special points retrieved
    from the input kpoints to construct a BandsData object which is then
    returned. Cannot handle discontinuities in the kpath, if two points are
    assigned to same spot only one will be passed.

    :param band_dat_path: file path to the aiida_band.dat file
    :param band_kpt_path: file path to the aiida_band.kpt file
    :param special_points: special points to add labels to the bands a
        dictionary in the form expected in the input as described in the
        wannier90 documentation. The keys ``'path'`` (sequence of
        ``(start_label, end_label)`` segments) and ``'point_coords'``
        (label -> coordinates) are read here
    :param structure: passed to ``KpointsData.set_cell_from_structure`` to
        define the cell of the k-points
    :return: BandsData object constructed from the input params
    """
    import numpy as np
    from aiida.orm.data.array.bands import BandsData
    from aiida.orm.data.array.kpoints import KpointsData

    # ``unicode`` only exists on Python 2; fall back to ``str`` elsewhere
    try:
        text_type = unicode
    except NameError:
        text_type = str

    # imports the data
    out_kpt = np.genfromtxt(band_kpt_path, skip_header=1, usecols=(0, 1, 2))
    out_dat = np.genfromtxt(band_dat_path, usecols=1)

    # reshapes the output bands: one row per k-point, one column per band.
    # Floor division keeps the dimension an integer under true division too.
    num_kpoints = len(out_kpt)
    out_dat = out_dat.reshape(num_kpoints, len(out_dat) // num_kpoints,
                              order="F")

    # finds expected points of discontinuity: segments whose start label
    # differs from the end label of the previous segment
    kpath = special_points['path']
    cont_break = [(i, (kpath[i - 1][1], kpath[i][0]))
                  for i in range(1, len(kpath))
                  if kpath[i - 1][1] != kpath[i][0]]

    # finds the special points: (k-point index, label) for every k-point
    # whose coordinates match a special point
    special_points_dict = special_points['point_coords']
    labels = [(i, k) for k in special_points_dict
              for i in range(num_kpoints)
              if all(np.isclose(special_points_dict[k], out_kpt[i]))]
    labels.sort()

    # Checks and appends labels if discontinuity
    appends = []
    for position, (label_before, label_after) in cont_break:
        present_kpoint, present_label = labels[position]
        # two cases the break is before or the break is after
        # if the break is before
        if present_label != label_before:
            # checks to see if the discontinuity was already there; compare
            # the label of the neighbouring entry, not the whole tuple
            if labels[position - 1][1] == label_before:
                continue
            appends.append([position, label_before, present_kpoint - 1])
        # if the break is after
        if present_label != label_after:
            # checks to see if the discontinuity was already there
            if (position + 1 < len(labels)
                    and labels[position + 1][1] == label_after):
                continue
            appends.append([position + 1, label_after, present_kpoint + 1])
    appends.sort()

    # every earlier insertion shifts the later insertion points by one
    for shift, (insert_point, new_label, kpoint) in enumerate(appends):
        labels.insert(insert_point + shift, (kpoint, text_type(new_label)))

    bands = BandsData()
    k = KpointsData()
    k.set_cell_from_structure(structure)
    k.set_kpoints(out_kpt, cartesian=False)
    bands.set_kpointsdata(k)
    bands.set_bands(out_dat, units='eV')
    bands.labels = labels
    return bands
def parse_with_retrieved(self, retrieved):
    """
    Parse the output nodes for a PwCalculations from a dictionary of retrieved nodes.
    Two nodes that are expected are the default 'retrieved' FolderData node which will
    store the retrieved files permanently in the repository. The second required node
    is the unstored FolderData node with the temporary retrieved files, which should
    be passed under the key 'retrieved_temporary_folder_key' of the Parser class.

    :param retrieved: a dictionary of retrieved nodes
    :return: a tuple ``(successful, new_nodes_list)`` where ``new_nodes_list`` is a
        list of ``(link name, node)`` tuples. ``(False, ())`` is returned when the
        retrieved folder, the temporary folder (if temporary files were requested) or
        the standard output file is missing
    :raise QEOutputParsingError: if the parsed k-points are not given in units of
        '2 pi / Angstrom'
    """
    import os

    successful = True

    # Load the input dictionary
    parameters = self._calc.inp.parameters.get_dict()

    # Look for optional settings input node and potential 'parser_options' dictionary within it
    try:
        settings = self._calc.inp.settings.get_dict()
        parser_opts = settings[self.get_parser_settings_key()]
    except (AttributeError, KeyError):
        settings = {}
        parser_opts = {}

    # Check that the retrieved folder is there
    try:
        out_folder = retrieved[self._calc._get_linkname_retrieved()]
    except KeyError:
        self.logger.error("No retrieved folder found")
        return False, ()

    # Verify that the retrieved_temporary_folder is within the arguments if temporary files were specified
    if self._calc._get_retrieve_temporary_list():
        try:
            temporary_folder = retrieved[self.retrieved_temporary_folder_key]
            dir_with_bands = temporary_folder.get_abs_path('.')
        except KeyError:
            self.logger.error(
                'the {} was not passed as an argument'.format(
                    self.retrieved_temporary_folder_key))
            return False, ()
    else:
        dir_with_bands = None

    list_of_files = out_folder.get_folder_list()

    # The stdout is required for parsing
    if self._calc._OUTPUT_FILE_NAME not in list_of_files:
        self.logger.error(
            "The standard output file '{}' was not found but is required".
            format(self._calc._OUTPUT_FILE_NAME))
        return False, ()

    # The xml file is required for parsing; without it the raw parser is still called
    # on the standard output, but the calculation is marked as failed
    if self._calc._DATAFILE_XML_BASENAME not in list_of_files:
        self.logger.error(
            "The xml output file '{}' was not found but is required".
            format(self._calc._DATAFILE_XML_BASENAME))
        successful = False
        xml_file = None
    else:
        xml_file = os.path.join(out_folder.get_abs_path('.'),
                                self._calc._DATAFILE_XML_BASENAME)

    out_file = os.path.join(out_folder.get_abs_path('.'),
                            self._calc._OUTPUT_FILE_NAME)

    # Call the raw parsing function
    parsing_args = [
        out_file, parameters, parser_opts, xml_file, dir_with_bands
    ]
    out_dict, trajectory_data, structure_data, bands_data, raw_successful = parse_raw_output(
        *parsing_args)

    # If calculation was not considered failed already, use the new value
    successful = raw_successful if successful else successful

    # If the parser option 'all_symmetries' is not set to True, we reduce the raw parsed symmetries to safe space
    all_symmetries = parser_opts.get('all_symmetries', False)

    if not all_symmetries and 'symmetries' in out_dict:

        # In the standard output, each symmetry operation print two rotation matrices:
        #
        # * S_cryst^T: matrix in crystal coordinates, transposed
        # * S_cart: matrix in cartesian coordinates,
        #
        # The XML files only print one matrix:
        #
        # * S_cryst: matrix in crystal coordinates
        #
        # The raw parsed symmetry information from the XML is large and will load the database heavily if stored as
        # is for each calculation. Instead, we will map these dictionaries onto a static dictionary of rotation
        # matrices generated by the _get_qe_symmetry_list static method. This dictionary will return the rotation
        # matrices in cartesian coordinates, i.e. S_cart. In order to compare the raw matrices from the XML to these
        # static matrices we have to convert S_cryst into S_cart. We derive here how that is done:
        #
        #   S_cryst * v_cryst = v_cryst'
        #
        # where v_cryst' is the rotated vector v_cryst under S_cryst
        # We define `cell` where cell vectors are rows. Converting a vector from crystal to cartesian
        # coordinates is defined as:
        #
        #   cell^T * v_cryst = v_cart
        #
        # The inverse of this operation is defined as
        #
        #   v_cryst = cell^Tinv * v_cart
        #
        # Replacing the last equation into the first we find:
        #
        #   S_cryst * cell^Tinv * v_cart = cell^Tinv * v_cart'
        #
        # Multiply on the left with cell^T gives:
        #
        #   cell^T * S_cryst * cell^Tinv * v_cart = v_cart'
        #
        # which can be rewritten as:
        #
        #   S_cart * v_cart = v_cart'
        #
        # where:
        #
        #   S_cart = cell^T * S_cryst * cell^Tinv
        #
        # We compute here the transpose and its inverse of the structure cell basis, which is needed to transform
        # the parsed rotation matrices, which are in crystal coordinates, to cartesian coordinates, which are the
        # matrices that are returned by the _get_qe_symmetry_list staticmethod
        try:
            cell = structure_data['cell']['lattice_vectors']
        except KeyError:
            # A failed run may not provide a cell (the structure handling further down checks for this as
            # well): the symmetries cannot be mapped then and are left as they were parsed
            cell = None
            self.logger.error(
                'No cell was found in output, the parsed symmetries are left unreduced')

        if cell is not None:
            cell_T = numpy.transpose(cell)
            cell_Tinv = numpy.linalg.inv(cell_T)

            try:
                old_symmetries = out_dict['symmetries']
                new_symmetries = []
                for this_sym in old_symmetries:
                    name = this_sym['name'].strip()
                    for i, this in enumerate(self._possible_symmetries):
                        # Since we do an exact comparison we strip the string name from whitespace
                        # and as soon as it is matched, we break to prevent it from matching another
                        if name == this['name'].strip():
                            index = i
                            break
                    else:
                        index = None
                        self.logger.error(
                            'Symmetry {} not found'.format(name))

                    new_dict = {}
                    if index is not None:
                        # The raw parsed rotation matrix is in crystal coordinates, whereas the mapped rotation
                        # in self._possible_symmetries is in cartesian coordinates. To allow them to be compared
                        # to make sure we matched the correct rotation symmetry, we first convert the parsed matrix
                        # to cartesian coordinates. For explanation of the method, see comment above.
                        rotation_cryst = this_sym['rotation']
                        rotation_cart_new = self._possible_symmetries[index]['matrix']
                        rotation_cart_old = numpy.dot(
                            cell_T, numpy.dot(rotation_cryst, cell_Tinv))

                        inversion = self._possible_symmetries[index]['inversion']
                        if not are_matrices_equal(
                                rotation_cart_old,
                                rotation_cart_new,
                                swap_sign_matrix_b=inversion):
                            self.logger.error(
                                'Mapped rotation matrix {} does not match the original rotation {}'
                                .format(rotation_cart_new, rotation_cart_old))
                            new_dict['all_symmetries'] = this_sym
                        else:
                            # Note: here I lose the information about equivalent ions and fractional_translation.
                            new_dict['t_rev'] = this_sym['t_rev']
                            new_dict['symmetry_number'] = index
                    else:
                        new_dict['all_symmetries'] = this_sym

                    new_symmetries.append(new_dict)

                out_dict['symmetries'] = new_symmetries  # and overwrite the old one
            except KeyError:  # no symmetries were parsed (failed case, likely)
                self.logger.error("No symmetries were found in output")

    new_nodes_list = []

    # I eventually save the new structure. structure_data is unnecessary after this
    in_struc = self._calc.get_inputs_dict()['structure']
    type_calc = parameters['CONTROL']['calculation']
    struc = in_struc
    if type_calc in ['relax', 'vc-relax', 'md', 'vc-md']:
        if 'cell' in structure_data:
            struc = convert_qe2aiida_structure(structure_data,
                                               input_structure=in_struc)
            new_nodes_list.append(
                (self.get_linkname_outstructure(), struc))

    k_points_list = trajectory_data.pop('k_points', None)
    k_points_weights_list = trajectory_data.pop('k_points_weights', None)

    if k_points_list is not None:

        # Build the kpoints object
        if out_dict['k_points_units'] not in ['2 pi / Angstrom']:
            raise QEOutputParsingError(
                'Error in kpoints units (should be cartesian)')

        kpoints_from_output = KpointsData()
        kpoints_from_output.set_cell_from_structure(struc)
        kpoints_from_output.set_kpoints(k_points_list,
                                        cartesian=True,
                                        weights=k_points_weights_list)
        kpoints_from_input = self._calc.inp.kpoints

        if not bands_data:
            # The parsed kpoints are only attached as an output node when the input kpoints
            # node cannot return an explicit list (get_kpoints raises AttributeError)
            try:
                kpoints_from_input.get_kpoints()
            except AttributeError:
                new_nodes_list += [(self.get_linkname_out_kpoints(),
                                    kpoints_from_output)]

        # Converting bands into a BandsData object (including the kpoints)
        if bands_data:
            kpoints_for_bands = kpoints_from_output

            try:
                kpoints_from_input.get_kpoints()
                kpoints_for_bands.labels = kpoints_from_input.labels
            except (AttributeError, ValueError, TypeError):
                # AttributeError: no list of kpoints in input
                # ValueError: labels from input do not match the output
                #      list of kpoints (some kpoints are missing)
                # TypeError: labels are not set, so kpoints_from_input.labels=None
                pass

            # Get the bands occupations and correct the occupations of QE:
            # If it computes only one component, it occupies it with half number of electrons
            try:
                bands_data['occupations'][1]
                the_occupations = bands_data['occupations']
            except IndexError:
                the_occupations = 2. * numpy.array(
                    bands_data['occupations'][0])

            # With a single component only that component is stored as the bands
            try:
                bands_data['bands'][1]
                bands_energies = bands_data['bands']
            except IndexError:
                bands_energies = bands_data['bands'][0]

            the_bands_data = BandsData()
            the_bands_data.set_kpointsdata(kpoints_for_bands)
            the_bands_data.set_bands(bands_energies,
                                     units=bands_data['bands_units'],
                                     occupations=the_occupations)

            new_nodes_list += [('output_band', the_bands_data)]
            out_dict['linknames_band'] = ['output_band']

    # Separate the atomic_occupations dictionary in its own node if it is present
    atomic_occupations = out_dict.get('atomic_occupations', {})
    if atomic_occupations:
        out_dict.pop('atomic_occupations')
        atomic_occupations_node = ParameterData(dict=atomic_occupations)
        new_nodes_list.append(
            ('output_atomic_occupations', atomic_occupations_node))

    output_params = ParameterData(dict=out_dict)
    new_nodes_list.append((self.get_linkname_outparams(), output_params))

    if trajectory_data:
        from aiida.orm.data.array.trajectory import TrajectoryData
        from aiida.orm.data.array import ArrayData
        try:
            positions = numpy.array(
                trajectory_data.pop('atomic_positions_relax'))
            try:
                cells = numpy.array(
                    trajectory_data.pop('lattice_vectors_relax'))
            # if KeyError, the MD was at fixed cell
            except KeyError:
                cells = numpy.array([in_struc.cell] * len(positions))

            symbols = numpy.array(
                [str(i.kind_name) for i in in_struc.sites])
            stepids = numpy.arange(
                len(positions))  # a growing integer per step
            # I will insert time parsing when they fix their issues about time
            # printing (logic is broken if restart is on)

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=stepids,
                cells=cells,
                symbols=symbols,
                positions=positions,
            )
            # items() instead of the Python 2 only iteritems()
            for array_name, array_values in trajectory_data.items():
                traj.set_array(array_name, numpy.array(array_values))
            new_nodes_list.append(
                (self.get_linkname_outtrajectory(), traj))

        except KeyError:
            # forces, atomic charges and atomic mag. moments, in scf calculation (when outputed)
            arraydata = ArrayData()
            for array_name, array_values in trajectory_data.items():
                arraydata.set_array(array_name, numpy.array(array_values))
            new_nodes_list.append(
                (self.get_linkname_outarray(), arraydata))

    return successful, new_nodes_list