Example No. 1
    def parse(self, **kwargs):
        """Receives in input a dictionary of retrieved nodes.

        Does all the logic here.
        """
        retrieved = self.retrieved

        # check what is inside the folder
        list_of_files = retrieved._repository.list_object_names()

        # The metadata options of the calculation are stored as attributes of the node:
        stdout_filename = self.node.get_attribute('output_filename')
        # at least the stdout should exist
        if stdout_filename not in list_of_files:
            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

        # This should match 1 file
        xml_files = [
            xml_file for xml_file in self.node.process_class.xml_filenames
            if xml_file in list_of_files
        ]
        if not xml_files:
            return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
        elif len(xml_files) > 1:
            return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

        # Depending on the particular version of the code, cp.x can produce a file called `print_counter.xml` or
        # `print_counter`, which is a plain text file with the number of the last timestep written in the trajectory
        # output. Note that if no trajectory is produced (for example because a single conjugate gradient step was
        # performed to calculate the ground state and the wavefunction velocities), no print_counter* file is written.
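        # The flags below record which of the two formats was found (`print_counter_xml`)
        # and whether any trajectory output exists at all (`no_trajectory_output`).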

        print_counter_xml = True
        no_trajectory_output = False

        filename_counter_txt = self.node.process_class._FILE_PRINT_COUNTER_BASENAME
        filename_counter_xml = self.node.process_class._FILE_XML_PRINT_COUNTER_BASENAME

        # The following can happen and is not an error!
        if filename_counter_xml not in list_of_files and filename_counter_txt not in list_of_files:
            self.logger.warning(
                f'We could not find the print counter file (`{filename_counter_txt}` or `{filename_counter_xml}`), '
                'assuming no trajectory output was produced')
            no_trajectory_output = True

        if not no_trajectory_output:
            if filename_counter_txt in list_of_files:
                self.logger.info('print counter not in xml format')
                print_counter_xml = False
                filename_counter = filename_counter_txt
            else:  # xml format
                print_counter_xml = True
                self.logger.info('print counter in xml format')
                filename_counter = filename_counter_xml

        output_stdout = retrieved.get_object_content(stdout_filename)
        output_xml = retrieved.get_object_content(xml_files[0])
        output_xml_counter = None if no_trajectory_output else retrieved.get_object_content(
            filename_counter)
        out_dict, _raw_successful = parse_cp_raw_output(
            output_stdout, output_xml, output_xml_counter, print_counter_xml)

        if not no_trajectory_output:
            # Parse the trajectory. Units are Angstrom, picoseconds and eV.
            # Append everything to the temporary dictionary raw_trajectory.
            raw_trajectory = {}
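            # Scalar quantities read column by column from the .evp file below.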
            evp_keys = [
                'electronic_kinetic_energy', 'cell_temperature',
                'ionic_temperature', 'scf_total_energy', 'enthalpy',
                'enthalpy_plus_kinetic', 'energy_constant_motion', 'volume',
                'pressure'
            ]

            # The order of the atoms in the output trajectory changed somewhere after version 6.5
            new_cp_ordering = LooseVersion(out_dict['creator_version']) > LooseVersion('6.5')

            # Now prepare the reordering, as files in the xml are ordered
            if new_cp_ordering:
                reordering = None
            else:
                try:
                    # this works for old xml only
                    reordering = self._generate_sites_ordering(
                        out_dict['species'], out_dict['atoms'])
                except KeyError:
                    # this works for newer versions
                    reordering = self._generate_sites_ordering(
                        out_dict['structure']['species'],
                        out_dict['structure']['atoms'])

            pos_filename = f'{self.node.process_class._PREFIX}.pos'
            if pos_filename not in list_of_files:
                out_dict['warnings'].append(
                    'Unable to open the POS file... skipping.')
                return self.exit_codes.ERROR_READING_POS_FILE
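            # The atom count moved under the 'structure' sub-dictionary in newer
            # output schemas, so try the top level first and fall back to it.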
            number_of_atoms = out_dict.get(
                'number_of_atoms', out_dict['structure']['number_of_atoms']
                if 'structure' in out_dict else None)
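            # Each entry: (array name, file extension, unit conversion factor,
            # number of rows per frame in the trajectory file).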
            trajectories = [
                ('positions', 'pos', CONSTANTS.bohr_to_ang, number_of_atoms),
                ('cells', 'cel', CONSTANTS.bohr_to_ang, 3),
                ('velocities', 'vel',
                 CONSTANTS.bohr_to_ang / (CONSTANTS.timeau_to_sec * 10**12),
                 number_of_atoms),
                ('forces', 'for',
                 CONSTANTS.hartree_to_ev / CONSTANTS.bohr_to_ang,
                 number_of_atoms),
            ]

            for name, extension, scale, elements in trajectories:
                try:
                    with retrieved.open(
                            f'{self.node.process_class._PREFIX}.{extension}'
                    ) as datafile:
                        data = [l.split() for l in datafile]
                        # POSITIONS stored in angstrom
                    traj_data = parse_cp_traj_stanzas(
                        num_elements=elements,
                        splitlines=data,
                        prepend_name=f'{name}_traj',
                        rescale=scale)
                    # Initialize the dictionary here. If the parsing of the positions fails, though, there is nothing
                    # to extract from the CP dynamics, so the calculation status is set to FAILED.
                    if extension != 'cel':
                        raw_trajectory[
                            f'{name}_ordered'] = self._get_reordered_array(
                                traj_data[f'{name}_traj_data'], reordering)
                    else:
                        # NOTE: the trajectory output has the cell matrix transposed!!
                        raw_trajectory['cells'] = numpy.array(
                            traj_data['cells_traj_data']).transpose((0, 2, 1))
                    if extension == 'pos':
                        raw_trajectory['traj_times'] = numpy.array(
                            traj_data[f'{name}_traj_times'])
                except IOError:
                    out_dict['warnings'].append(
                        f'Unable to open the {extension.upper()} file... skipping.'
                    )

            # =============== EVP trajectory ============================
            try:
                with retrieved.open(
                        f'{self._node.process_class._PREFIX}.evp') as handle:
                    matrix = numpy.genfromtxt(handle)
                # there might be a different format if the matrix has one row only
                try:
                    matrix.shape[1]
                except IndexError:
                    matrix = numpy.array(numpy.matrix(matrix))

                if LooseVersion(
                        out_dict['creator_version']) > LooseVersion('5.1'):
                    # Between version 5.1 and 5.1.1, someone decided to change
                    # the .evp output format, without any way to know that this
                    # happened... SVN commit 11158.
                    # I here use the version number to parse, plus some
                    # heuristics to check that I'm doing the right thing
                    #print "New version"
                    raw_trajectory['steps'] = numpy.array(matrix[:, 0],
                                                          dtype=int)
                    raw_trajectory['times'] = matrix[:, 1]  # TPS, ps
                    raw_trajectory[
                        'electronic_kinetic_energy'] = matrix[:,
                                                              2] * CONSTANTS.hartree_to_ev  # EKINC, eV
                    raw_trajectory['cell_temperature'] = matrix[:,
                                                                3]  # TEMPH, K
                    raw_trajectory['ionic_temperature'] = matrix[:,
                                                                 4]  # TEMPP, K
                    raw_trajectory[
                        'scf_total_energy'] = matrix[:,
                                                     5] * CONSTANTS.hartree_to_ev  # ETOT, eV
                    raw_trajectory[
                        'enthalpy'] = matrix[:,
                                             6] * CONSTANTS.hartree_to_ev  # ENTHAL, eV
                    raw_trajectory[
                        'enthalpy_plus_kinetic'] = matrix[:,
                                                          7] * CONSTANTS.hartree_to_ev  # ECONS, eV
                    raw_trajectory[
                        'energy_constant_motion'] = matrix[:,
                                                           8] * CONSTANTS.hartree_to_ev  # ECONT, eV
                    raw_trajectory['volume'] = matrix[:, 9] * (
                        CONSTANTS.bohr_to_ang**3)  # volume, angstrom^3
                    raw_trajectory['pressure'] = matrix[:,
                                                        10]  # out_press, GPa
                else:
                    #print "Old version"
                    raw_trajectory['steps'] = numpy.array(matrix[:, 0],
                                                          dtype=int)
                    raw_trajectory[
                        'electronic_kinetic_energy'] = matrix[:,
                                                              1] * CONSTANTS.hartree_to_ev  # EKINC, eV
                    raw_trajectory['cell_temperature'] = matrix[:,
                                                                2]  # TEMPH, K
                    raw_trajectory['ionic_temperature'] = matrix[:,
                                                                 3]  # TEMPP, K
                    raw_trajectory[
                        'scf_total_energy'] = matrix[:,
                                                     4] * CONSTANTS.hartree_to_ev  # ETOT, eV
                    raw_trajectory[
                        'enthalpy'] = matrix[:,
                                             5] * CONSTANTS.hartree_to_ev  # ENTHAL, eV
                    raw_trajectory[
                        'enthalpy_plus_kinetic'] = matrix[:,
                                                          6] * CONSTANTS.hartree_to_ev  # ECONS, eV
                    raw_trajectory[
                        'energy_constant_motion'] = matrix[:,
                                                           7] * CONSTANTS.hartree_to_ev  # ECONT, eV
                    raw_trajectory['volume'] = matrix[:, 8] * (
                        CONSTANTS.bohr_to_ang**3)  # volume, angstrom^3
                    raw_trajectory['pressure'] = matrix[:, 9]  # out_press, GPa
                    raw_trajectory['times'] = matrix[:, 10]  # TPS, ps

                # Heuristics to check whether the format guess is correct.
                # A better heuristic could also try to fix possible issues
                # (in new versions of QE, it's possible to recompile it with
                # the __OLD_FORMAT flag to get back the old version format...)
                # but I won't do it, as there may be also other columns swapped.
                # Better to stop and ask the user to check what's going on.

                # Work around the 100 ps format bug: in affected versions the time
                # printed in the trajectory file headers becomes invalid from 100 ps
                # on, so only the entries with non-negative `traj_times` are compared
                # against the .evp times.
                mask = numpy.array(raw_trajectory['traj_times']) >= 0
                len_bugged = len(numpy.array(raw_trajectory['times'])[~mask])
                len_ok = len(numpy.array(raw_trajectory['times'])[mask])
                if len_ok > 0:
                    max_time_difference = abs(
                        numpy.array(raw_trajectory['times'])[mask] -
                        numpy.array(raw_trajectory['traj_times'])[mask]).max()
                else:
                    max_time_difference = 0.0

                # The difference is typically ~1.e-7 due to roundoff errors; a larger
                # discrepancy means something very weird is going on.
                if max_time_difference > 1.e-4 or (
                        len_bugged > 0
                        and numpy.array(raw_trajectory['times'])[~mask].min() < 100.0):
                    return self.exit_codes.ERROR_READING_TRAJECTORY_DATA

                # Keep both time arrays (they are usually duplicates)
                # so that the user can check them.
                if len_bugged > 0:
                    out_dict['warnings'].append(
                        '100ps format bug detected: ignoring trajectory\'s printed time from 100ps on'
                    )
            except IOError:
                out_dict['warnings'].append(
                    'Unable to open the EVP file... skipping.')

            # get the symbols from the input
            # TODO: I should have kinds in TrajectoryData
            input_structure = self.node.inputs.structure
            raw_trajectory['symbols'] = [
                str(i.kind_name) for i in input_structure.sites
            ]

            traj = TrajectoryData()
            traj.set_trajectory(
                stepids=raw_trajectory['steps'],
                cells=raw_trajectory['cells'],
                symbols=raw_trajectory['symbols'],
                positions=raw_trajectory['positions_ordered'],
                times=raw_trajectory['times'],
                velocities=raw_trajectory['velocities_ordered'],
            )

            # Set the forces, if they were parsed
            try:
                traj.set_array('forces', raw_trajectory['forces_ordered'])
            except KeyError:
                out_dict['warnings'].append('failed to set forces')

            for this_name in evp_keys:
                try:
                    traj.set_array(this_name, raw_trajectory[this_name])
                except KeyError:
                    # Some columns may have not been parsed, skip
                    pass

            self.out('output_trajectory', traj)

        # Remove big dictionaries that would be redundant
        # For atoms and cell, there is a small possibility that nothing is parsed but then probably nothing moved.
        for key in [
                'atoms', 'cell', 'ions_positions_stau', 'ions_positions_svel',
                'ions_positions_taui', 'atoms_index_list', 'atoms_if_pos_list',
                'ions_positions_force', 'bands', 'structure'
        ]:
            out_dict.pop(key, None)

        # convert the dictionary into an AiiDA object
        output_params = Dict(dict=out_dict)
        self.out('output_parameters', output_params)
Example No. 2
    def parse(self, **kwargs):
        """Receives in input a dictionary of retrieved nodes.

        Does all the logic here.
        """
        try:
            out_folder = self.retrieved
        except NotExistent:
            return self.exit(self.exit_codes.ERROR_NO_RETRIEVED_FOLDER)

        # check what is inside the folder
        list_of_files = out_folder._repository.list_object_names()

        # The metadata options of the calculation are stored as attributes of the node:
        stdout_filename = self.node.get_attribute('output_filename')
        # at least the stdout should exist
        if stdout_filename not in list_of_files:
            return self.exit(self.exit_codes.ERROR_OUTPUT_STDOUT_READ)

        # This should match 1 file
        xml_files = [
            xml_file for xml_file in self.node.process_class.xml_filenames
            if xml_file in list_of_files
        ]
        if not xml_files:
            return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)
        elif len(xml_files) > 1:
            return self.exit(self.exit_codes.ERROR_OUTPUT_XML_MULTIPLE)

        if self.node.process_class._FILE_XML_PRINT_COUNTER_BASENAME not in list_of_files:
            self.logger.error(
                'We could not find the print counter file in the output')
            # TODO: Add an error for this counter
            return self.exit(self.exit_codes.ERROR_MISSING_XML_FILE)

        output_stdout = out_folder.get_object_content(stdout_filename)
        output_xml = out_folder.get_object_content(xml_files[0])
        output_xml_counter = out_folder.get_object_content(
            self.node.process_class._FILE_XML_PRINT_COUNTER_BASENAME)
        out_dict, _raw_successful = parse_cp_raw_output(
            output_stdout, output_xml, output_xml_counter)

        # Parse the trajectory. Units are Angstrom, picoseconds and eV.
        # Append everything to the temporary dictionary raw_trajectory.
        raw_trajectory = {}
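        # Scalar quantities read column by column from the .evp file below.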
        evp_keys = [
            'electronic_kinetic_energy', 'cell_temperature',
            'ionic_temperature', 'scf_total_energy', 'enthalpy',
            'enthalpy_plus_kinetic', 'energy_constant_motion', 'volume',
            'pressure'
        ]

        # Now prepare the reordering, as the files in the XML are ordered
        reordering = self._generate_sites_ordering(out_dict['species'],
                                                   out_dict['atoms'])

        pos_filename = '{}.{}'.format(self.node.process_class._PREFIX, 'pos')
        if pos_filename not in list_of_files:
            return self.exit(self.exit_codes.ERROR_READING_POS_FILE)

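        # Each entry: (array name, file extension, unit conversion factor,
        # number of rows per frame in the trajectory file).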
        trajectories = [
            ('positions', 'pos', CONSTANTS.bohr_to_ang,
             out_dict['number_of_atoms']),
            ('cells', 'cel', CONSTANTS.bohr_to_ang, 3),
            ('velocities', 'vel',
             CONSTANTS.bohr_to_ang / (CONSTANTS.timeau_to_sec * 10**12),
             out_dict['number_of_atoms']),
        ]

        for name, extension, scale, elements in trajectories:
            try:
                with out_folder.open('{}.{}'.format(
                        self.node.process_class._PREFIX,
                        extension)) as datafile:
                    data = [l.split() for l in datafile]
                    # POSITIONS stored in angstrom
                traj_data = parse_cp_traj_stanzas(
                    num_elements=elements,
                    splitlines=data,
                    prepend_name='{}_traj'.format(name),
                    rescale=scale)
                # Initialize the dictionary here. If the parsing of the positions fails, though, there is nothing
                # to extract from the CP dynamics, so the calculation status is set to FAILED.
                if extension != 'cel':
                    raw_trajectory['{}_ordered'.format(
                        name)] = self._get_reordered_array(
                            traj_data['{}_traj_data'.format(name)], reordering)
                else:
                    raw_trajectory['cells'] = numpy.array(
                        traj_data['cells_traj_data'])
                if extension == 'pos':
                    raw_trajectory['times'] = numpy.array(
                        traj_data['{}_traj_times'.format(name)])
            except IOError:
                out_dict['warnings'].append(
                    'Unable to open the {} file... skipping.'.format(
                        extension.upper()))

        # =============== EVP trajectory ============================
        try:
            with out_folder.open('{}.evp'.format(
                    self._node.process_class._PREFIX)) as handle:
                matrix = numpy.genfromtxt(handle)
            # there might be a different format if the matrix has one row only
            try:
                matrix.shape[1]
            except IndexError:
                matrix = numpy.array(numpy.matrix(matrix))

            if LooseVersion(out_dict['creator_version']) > LooseVersion('5.1'):
                # Between version 5.1 and 5.1.1, someone decided to change
                # the .evp output format, without any way to know that this
                # happened... SVN commit 11158.
                # I here use the version number to parse, plus some
                # heuristics to check that I'm doing the right thing
                #print "New version"
                raw_trajectory['steps'] = numpy.array(matrix[:, 0], dtype=int)
                raw_trajectory['evp_times'] = matrix[:, 1]  # TPS, ps
                raw_trajectory[
                    'electronic_kinetic_energy'] = matrix[:,
                                                          2] * CONSTANTS.hartree_to_ev  # EKINC, eV
                raw_trajectory['cell_temperature'] = matrix[:, 3]  # TEMPH, K
                raw_trajectory['ionic_temperature'] = matrix[:, 4]  # TEMPP, K
                raw_trajectory[
                    'scf_total_energy'] = matrix[:,
                                                 5] * CONSTANTS.hartree_to_ev  # ETOT, eV
                raw_trajectory[
                    'enthalpy'] = matrix[:,
                                         6] * CONSTANTS.hartree_to_ev  # ENTHAL, eV
                raw_trajectory[
                    'enthalpy_plus_kinetic'] = matrix[:,
                                                      7] * CONSTANTS.hartree_to_ev  # ECONS, eV
                raw_trajectory[
                    'energy_constant_motion'] = matrix[:,
                                                       8] * CONSTANTS.hartree_to_ev  # ECONT, eV
                raw_trajectory['volume'] = matrix[:, 9] * (
                    CONSTANTS.bohr_to_ang**3)  # volume, angstrom^3
                raw_trajectory['pressure'] = matrix[:, 10]  # out_press, GPa
            else:
                #print "Old version"
                raw_trajectory['steps'] = numpy.array(matrix[:, 0], dtype=int)
                raw_trajectory[
                    'electronic_kinetic_energy'] = matrix[:,
                                                          1] * CONSTANTS.hartree_to_ev  # EKINC, eV
                raw_trajectory['cell_temperature'] = matrix[:, 2]  # TEMPH, K
                raw_trajectory['ionic_temperature'] = matrix[:, 3]  # TEMPP, K
                raw_trajectory[
                    'scf_total_energy'] = matrix[:,
                                                 4] * CONSTANTS.hartree_to_ev  # ETOT, eV
                raw_trajectory[
                    'enthalpy'] = matrix[:,
                                         5] * CONSTANTS.hartree_to_ev  # ENTHAL, eV
                raw_trajectory[
                    'enthalpy_plus_kinetic'] = matrix[:,
                                                      6] * CONSTANTS.hartree_to_ev  # ECONS, eV
                raw_trajectory[
                    'energy_constant_motion'] = matrix[:,
                                                       7] * CONSTANTS.hartree_to_ev  # ECONT, eV
                raw_trajectory['volume'] = matrix[:, 8] * (
                    CONSTANTS.bohr_to_ang**3)  # volume, angstrom^3
                raw_trajectory['pressure'] = matrix[:, 9]  # out_press, GPa
                raw_trajectory['evp_times'] = matrix[:, 10]  # TPS, ps

            # Heuristics to check whether the format guess is correct.
            # A better heuristic could also try to fix possible issues
            # (in new versions of QE, it's possible to recompile it with
            # the __OLD_FORMAT flag to get back the old version format...)
            # but I won't do it, as there may be also other columns swapped.
            # Better to stop and ask the user to check what's going on.
            max_time_difference = abs(
                numpy.array(raw_trajectory['times']) -
                numpy.array(raw_trajectory['evp_times'])).max()
            if max_time_difference > 1.e-4:  # It is typically ~1.e-7 due to roundoff errors
                # If there is a large discrepancy
                # it means there is something very weird going on...
                return self.exit(self.exit_codes.ERROR_READING_TRAJECTORY_DATA)

            # Delete evp_times in any case, it's a duplicate of 'times'
            del raw_trajectory['evp_times']
        except IOError:
            out_dict['warnings'].append(
                'Unable to open the EVP file... skipping.')

        # get the symbols from the input
        # TODO: I should have kinds in TrajectoryData
        input_structure = self.node.inputs.structure
        raw_trajectory['symbols'] = [
            str(i.kind_name) for i in input_structure.sites
        ]

        traj = TrajectoryData()
        traj.set_trajectory(
            stepids=raw_trajectory['steps'],
            cells=raw_trajectory['cells'],
            symbols=raw_trajectory['symbols'],
            positions=raw_trajectory['positions_ordered'],
            times=raw_trajectory['times'],
            velocities=raw_trajectory['velocities_ordered'],
        )

        for this_name in evp_keys:
            try:
                traj.set_array(this_name, raw_trajectory[this_name])
            except KeyError:
                # Some columns may have not been parsed, skip
                pass

        self.out('output_trajectory', traj)

        # Remove big dictionaries that would be redundant
        # For atoms and cell, there is a small possibility that nothing is parsed
        # but then probably nothing moved.
        for key in [
                'atoms', 'cell', 'ions_positions_stau', 'ions_positions_svel',
                'ions_positions_taui',
                # This should not be needed
                'atoms_index_list',
                # This should already be in the input
                'atoms_if_pos_list',
                'ions_positions_force',
        ]:
            out_dict.pop(key, None)

        # convert the dictionary into an AiiDA object
        output_params = Dict(dict=out_dict)
        self.out('output_parameters', output_params)
Example No. 3
    def parse(self, **kwargs):
        """
        Receives a dictionary of retrieved nodes.retrieved.
        Top level logic of operation
        """

        try:
            retrieved = self.retrieved
        except exceptions.NotExistent:
            return self.exit_codes.ERROR_NO_RETRIEVED_FOLDER

        output_folder = RetrievedFileManager(
            retrieved, kwargs.get('retrieved_temporary_folder'))

        warnings = []
        exit_code_1 = None
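        # `exit_code_1` records errors detected at this level (e.g. the presence
        # of .err files); the raw parser returns its own exit code separately.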

        # NOTE: parser options are not used for now
        parser_opts = {}

        # Not used at the moment
        input_dict = {}

        # check what is inside the folder
        filenames = output_folder.list_object_names()

        # Get calculation options
        options = self.node.get_options()
        seedname = options['seedname']

        # at least the stdout should exist
        if options['output_filename'] not in filenames:
            self.logger.error("Standard output not found")
            return self.exit_codes.ERROR_NO_OUTPUT_FILE

        # The calculation has failed if there is any .err file.
        err_filenames = [fname for fname in filenames if '.err' in fname]
        if err_filenames:
            exit_code_1 = 'ERROR_CASTEP_ERROR'

        # Add the content of err files
        err_contents = set()
        for fname in err_filenames:
            err_contents.add(output_folder.get_object_content(fname))

        # Trajectory files
        has_md_geom = False
        out_md_geom_name_content = None
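        # Prefer the .geom file; fall back to the .md file if only that exists.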
        for suffix in ('.geom', '.md'):
            fname = seedname + suffix
            if fname in filenames:
                out_md_geom_name_content = (
                    fname, output_folder.get_object_content(fname).split('\n'))
                has_md_geom = True
                break

        # Handling bands
        fname = seedname + '.bands'
        has_bands = fname in filenames
        if has_bands:
            out_bands_content = output_folder.get_object_content(fname).split(
                '\n')
        else:
            out_bands_content = None

        out_file = options['output_filename']
        out_file_content = output_folder.get_object_content(out_file).split(
            '\n')

        ###### CALL THE RAW PARSING FUNCTION TO PARSE DATA #######

        raw_parser = RawParser(out_lines=out_file_content,
                               input_dict=input_dict,
                               md_geom_info=out_md_geom_name_content,
                               bands_lines=out_bands_content,
                               **parser_opts)
        out_dict, trajectory_data, structure_data, bands_data, exit_code_2\
            = raw_parser.parse()

        # Combine the exit codes, using the more specific error
        exit_code = None
        for code in calc_exit_code:
            if code in (exit_code_2, exit_code_1):
                exit_code = code
                break

        # Append the final value of trajectory_data into out_dict
        last_value_keys = [
            "free_energy", "total_energy", "zero_K_energy", "spin_density",
            "abs_spin_density", "enthalpy"
        ]
        for key in last_value_keys:
            add_last_if_exists(trajectory_data, key, out_dict)

        # Add warnings from this level
        out_dict["warnings"].extend(warnings)

        # Add error messages
        out_dict["error_messages"] = list(err_contents)

        ######## --- PROCESSING BANDS DATA -- ########
        if has_bands or output_folder.has_file(seedname + '.castep_bin'):
            # Only use castep_bin if we are interested in SCF kpoints
            if output_folder.has_file(seedname + '.castep_bin') and (
                    self.castep_task.lower() not in NON_SCF_BAND_TASKS):
                self.logger.info("Using castep_bin file for the bands data.")
                bands_node = bands_from_castepbin(seedname, output_folder)
                if not self._has_empty_bands(bands_node):
                    # Set if no other errors
                    out_dict["warnings"].append(
                        "At least one kpoint has no empty bands, energy/forces returned are not reliable."
                    )
                    if exit_code == 'CALC_FINISHED':
                        exit_code = "ERROR_NO_EMPTY_BANDS"
            else:
                bands_node = bands_to_bandsdata(**bands_data)
            self.out(out_ln['bands'], bands_node)

        ######## --- PROCESSING STRUCTURE DATA --- ########
        no_optimise = False
        try:
            cell = structure_data["cell"]
            positions = structure_data["positions"]
            symbols = structure_data["symbols"]

        except KeyError:
            # Handle the special case where CASTEP finds nothing to optimise;
            # in that case we attach the input geometry as the output
            for warning in out_dict["warnings"]:
                if "there is nothing to optimise" in warning:
                    no_optimise = True
            if no_optimise is True:
                self.out(out_ln['structure'],
                         deepcopy(self.node.inputs.structure))
        else:
            structure_node = structure_from_input(cell=cell,
                                                  positions=positions,
                                                  symbols=symbols)
            # Use the output label as the input label
            input_structure = self.node.inputs.structure
            structure_node = desort_structure(structure_node, input_structure)
            structure_node.label = input_structure.label
            self.out(out_ln['structure'], structure_node)

        ######### --- PROCESSING TRAJECTORY DATA --- ########
        # If there is anything to save (there usually should be)
        if trajectory_data:

            # Resorting indices - for recovering the original ordering of the
            # species in the input structure
            input_structure = self.node.inputs.structure
            idesort = get_desort_args(input_structure)
            # If we have a .geom file, save it as TrajectoryData
            if has_md_geom:
                try:
                    positions = np.asarray(
                        trajectory_data["positions"])[:, idesort]
                    cells = trajectory_data["cells"]
                    # Assume symbols do not change - symbols are the same for all frames
                    symbols = np.asarray(trajectory_data["symbols"])[idesort]
                    stepids = np.arange(len(positions))

                except KeyError:
                    out_dict["parser_warning"].append(
                        "Cannot "
                        "extract data from .geom file.")

                else:
                    traj = TrajectoryData()
                    traj.set_trajectory(stepids=np.asarray(stepids),
                                        cells=np.asarray(cells),
                                        symbols=np.asarray(symbols),
                                        positions=np.asarray(positions))
                    # Save the rest
                    for name, value in trajectory_data.items():
                        # Skip saving empty arrays
                        if len(value) == 0:
                            continue

                        array = np.asarray(value)
                        # For forces/velocities we also need to resort the array
                        if ("force" in name) or ("velocities" in name):
                            array = array[:, idesort]
                        traj.set_array(name, array)
                    self.out(out_ln['trajectory'], traj)

            # Or maybe there is nothing to optimise? Still save a TrajectoryData node
            elif no_optimise is True:
                traj = TrajectoryData()
                input_structure = self.node.inputs.structure
                traj.set_trajectory(stepids=np.asarray([1]),
                                    cells=np.asarray([input_structure.cell]),
                                    symbols=np.asarray([
                                        site.kind_name
                                        for site in input_structure.sites
                                    ]),
                                    positions=np.asarray([[
                                        site.position
                                        for site in input_structure.sites
                                    ]]))
                # Save the rest
                for name, value in trajectory_data.items():
                    # Skip saving empty arrays
                    if len(value) == 0:
                        continue

                    array = np.asarray(value)
                    # For forces/velocities we also need to resort the array
                    if ("force" in name) or ("velocities" in name):
                        array = array[:, idesort]
                    traj.set_array(name, array)
                self.out(out_ln['trajectory'], traj)
            # Otherwise, save the data into an ArrayData node
            else:
                out_array = ArrayData()
                for name, value in trajectory_data.items():
                    # Skip saving empty arrays
                    if len(value) == 0:
                        continue
                    array = np.asarray(value)
                    if ("force" in name) or ("velocities" in name):
                        array = array[:, idesort]
                    out_array.set_array(name, array)
                self.out(out_ln['array'], out_array)

        ######## ---- PROCESSING OUTPUT DATA --- ########
        output_params = Dict(dict=out_dict)
        self.out(out_ln['results'], output_params)

        # Return the exit code
        return getattr(self.exit_codes, exit_code)
Example No. 4
    def parse(self, **kwargs):
        """
        Parses the datafolder, stores results.
        """
        # retrieve resources
        resources, exit_code = self.get_parsing_resources(
            kwargs, traj_in_temp=True, sys_info=True)
        if exit_code is not None:
            return exit_code
        trajectory_filename, trajectory_filepath, info_filepath = resources

        # parse log file
        log_data, exit_code = self.parse_log_file()
        if exit_code is not None:
            return exit_code

        # parse trajectory file
        try:
            timestep = self.node.inputs.parameters.dict.timestep
            positions, charges, step_ids, cells, symbols, time = read_lammps_trajectory(
                trajectory_filepath, timestep=timestep,
                log_warning_func=self.logger.warning)
        except Exception:
            traceback.print_exc()
            return self.exit_codes.ERROR_TRAJ_PARSING

        # save results into node
        output_data = log_data["data"]
        if 'units_style' in output_data:
            output_data.update(get_units_dict(output_data['units_style'],
                                              ["distance", "time", "energy"]))
        else:
            self.logger.warning("units missing in log")
        self.add_warnings_and_errors(output_data)
        self.add_standard_info(output_data)
        parameters_data = Dict(dict=output_data)
        self.out('results', parameters_data)

        # save trajectories into node
        trajectory_data = TrajectoryData()
        trajectory_data.set_trajectory(
            symbols, positions, stepids=step_ids, cells=cells, times=time)
        if charges is not None:
            trajectory_data.set_array('charges', charges)       
        self.out('trajectory_data', trajectory_data)

        # parse the system data file
        if info_filepath:
            sys_data = ArrayData()
            try:
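                # The first line of the info file holds the column names; the
                # remaining rows are loaded as one array per column.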
                with open(info_filepath) as handle:
                    names = handle.readline().strip().split()
                for i, col in enumerate(np.loadtxt(info_filepath, skiprows=1, unpack=True)):
                    sys_data.set_array(names[i], col)
            except Exception:
                traceback.print_exc()
                return self.exit_codes.ERROR_INFO_PARSING
            sys_data.set_attribute('units_style', output_data.get('units_style', None))
            self.out('system_data', sys_data)

        if output_data["errors"]:
            return self.exit_codes.ERROR_LAMMPS_RUN
Example No. 5
    def _parse_trajectory(self):
        """Abinit trajectory parser."""
        def _voigt_to_tensor(voigt):
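            # `strten` comes in Voigt order (xx, yy, zz, yz, xz, xy); build the
            # symmetric 3x3 tensor and mirror the off-diagonal terms.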
            tensor = np.zeros((3, 3))
            tensor[0, 0] = voigt[0]
            tensor[1, 1] = voigt[1]
            tensor[2, 2] = voigt[2]
            tensor[1, 2] = voigt[3]
            tensor[0, 2] = voigt[4]
            tensor[0, 1] = voigt[5]
            tensor[2, 1] = tensor[1, 2]
            tensor[2, 0] = tensor[0, 2]
            tensor[1, 0] = tensor[0, 1]
            return tensor

        # Absolute path of the folder in which aiidao_HIST.nc is stored
        path = self.node.get_remote_workdir()
        # HIST Abinit NetCDF file - Default name is aiidao_HIST.nc
        fname = f'{self.node.get_attribute("prefix")}o_HIST.nc'

        if fname not in self.retrieved.list_object_names():
            return self.exit_codes.ERROR_MISSING_OUTPUT_FILES

        with HistFile(path + '/' + fname) as hist_file:
            structures = hist_file.structures

        output_structure = StructureData(pymatgen=structures[-1])

        with nc.Dataset(path + '/' + fname, 'r') as data_set:  # pylint: disable=no-member
            n_steps = data_set.dimensions['time'].size
            energy_ha = data_set.variables['etotal'][:].data  # Ha
            energy_kin_ha = data_set.variables['ekin'][:].data  # Ha
            forces_cart_ha_bohr = data_set.variables[
                'fcart'][:, :, :].data  # Ha/bohr
            positions_cart_bohr = data_set.variables[
                'xcart'][:, :, :].data  # bohr
            stress_voigt = data_set.variables['strten'][:, :].data  # Ha/bohr^3

        stepids = np.arange(n_steps)
        symbols = np.array([specie.symbol for specie in structures[0].species],
                           dtype='<U2')
        cells = np.array(
            [structure.lattice.matrix for structure in structures]).reshape(
                (n_steps, 3, 3))
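        # Convert from Abinit's atomic units (Ha, bohr) to eV and Angstrom.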
        energy = energy_ha * units.Ha_to_eV
        energy_kin = energy_kin_ha * units.Ha_to_eV
        forces = forces_cart_ha_bohr * units.Ha_to_eV / units.bohr_to_ang
        positions = positions_cart_bohr * units.bohr_to_ang
        stress = np.array([_voigt_to_tensor(sv) for sv in stress_voigt
                           ]) * units.Ha_to_eV / units.bohr_to_ang**3
        total_force = np.array([np.sum(f) for f in forces_cart_ha_bohr
                                ]) * units.Ha_to_eV / units.bohr_to_ang

        output_trajectory = TrajectoryData()
        output_trajectory.set_trajectory(stepids=stepids,
                                         cells=cells,
                                         symbols=symbols,
                                         positions=positions)
        output_trajectory.set_array('energy', energy)  # eV
        output_trajectory.set_array('energy_kin', energy_kin)  # eV
        output_trajectory.set_array('forces', forces)  # eV/angstrom
        output_trajectory.set_array('stress', stress)  # eV/angstrom^3
        output_trajectory.set_array('total_force', total_force)  # eV/angstrom

        self.out('output_trajectory', output_trajectory)
        self.out('output_structure', output_structure)