Exemplo n.º 1
0
    def opt_with_frequency_flattener(
        cls,
        qchem_command,
        multimode="openmp",
        input_file="mol.qin",
        output_file="mol.qout",
        qclog_file="mol.qclog",
        max_iterations=10,
        max_molecule_perturb_scale=0.3,
        check_connectivity=True,
        linked=True,
        transition_state=False,
        freq_before_opt=False,
        save_final_scratch=False,
        **QCJob_kwargs,
    ):
        """
        Optimize a structure and calculate vibrational frequencies to check if the
        structure is in a true minimum.

        This is a generator: it yields one QCJob at a time and, between yields,
        reads the output of the job that just ran and rewrites ``input_file``
        for the next job.

        If there are an inappropriate number of imaginary frequencies (>0 for a
         minimum-energy structure, >1 for a transition-state), attempt to re-calculate
         using one of two methods:
            - Perturb the geometry based on the imaginary frequencies and re-optimize
            - Use the exact Hessian to inform a subsequent optimization
         After each geometry optimization, the frequencies are re-calculated to
         determine if a true minimum (or transition-state) has been found.

        Note: Very small imaginary frequencies (-15cm^-1 < nu < 0) are allowed
        if there is only one more than there should be. In other words, if there
        is one very small imaginary frequency, it is still treated as a minimum,
        and if there is one significant imaginary frequency and one very small
        imaginary frequency, it is still treated as a transition-state.

        Args:
            qchem_command (str): Command to run QChem.
            multimode (str): Parallelization scheme, either openmp or mpi.
            input_file (str): Name of the QChem input file.
            output_file (str): Name of the QChem output file.
            qclog_file (str): Name of the QChem log file passed to every QCJob.
            max_iterations (int): Number of perturbation -> optimization -> frequency
                iterations to perform. Defaults to 10.
            max_molecule_perturb_scale (float): The maximum scaled perturbation that
                can be applied to the molecule. Defaults to 0.3.
            check_connectivity (bool): Whether to check differences in connectivity
                introduced by structural perturbation. Defaults to True. When False
                (or when ``transition_state`` is True) the largest perturbation is
                applied without a connectivity check.
            linked (bool): Whether or not to use the linked flattener. If set to True (default),
                then the explicit Hessians from a vibrational frequency analysis will be used
                as the initial Hessian of subsequent optimizations. In many cases, this can
                significantly improve optimization efficiency.
            transition_state (bool): If True (default False), use a ts
                optimization (search for a saddle point instead of a minimum)
            freq_before_opt (bool): If True (default False), run a frequency
                calculation before any opt/ts searches to improve understanding
                of the local potential energy surface.
            save_final_scratch (bool): Whether to save full scratch directory contents
                at the end of the flattening. Defaults to False.
            **QCJob_kwargs: Passthrough kwargs to QCJob. See
                :class:`custodian.qchem.jobs.QCJob`.

        Yields:
            QCJob: the next optimization or frequency job to run.

        Raises:
            AssertionError: if ``input_file`` does not exist, if a frequency job
                reports errors, or if the perturbation history is inconsistent.
            Exception: if the unlinked flattener cannot decide on, or construct,
                a valid perturbed structure.
        """
        if not os.path.exists(input_file):
            raise AssertionError("Input file must be present!")

        if transition_state:
            opt_method = "ts"
            # For a saddle point the first mode is the wanted imaginary one, so
            # the second mode is the one to perturb along.
            perturb_index = 1
        else:
            opt_method = "opt"
            perturb_index = 0

        energy_diff_cutoff = 0.0000001

        def make_job(suffix, backup, **extra):
            # Build a QCJob sharing all the common settings of this flattener.
            return QCJob(
                qchem_command=qchem_command,
                multimode=multimode,
                input_file=input_file,
                output_file=output_file,
                qclog_file=qclog_file,
                suffix=suffix,
                backup=backup,
                **extra,
                **QCJob_kwargs,
            )

        def make_input(template, molecule, rem):
            # Build a QCInput that inherits every non-rem section from template.
            return QCInput(
                molecule=molecule,
                rem=rem,
                opt=template.opt,
                pcm=template.pcm,
                solvent=template.solvent,
                smx=template.smx,
                vdw_mode=template.vdw_mode,
                van_der_waals=template.van_der_waals,
            )

        orig_input = QCInput.from_file(input_file)
        freq_rem = copy.deepcopy(orig_input.rem)
        freq_rem["job_type"] = "freq"
        opt_rem = copy.deepcopy(orig_input.rem)
        opt_rem["job_type"] = opt_method
        # `first` is only used as the `backup` flag of the next job: it is True
        # until any job has been issued.
        first = True
        energy_history = []

        if freq_before_opt:
            if not linked:
                warnings.warn(
                    "WARNING: This first frequency calculation will not inform subsequent optimization!"
                )
            # The input file is run as supplied; it is not rewritten here.
            yield make_job(".freq_pre", first, save_scratch=True)

            if linked:
                opt_rem["geom_opt_hessian"] = "read"
                opt_rem["scf_guess_always"] = True

            make_input(orig_input, orig_input.molecule,
                       opt_rem).write_file(input_file)
            first = False

        if linked:
            opt_rem["geom_opt_hessian"] = "read"
            opt_rem["scf_guess_always"] = True

            for ii in range(max_iterations):
                opt_suffix = ".{}_".format(opt_method) + str(ii)
                freq_suffix = ".freq_" + str(ii)

                yield make_job(opt_suffix, first, save_scratch=True)
                opt_outdata = QCOutput(output_file + opt_suffix).data
                opt_indata = QCInput.from_file(input_file + opt_suffix)
                # Keep later jobs on whatever SCF algorithm the finished job's
                # input ended up with.
                # NOTE(review): raises KeyError if the input has no
                # "scf_algorithm" rem key - confirm inputs always set it.
                if opt_indata.rem["scf_algorithm"] != freq_rem["scf_algorithm"]:
                    freq_rem["scf_algorithm"] = opt_indata.rem["scf_algorithm"]
                    opt_rem["scf_algorithm"] = opt_indata.rem["scf_algorithm"]
                first = False
                if (opt_outdata["structure_change"] == "unconnected_fragments"
                        and not opt_outdata["completion"]):
                    if not transition_state:
                        warnings.warn(
                            "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                        )
                        break
                energy_history.append(opt_outdata.get("final_energy"))
                optimized_molecule = opt_outdata.get(
                    "molecule_from_optimized_geometry")
                make_input(orig_input, optimized_molecule,
                           freq_rem).write_file(input_file)

                yield make_job(freq_suffix, first, save_scratch=True)
                outdata = QCOutput(output_file + freq_suffix).data
                indata = QCInput.from_file(input_file + freq_suffix)
                if indata.rem["scf_algorithm"] != freq_rem["scf_algorithm"]:
                    freq_rem["scf_algorithm"] = indata.rem["scf_algorithm"]
                    opt_rem["scf_algorithm"] = indata.rem["scf_algorithm"]
                errors = outdata.get("errors")
                if len(errors) != 0:
                    raise AssertionError(
                        "No errors should be encountered while flattening frequencies!"
                    )
                if not transition_state:
                    freq_0 = outdata.get("frequencies")[0]
                    freq_1 = outdata.get("frequencies")[1]
                    if freq_0 > 0.0:
                        warnings.warn("All frequencies positive!")
                        break
                    if abs(freq_0) < 15.0 and freq_1 > 0.0:
                        warnings.warn(
                            "One negative frequency smaller than 15.0 - not worth further flattening!"
                        )
                        break
                    if len(energy_history) > 1:
                        if abs(energy_history[-1] -
                               energy_history[-2]) < energy_diff_cutoff:
                            warnings.warn("Energy change below cutoff!")
                            break
                else:
                    freq_0 = outdata.get("frequencies")[0]
                    freq_1 = outdata.get("frequencies")[1]
                    freq_2 = outdata.get("frequencies")[2]
                    if freq_0 < 0.0 < freq_1:
                        warnings.warn("Saddle point found!")
                        break
                    if abs(freq_1) < 15.0 and freq_2 > 0.0:
                        warnings.warn(
                            "Second small imaginary frequency (smaller than 15.0) - not worth further flattening!"
                        )
                        break
                # Not converged yet: re-optimize from the same optimized
                # geometry, this time with the rem settings that read the
                # Hessian.
                make_input(orig_input, optimized_molecule,
                           opt_rem).write_file(input_file)
            if not save_final_scratch:
                scratch_dir = os.path.join(os.getcwd(), "scratch")
                # No job may have run (e.g. max_iterations=0), in which case
                # there is nothing to clean up.
                if os.path.isdir(scratch_dir):
                    shutil.rmtree(scratch_dir)

        else:
            orig_opt_input = QCInput.from_file(input_file)
            history = []
            # Tracked separately from `first`, which is already False here when
            # freq_before_opt ran a job; otherwise the reference values below
            # would never be bound.
            reference_captured = False

            for ii in range(max_iterations):
                opt_suffix = ".{}_".format(opt_method) + str(ii)
                freq_suffix = ".freq_" + str(ii)

                yield make_job(opt_suffix, first)
                opt_outdata = QCOutput(output_file + opt_suffix).data
                if not reference_captured:
                    orig_species = copy.deepcopy(opt_outdata.get("species"))
                    orig_charge = copy.deepcopy(opt_outdata.get("charge"))
                    orig_multiplicity = copy.deepcopy(
                        opt_outdata.get("multiplicity"))
                    orig_energy = copy.deepcopy(
                        opt_outdata.get("final_energy"))
                    reference_captured = True
                first = False
                if (opt_outdata["structure_change"] == "unconnected_fragments"
                        and not opt_outdata["completion"]):
                    if not transition_state:
                        warnings.warn(
                            "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                        )
                        break
                make_input(
                    orig_opt_input,
                    opt_outdata.get("molecule_from_optimized_geometry"),
                    freq_rem,
                ).write_file(input_file)

                yield make_job(freq_suffix, first)
                outdata = QCOutput(output_file + freq_suffix).data
                errors = outdata.get("errors")
                if len(errors) != 0:
                    raise AssertionError(
                        "No errors should be encountered while flattening frequencies!"
                    )
                if not transition_state:
                    freq_0 = outdata.get("frequencies")[0]
                    freq_1 = outdata.get("frequencies")[1]
                    if freq_0 > 0.0:
                        warnings.warn("All frequencies positive!")
                        if opt_outdata.get("final_energy") > orig_energy:
                            warnings.warn(
                                "WARNING: Energy increased during frequency flattening!"
                            )
                        break
                    if abs(freq_0) < 15.0 and freq_1 > 0.0:
                        warnings.warn(
                            "One negative frequency smaller than 15.0 - not worth further flattening!"
                        )
                        break
                    # NOTE(review): energy_history is never appended to in this
                    # branch, so this check cannot fire. Left as-is because
                    # enabling it would pre-empt the reversed-perturbation
                    # logic below - confirm intent.
                    if len(energy_history) > 1:
                        if abs(energy_history[-1] -
                               energy_history[-2]) < energy_diff_cutoff:
                            warnings.warn("Energy change below cutoff!")
                            break
                else:
                    freq_0 = outdata.get("frequencies")[0]
                    freq_1 = outdata.get("frequencies")[1]
                    freq_2 = outdata.get("frequencies")[2]
                    if freq_0 < 0.0 < freq_1:
                        warnings.warn("Saddle point found!")
                        break
                    if abs(freq_1) < 15.0 and freq_2 > 0.0:
                        warnings.warn(
                            "Second small imaginary frequency (smaller than 15.0) - not worth further flattening!"
                        )
                        break

                # Record this iteration so later iterations can reason about
                # which perturbation directions have already been tried.
                hist = {}
                hist["molecule"] = copy.deepcopy(
                    outdata.get("initial_molecule"))
                hist["geometry"] = copy.deepcopy(
                    outdata.get("initial_geometry"))
                hist["frequencies"] = copy.deepcopy(outdata.get("frequencies"))
                hist["frequency_mode_vectors"] = copy.deepcopy(
                    outdata.get("frequency_mode_vectors"))
                hist["num_neg_freqs"] = sum(
                    1 for freq in outdata.get("frequencies") if freq < 0)
                hist["energy"] = copy.deepcopy(opt_outdata.get("final_energy"))
                hist["index"] = len(history)
                hist["children"] = []
                history.append(hist)

                ref_mol = history[-1]["molecule"]
                geom_to_perturb = history[-1]["geometry"]
                negative_freq_vecs = history[-1]["frequency_mode_vectors"][
                    perturb_index]
                reversed_direction = False
                standard = True

                # If we've found one or more negative frequencies in two consecutive iterations, let's dig in
                # deeper:
                if len(history) > 1:
                    # Start by finding the latest iteration's parent:
                    if history[-1]["index"] in history[-2]["children"]:
                        parent_hist = history[-2]
                        history[-1]["parent"] = parent_hist["index"]
                    # Only look two entries back when such an entry exists, so
                    # a short history reaches the explicit error below rather
                    # than an IndexError.
                    elif (len(history) > 2 and
                          history[-1]["index"] in history[-3]["children"]):
                        parent_hist = history[-3]
                        history[-1]["parent"] = parent_hist["index"]
                    else:
                        raise AssertionError(
                            "ERROR: your parent should always be one or two iterations behind you! Exiting..."
                        )

                    # if the number of negative frequencies has remained constant or increased from parent to
                    # child,
                    if history[-1]["num_neg_freqs"] >= parent_hist[
                            "num_neg_freqs"]:
                        # check to see if the parent only has one child, aka only the positive perturbation has
                        # been tried,
                        # in which case just try the negative perturbation from the same parent
                        if len(parent_hist["children"]) == 1:
                            ref_mol = parent_hist["molecule"]
                            geom_to_perturb = parent_hist["geometry"]
                            negative_freq_vecs = parent_hist[
                                "frequency_mode_vectors"][perturb_index]
                            reversed_direction = True
                            standard = False
                            parent_hist["children"].append(len(history))
                        # If the parent has two children, aka both directions have been tried, then we have to
                        # get creative:
                        elif len(parent_hist["children"]) == 2:
                            # If we're dealing with just one negative frequency,
                            if parent_hist["num_neg_freqs"] == 1:
                                if history[parent_hist["children"][0]][
                                        "energy"] < history[-1]["energy"]:
                                    good_child = copy.deepcopy(
                                        history[parent_hist["children"][0]])
                                else:
                                    good_child = copy.deepcopy(history[-1])
                                if good_child["num_neg_freqs"] > 1:
                                    raise Exception(
                                        "ERROR: Child with lower energy has more negative frequencies! "
                                        "Exiting...")
                                if good_child["energy"] < parent_hist["energy"]:
                                    make_good_child_next_parent = True
                                elif (vector_list_diff(
                                        good_child["frequency_mode_vectors"]
                                    [perturb_index],
                                        parent_hist["frequency_mode_vectors"]
                                    [perturb_index],
                                ) > 0.2):
                                    make_good_child_next_parent = True
                                else:
                                    raise Exception(
                                        "ERROR: Good child not good enough! Exiting..."
                                    )
                                if make_good_child_next_parent:
                                    good_child["index"] = len(history)
                                    history.append(good_child)
                                    ref_mol = history[-1]["molecule"]
                                    geom_to_perturb = history[-1]["geometry"]
                                    negative_freq_vecs = history[-1][
                                        "frequency_mode_vectors"][
                                            perturb_index]
                            else:
                                raise Exception(
                                    "ERROR: Can't deal with multiple neg frequencies yet! Exiting..."
                                )
                        else:
                            raise AssertionError(
                                "ERROR: Parent cannot have more than two childen! Exiting..."
                            )
                    # Implicitly, if the number of negative frequencies decreased from parent to child,
                    # continue normally.
                if standard:
                    history[-1]["children"].append(len(history))

                min_molecule_perturb_scale = 0.1
                scale_grid = 10
                perturb_scale_grid = (max_molecule_perturb_scale -
                                      min_molecule_perturb_scale) / scale_grid

                # Try progressively smaller perturbations, from the maximum
                # scale down towards the minimum.
                structure_successfully_perturbed = False
                for molecule_perturb_scale in np.arange(
                        max_molecule_perturb_scale,
                        min_molecule_perturb_scale,
                        -perturb_scale_grid,
                ):
                    new_coords = perturb_coordinates(
                        old_coords=geom_to_perturb,
                        negative_freq_vecs=negative_freq_vecs,
                        molecule_perturb_scale=molecule_perturb_scale,
                        reversed_direction=reversed_direction,
                    )
                    new_molecule = Molecule(
                        species=orig_species,
                        coords=new_coords,
                        charge=orig_charge,
                        spin_multiplicity=orig_multiplicity,
                    )
                    if check_connectivity and not transition_state:
                        structure_successfully_perturbed = (
                            check_for_structure_changes(
                                ref_mol, new_molecule) == "no_change")
                        if structure_successfully_perturbed:
                            break
                    else:
                        # Connectivity is not being checked, so the first
                        # (largest) perturbation is accepted; without this the
                        # flag could never become True.
                        structure_successfully_perturbed = True
                        break
                if not structure_successfully_perturbed:
                    raise Exception(
                        "ERROR: Unable to perturb coordinates to remove negative frequency without changing "
                        "the connectivity! Exiting...")

                make_input(orig_opt_input, new_molecule,
                           opt_rem).write_file(input_file)
Exemplo n.º 2
0
 def setUpClass(cls):
     """Create the shared test fixtures: a three-atom C/O/O molecule and the
     MEGNet model loaded from the formation-energy HDF5 file."""
     species = ["C", "O", "O"]
     coords = [[0, 0, 0], [-1, 0, 0], [1, 0, 0]]
     cls.molecule = Molecule(species, coords)

     # Model path is resolved relative to CWD.
     model_path = os.path.join(
         CWD, "../../../mvl_models/mp-2019.4.1/formation_energy.hdf5")
     cls.model = MEGNetModel.from_file(model_path)
Exemplo n.º 3
0
    def get_subgraphs_as_molecules(self, use_weights=False):
        """
        Retrieve subgraphs as molecules, useful for extracting
        molecules from periodic crystals.

        Will only return unique molecules, not any duplicates
        present in the crystal (a duplicate defined as an
        isomorphic subgraph).

        The 3x3x3 supercell built on the first call is cached on
        ``self._supercell_sg`` and reused by later calls; its graph
        is replaced in place by an undirected copy.

        :param use_weights (bool): If True, only treat subgraphs
        as isomorphic if edges have the same weights. Typically,
        this means molecules will need to have the same bond
        lengths to be defined as duplicates, otherwise bond
        lengths can differ. This is a fairly robust approach,
        but will treat e.g. enantiomers as being duplicates.

        :return: list of unique Molecules in Structure
        """

        # creating a supercell is an easy way to extract
        # molecules (and not, e.g., layers of a 2D crystal)
        # without adding extra logic
        # Bind the local name in both the cached and uncached case, so a
        # second call does not fail on an unbound variable.
        supercell_sg = getattr(self, '_supercell_sg', None)
        if supercell_sg is None:
            supercell_sg = self._supercell_sg = self * (3, 3, 3)

        # make undirected to find connected subgraphs
        supercell_sg.graph = nx.Graph(supercell_sg.graph)

        # find subgraphs; built from connected_components because
        # nx.connected_component_subgraphs no longer exists in recent
        # networkx. Copies are taken so node attributes can be added below.
        all_subgraphs = [supercell_sg.graph.subgraph(component).copy()
                         for component
                         in nx.connected_components(supercell_sg.graph)]

        # discount subgraphs that lie across *supercell* boundaries
        # these will be subgraphs representing crystals
        molecule_subgraphs = [
            subgraph for subgraph in all_subgraphs
            if not any(d['to_jimage'] != (0, 0, 0)
                       for u, v, d in subgraph.edges(data=True))
        ]

        # add specie names to graph to be able to test for isomorphism
        for subgraph in molecule_subgraphs:
            for n in subgraph:
                subgraph.add_node(n, specie=str(supercell_sg.structure[n].specie))

        # now define how we test for isomorphism
        def node_match(n1, n2):
            return n1['specie'] == n2['specie']

        def edge_match(e1, e2):
            if use_weights:
                return e1['weight'] == e2['weight']
            return True

        # prune duplicate subgraphs
        unique_subgraphs = []
        for subgraph in molecule_subgraphs:
            already_present = any(nx.is_isomorphic(subgraph, g,
                                                   node_match=node_match,
                                                   edge_match=edge_match)
                                  for g in unique_subgraphs)
            if not already_present:
                unique_subgraphs.append(subgraph)

        # get Molecule objects for each subgraph
        molecules = []
        for subgraph in unique_subgraphs:
            sites = [supercell_sg.structure[n] for n in subgraph.nodes()]
            coords = [site.coords for site in sites]
            species = [site.specie for site in sites]

            molecule = Molecule(species, coords)

            # shift so origin is at center of mass
            molecule = molecule.get_centered_molecule()

            molecules.append(molecule)

        return molecules
Exemplo n.º 4
0
    def _parse(self, filename):
        start_patt = re.compile(" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(" #[pPnNtT]*.*")
        link0_patt = re.compile("^\s(%.+)\s*=\s*(.+)")
        charge_mul_patt = re.compile("Charge\s+=\s*([-\\d]+)\s+"
                                     "Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile("([0-9]+)\s+basis functions")
        pcm_patt = re.compile("Polarizable Continuum Model")
        stat_type_patt = re.compile("imaginary frequencies")
        scf_patt = re.compile("E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile("EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile("ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile("(Normal|Error) termination")
        error_patt = re.compile(
            "(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile(
            "^\s*(Mulliken charges|Mulliken atomic charges)")
        mulliken_charge_patt = re.compile('^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            '(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile("Standard orientation")
        end_patt = re.compile("--+")
        orbital_patt = re.compile("Alpha\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile("(Zero-point|Thermal) correction(.*)="
                                 "\s+([\d\.-]+)")
        forces_on_patt = re.compile(
            "Center\s+Atomic\s+Forces\s+\(Hartrees/Bohr\)")
        forces_off_patt = re.compile("Cartesian\s+Forces:\s+Max.*RMS.*")
        forces_patt = re.compile(
            "\s+(\d+)\s+(\d+)\s+([0-9\.-]+)\s+([0-9\.-]+)\s+([0-9\.-]+)")

        freq_on_patt = re.compile(
            "Harmonic\sfrequencies\s+\(cm\*\*-1\),\sIR\sintensities.*Raman.*")
        freq_patt = re.compile("Frequencies\s--\s+(.*)")
        normal_mode_patt = re.compile(
            "\s+(\d+)\s+(\d+)\s+([0-9\.-]{4,5})\s+([0-9\.-]{4,5}).*")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}
        self.link0 = {}
        self.cart_forces = []
        self.frequencies = []

        coord_txt = []
        read_coord = 0
        read_mulliken = False
        orbitals_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False
        parse_forces = False
        forces = []
        parse_freq = False
        frequencies = []

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif link0_patt.match(line):
                        m = link0_patt.match(line)
                        self.link0[m.group(1)] = m.group(2)
                    elif route_patt.search(line):
                        params = read_route_line(line)
                        self.functional = params[0]
                        self.basis_set = params[1]
                        self.route = params[2]
                        self.dieze_tag = params[3]
                        parse_stage = 1
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append(
                                        [float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))

                    if parse_forces:
                        m = forces_patt.search(line)
                        if m:
                            forces.extend(
                                [float(_v) for _v in m.groups()[2:5]])
                        elif forces_off_patt.search(line):
                            self.cart_forces.append(forces)
                            forces = []
                            parse_forces = False

                    elif parse_freq:
                        m = freq_patt.search(line)
                        if m:
                            values = [
                                float(_v) for _v in m.groups()[0].split()
                            ]
                            for value in values:
                                frequencies.append([value, []])
                        elif normal_mode_patt.search(line):
                            values = [float(_v) for _v in line.split()[2:]]
                            n = int(len(values) / 3)
                            for i in range(0, len(values), 3):
                                j = -n + int(i / 3)
                                frequencies[j][1].extend(values[i:i + 3])
                        elif line.find("-------------------") != -1:
                            parse_freq = False
                            self.frequencies.append(frequencies)
                            frequencies = []

                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                            terminated = True
                    elif error_patt.search(line):
                        error_defs = {
                            "! Non-Optimized Parameters !": "Optimization "
                            "error",
                            "Convergence failure": "SCF convergence error"
                        }
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        self.energies.append(
                            float(m.group(1).replace("D", "E")))
                    elif oniom_patt.search(line):
                        m = oniom_patt.matcher(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif orbital_patt.search(line):
                        orbitals_txt.append(line)
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = True
                    elif not parse_forces and forces_on_patt.search(line):
                        parse_forces = True
                    elif freq_on_patt.search(line):
                        parse_freq = True

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            m = end_mulliken_patt.search(line)
                            mulliken_charges = {}
                            for line in mulliken_txt:
                                if mulliken_charge_patt.search(line):
                                    m = mulliken_charge_patt.search(line)
                                    dict = {
                                        int(m.group(1)):
                                        [m.group(2),
                                         float(m.group(3))]
                                    }
                                    mulliken_charges.update(dict)
                            read_mulliken = False
                            self.Mulliken_charges = mulliken_charges

        if not terminated:
            #raise IOError("Bad Gaussian output file.")
            warnings.warn("\n" + self.filename + \
                ": Termination error or bad Gaussian output file !")
Exemplo n.º 5
0
    def test_split(self):
        """
        Exercise split_molecule_subgraphs: a retro-Diels-Alder split with
        bond alterations, the error raised when a split does not separate the
        graph, naive charge redistribution, and node-to-atom remapping.
        """
        # Perform retro-Diels-Alder reaction - turn product into reactants
        broken_bonds = [(0, 1), (4, 5)]
        bond_alterations = {
            (2, 3): {"weight": 1.0},
            (0, 5): {"weight": 2.0},
            (1, 2): {"weight": 2.0},
            (3, 4): {"weight": 2.0},
        }
        fragments = self.cyclohexene.split_molecule_subgraphs(
            broken_bonds, allow_reverse=True, alterations=bond_alterations
        )
        self.assertTrue(isinstance(fragments, list))

        # After alterations, the fragments (smallest first) should be
        # ethylene and butadiene
        fragments = sorted(fragments, key=len)
        self.assertEqual(fragments[0], self.ethylene)
        self.assertEqual(fragments[1], self.butadiene)

        # Breaking a single ring bond must raise
        with self.assertRaises(MolGraphSplitError):
            self.cyclohexene.split_molecule_subgraphs([(0, 1)])

        # Test naive charge redistribution
        hydroxide = Molecule(["O", "H"], [[0, 0, 0], [0.5, 0.5, 0.5]], charge=-1)
        hydroxide_graph = MoleculeGraph.with_empty_graph(hydroxide)
        hydroxide_graph.add_edge(0, 1)

        for fragment in hydroxide_graph.split_molecule_subgraphs([(0, 1)]):
            # The oxygen fragment keeps the -1 charge; the other is neutral
            expected_charge = -1 if str(fragment.molecule[0].specie) == "O" else 0
            self.assertEqual(fragment.molecule.charge, expected_charge)

        # Trying to test to ensure that remapping of nodes to atoms works
        halomethane = Molecule(
            ["C", "I", "Cl", "Br", "F"],
            [
                [0.8314, -0.2682, -0.9102],
                [1.3076, 1.3425, -2.2038],
                [-0.8429, -0.7410, -1.1554],
                [1.9841, -1.7636, -1.2953],
                [1.0098, 0.1231, 0.3916],
            ],
        )

        halomethane_graph = MoleculeGraph.with_empty_graph(halomethane)
        for substituent in range(1, 5):
            halomethane_graph.add_edge(0, substituent)

        for substituent in range(1, 5):
            pieces = halomethane_graph.split_molecule_subgraphs([(0, substituent)])
            for piece in pieces:
                node_species = nx.get_node_attributes(piece.graph, "specie")

                for node in range(len(piece.graph.nodes)):
                    site = piece.molecule[node]
                    self.assertEqual(node_species[node], str(site.specie))
Exemplo n.º 6
0
def get_ase_mol(molname):
    """
    Build the molecule called `molname` with `molecule(...)` and return it
    as a `Molecule` constructed from its chemical symbols and positions.
    """
    atoms = molecule(molname)
    return Molecule(atoms.get_chemical_symbols(), atoms.get_positions())
Exemplo n.º 7
0
    def generate_doc(self, dir_name, qcinput_files, qcoutput_files, multirun):
        """
        Assemble the task document for the calculation(s) found in one directory.

        Args:
            dir_name: Directory containing the calculation files.
            qcinput_files: Mapping of task name to input filename. When it has
                more entries than `qcoutput_files`, its "orig" entry is popped
                (the mapping is mutated) and parsed into ``d["orig"]``.
            qcoutput_files: Mapping of task name to output filename.
            multirun: If truthy, all files are parsed through
                `process_qchem_multirun`; otherwise each output file is parsed
                individually with `process_qchemrun`.

        Returns:
            dict: The assembled document (schema, input/output summaries,
            warnings, timings, formula/point-group/SMILES metadata, state).

        Raises:
            Exception: Any failure is logged together with the directory and
                traceback, then re-raised unchanged.
        """
        opt_job_types = ("opt", "optimization")
        freq_job_types = ("freq", "frequency")
        try:
            fullpath = os.path.abspath(dir_name)
            d = jsanitize(self.additional_fields, strict=True)
            d["schema"] = {
                "code": "atomate",
                "version": QChemDrone.__version__
            }
            d["dir_name"] = fullpath

            # If a saved "orig" input file is present, parse it in case the error handler made changes
            # to the initial input molecule or rem params, which we might want to filter for later
            if len(qcinput_files) > len(qcoutput_files):
                orig_input = QCInput.from_file(
                    os.path.join(dir_name, qcinput_files.pop("orig")))
                d["orig"] = {}
                d["orig"]["molecule"] = orig_input.molecule.as_dict()
                d["orig"]["molecule"]["charge"] = int(
                    d["orig"]["molecule"]["charge"])
                d["orig"]["rem"] = orig_input.rem
                d["orig"]["opt"] = orig_input.opt
                d["orig"]["pcm"] = orig_input.pcm
                d["orig"]["solvent"] = orig_input.solvent
                d["orig"]["smx"] = orig_input.smx

            if multirun:
                d["calcs_reversed"] = self.process_qchem_multirun(
                    dir_name, qcinput_files, qcoutput_files)
            else:
                d["calcs_reversed"] = [
                    self.process_qchemrun(dir_name, taskname,
                                          qcinput_files.get(taskname),
                                          output_filename)
                    for taskname, output_filename in qcoutput_files.items()
                ]

            # reverse the calculations data order so newest calc is first
            d["calcs_reversed"].reverse()

            # Collect per-calculation structure changes and merge warnings
            d["structure_change"] = []
            d["warnings"] = {}
            for entry in d["calcs_reversed"]:
                if "structure_change" in entry:
                    if ("structure_change" not in d["warnings"]
                            and entry["structure_change"] != "no_change"):
                        d["warnings"]["structure_change"] = True
                    d["structure_change"].append(entry["structure_change"])
                for key in entry["warnings"]:
                    if key not in d["warnings"]:
                        d["warnings"][key] = True

            # After the reversal the first calculation is last, the final one first
            d_calc_init = d["calcs_reversed"][-1]
            d_calc_final = d["calcs_reversed"][0]

            d["input"] = {
                "initial_molecule": d_calc_init["initial_molecule"],
                "job_type": d_calc_init["input"]["rem"]["job_type"]
            }
            d["output"] = {
                "initial_molecule": d_calc_final["initial_molecule"],
                "job_type": d_calc_final["input"]["rem"]["job_type"],
                "mulliken": d_calc_final["Mulliken"][-1]
            }
            if "RESP" in d_calc_final:
                d["output"]["resp"] = d_calc_final["RESP"][-1]
            elif "ESP" in d_calc_final:
                d["output"]["esp"] = d_calc_final["ESP"][-1]

            if d["output"]["job_type"] in opt_job_types:
                if "molecule_from_optimized_geometry" in d_calc_final:
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "molecule_from_optimized_geometry"]
                    d["output"]["final_energy"] = d_calc_final["final_energy"]
                else:
                    d["output"]["final_energy"] = "unstable"
                if d_calc_final["opt_constraint"]:
                    d["output"]["constraint"] = [
                        d_calc_final["opt_constraint"][0],
                        float(d_calc_final["opt_constraint"][6])
                    ]
            if d["output"]["job_type"] in freq_job_types:
                d["output"]["frequencies"] = d_calc_final["frequencies"]
                d["output"]["enthalpy"] = d_calc_final["total_enthalpy"]
                d["output"]["entropy"] = d_calc_final["total_entropy"]
                if d["input"]["job_type"] in opt_job_types:
                    # The final frequency job starts from the geometry and
                    # energy of the optimization that preceded it
                    d["output"]["optimized_molecule"] = d_calc_final[
                        "initial_molecule"]
                    d["output"]["final_energy"] = d["calcs_reversed"][1][
                        "final_energy"]

            # Build the optimization trajectory in chronological order
            opt_trajectory = []
            calcs = copy.deepcopy(d["calcs_reversed"])
            calcs.reverse()
            for calc in calcs:
                if calc["input"]["rem"]["job_type"] not in opt_job_types:
                    continue
                for ii, geom in enumerate(calc["geometries"]):
                    site_properties = {"Mulliken": calc["Mulliken"][ii]}
                    if "RESP" in calc:
                        site_properties["RESP"] = calc["RESP"][ii]
                    mol = Molecule(species=calc["species"],
                                   coords=geom,
                                   charge=calc["charge"],
                                   spin_multiplicity=calc["multiplicity"],
                                   site_properties=site_properties)
                    opt_trajectory.append({
                        "molecule": mol,
                        "energy": calc["energy_trajectory"][ii]
                    })
            if opt_trajectory:
                d["opt_trajectory"] = opt_trajectory

            if "final_energy" not in d["output"]:
                if d_calc_final["final_energy"] is not None:
                    d["output"]["final_energy"] = d_calc_final["final_energy"]
                else:
                    # Fall back to the last entry of the last SCF record
                    d["output"]["final_energy"] = d_calc_final["SCF"][-1][-1][
                        0]

            # Timings are only totalled when the final calculation completed
            if d_calc_final["completion"]:
                total_cputime = 0.0
                total_walltime = 0.0
                for calc in d["calcs_reversed"]:
                    if calc["walltime"] is not None:
                        total_walltime += calc["walltime"]
                    if calc["cputime"] is not None:
                        total_cputime += calc["cputime"]
                d["walltime"] = total_walltime
                d["cputime"] = total_cputime
            else:
                d["walltime"] = None
                d["cputime"] = None

            comp = d["output"]["initial_molecule"].composition
            d["formula_pretty"] = comp.reduced_formula
            d["formula_anonymous"] = comp.anonymized_formula
            d["formula_alphabetical"] = comp.alphabetical_formula
            d["chemsys"] = "-".join(sorted(set(d_calc_final["species"])))
            if d_calc_final["point_group"] is not None:
                d["pointgroup"] = d_calc_final["point_group"]
            else:
                try:
                    d["pointgroup"] = PointGroupAnalyzer(
                        d["output"]["initial_molecule"]).sch_symbol
                except ValueError:
                    d["pointgroup"] = "PGA_error"

            bb = BabelMolAdaptor(d["output"]["initial_molecule"])
            pbmol = bb.pybel_mol
            smiles = pbmol.write(str("smi")).split()[0]
            d["smiles"] = smiles

            d["state"] = "successful" if d_calc_final[
                "completion"] else "unsuccessful"
            if ("special_run_type" in d
                    and d["special_run_type"] == "frequency_flattener"
                    and d["state"] == "successful"):
                orig_num_neg_freq = sum(
                    1
                    for freq in d["calcs_reversed"][-2]["frequencies"]
                    if freq < 0)
                orig_energy = d_calc_init["final_energy"]
                final_num_neg_freq = sum(
                    1 for freq in d_calc_final["frequencies"]
                    if freq < 0)
                final_energy = d["calcs_reversed"][1]["final_energy"]
                d["num_frequencies_flattened"] = orig_num_neg_freq - final_num_neg_freq
                if final_num_neg_freq > 0:  # If a negative frequency remains,
                    # and it's too large to ignore,
                    if final_num_neg_freq > 1 or abs(
                            d["output"]["frequencies"][0]) >= 15.0:
                        d["state"] = "unsuccessful"  # then the flattening was unsuccessful
                if final_energy > orig_energy:
                    d["warnings"]["energy_increased"] = True

            d["last_updated"] = datetime.datetime.utcnow()
            return d

        except Exception:
            # Log the failing directory and traceback once, then propagate
            logger.error("Error in " + os.path.abspath(dir_name) + ".\n" +
                         traceback.format_exc())
            raise
Exemplo n.º 8
0
    def parse_coords(coord_lines):
        """
        Helper method to parse coordinates.

        Lines of the form ``name = value`` (also separated by whitespace or a
        comma) are collected first as named Z-matrix parameters. The
        coordinate lines are then read until the first blank line; each is
        interpreted either as a Cartesian entry or as a Z-matrix entry.

        Args:
            coord_lines: Iterable of text lines holding the coordinate section.

        Returns:
            Molecule built from the parsed species and Cartesian coordinates.

        Raises:
            KeyError: If a Z-matrix entry refers to a parameter name that was
                not defined in `coord_lines`.
        """
        # Named parameters that Z-matrix entries may refer to
        paras = {}
        var_pattern = re.compile(r"^([A-Za-z]+\S*)[\s=,]+([\d\-\.]+)$")
        for raw_line in coord_lines:
            m = var_pattern.match(raw_line.strip())
            if m:
                paras[m.group(1)] = float(m.group(2))

        species = []
        coords = []
        # Stores whether a Zmatrix format is detected. Once a zmatrix format
        # is detected, it is assumed for the remaining of the parsing.
        zmode = False
        for raw_line in coord_lines:
            line = raw_line.strip()
            if not line:
                break
            xyz_match = None if zmode else GaussianInput.xyz_patt.match(line)
            if xyz_match:
                species.append(xyz_match.group(1))
                toks = re.split(r"[,\s]+", line)
                # With more than four tokens the coordinates are taken from
                # tokens 2-4 instead of 1-3, skipping one extra field
                if len(toks) > 4:
                    coords.append([float(i) for i in toks[2:5]])
                else:
                    coords.append([float(i) for i in toks[1:4]])
            elif GaussianInput.zmat_patt.match(line):
                zmode = True
                toks = re.split(r"[,\s]+", line)
                species.append(toks[0])
                toks.pop(0)
                if len(toks) == 0:
                    # First atom sits at the origin
                    coords.append(np.array([0, 0, 0]))
                else:
                    nn = []
                    parameters = []
                    # Remaining tokens come in (reference atom, value) pairs
                    while len(toks) > 1:
                        ind = toks.pop(0)
                        data = toks.pop(0)
                        try:
                            nn.append(int(ind))
                        except ValueError:
                            # Reference given by label: use its 1-based index
                            nn.append(species.index(ind) + 1)
                        try:
                            val = float(data)
                            parameters.append(val)
                        except ValueError:
                            # Value given as a (possibly negated) parameter name
                            if data.startswith("-"):
                                parameters.append(-paras[data[1:]])
                            else:
                                parameters.append(paras[data])
                    if len(nn) == 1:
                        # Second atom is placed on the z axis
                        coords.append(np.array([0, 0, parameters[0]]))
                    elif len(nn) == 2:
                        coords1 = coords[nn[0] - 1]
                        coords2 = coords[nn[1] - 1]
                        bl = parameters[0]
                        angle = parameters[1]
                        axis = [0, 1, 0]
                        op = SymmOp.from_origin_axis_angle(
                            coords1, axis, angle, False)
                        coord = op.operate(coords2)
                        # Rescale the rotated vector to the bond length
                        vec = coord - coords1
                        coord = vec * bl / np.linalg.norm(vec) + coords1
                        coords.append(coord)
                    elif len(nn) == 3:
                        coords1 = coords[nn[0] - 1]
                        coords2 = coords[nn[1] - 1]
                        coords3 = coords[nn[2] - 1]
                        bl = parameters[0]
                        angle = parameters[1]
                        dih = parameters[2]
                        v1 = coords3 - coords2
                        v2 = coords1 - coords2
                        axis = np.cross(v1, v2)
                        op = SymmOp.from_origin_axis_angle(
                            coords1, axis, angle, False)
                        coord = op.operate(coords2)
                        v1 = coord - coords1
                        v2 = coords1 - coords2
                        v3 = np.cross(v1, v2)
                        adj = get_angle(v3, axis)
                        # Second rotation about the coords1-coords2 axis sets
                        # the dihedral, corrected by the angle found above
                        axis = coords1 - coords2
                        op = SymmOp.from_origin_axis_angle(
                            coords1, axis, dih - adj, False)
                        coord = op.operate(coord)
                        vec = coord - coords1
                        coord = vec * bl / np.linalg.norm(vec) + coords1
                        coords.append(coord)

        def parse_species(sp_str):
            """
            The species specification can take many forms. E.g.,
            simple integers representing atomic numbers ("8"),
            actual species string ("C") or a labelled species ("C1").
            Sometimes, the species string is also not properly capitalized,
            e.g, ("c1"). This method should take care of these known formats.
            """
            try:
                return int(sp_str)
            except ValueError:
                sp = re.sub(r"\d", "", sp_str)
                return sp.capitalize()

        species = [parse_species(sp) for sp in species]

        return Molecule(species, coords)
Exemplo n.º 9
0
def get_subgraphs_as_molecules_all(sg, use_weights=False):
    """
    Adaptation of
    http://pymatgen.org/_modules/pymatgen/analysis/graphs.html#StructureGraph.get_subgraphs_as_molecules
    for our needs: every isolated molecule found in a 3x3x3 supercell is
    returned, without removing duplicates.

    Args:
        sg: structure graph
        use_weights: Unused; kept so existing callers that pass it still work.

    Returns:
        list of molecules
    """

    # creating a supercell is an easy way to extract
    # molecules (and not, e.g., layers of a 2D crystal)
    # without adding extra logic
    supercell_sg = sg * (3, 3, 3)

    # make undirected to find connected subgraphs
    supercell_sg.graph = nx.Graph(supercell_sg.graph)

    # find subgraphs; nx.connected_component_subgraphs was removed in
    # networkx 2.4, so build them from the connected components instead
    all_subgraphs = [
        supercell_sg.graph.subgraph(component)
        for component in nx.connected_components(supercell_sg.graph)
    ]

    # discount subgraphs that lie across *supercell* boundaries:
    # these will be subgraphs representing crystals
    molecule_subgraphs = [
        subgraph
        for subgraph in all_subgraphs
        if not any(
            d["to_jimage"] != (0, 0, 0)
            for u, v, d in subgraph.edges(data=True)
        )
    ]

    # get Molecule objects for each subgraph
    molecules = []
    for subgraph in molecule_subgraphs:
        sites = [supercell_sg.structure[n] for n in subgraph.nodes()]
        coords = [site.coords for site in sites]
        species = [site.specie for site in sites]
        molecules.append(Molecule(species, coords))

    return molecules
Exemplo n.º 10
0
def cleave_operation():
    """
    Interactively cleave a structure read with `readstructure()`.

    The user picks one of three modes on standard input:

    1. cleave surface: write one slab for a given Miller index, or all
       slabs up to a maximum index, as ``<h>_<k>_<l>.vasp`` files;
    2. cleave sphere cluster: write ``sphere.vasp``;
    3. cleave shell structure: write ``shell.vasp``.

    Returns None. Prints a message and returns early when the loaded
    structure is a Molecule or when the choice is not 1, 2 or 3. An
    unrecognised slab specification terminates the process.
    """

    def read_nonempty_line():
        # Keep reading standard input until a non-blank line is entered
        text = ""
        while text == "":
            text = input().strip()
        return text

    def write_boxed_cluster(sites, vacuum, file_name):
        # Put the selected sites in a cubic box whose edge is the largest
        # interatomic distance plus the vacuum thickness, then write it out
        coords = [site[0].coords for site in sites]
        species = [site[0].specie for site in sites]
        mol = Molecule(coords=coords, species=species)
        max_dist = np.max(mol.distance_matrix)
        a = b = c = max_dist + vacuum
        box_struct = mol.get_boxed_structure(a, b, c)
        box_struct.to(filename=file_name, fmt='poscar')

    struct = readstructure()
    # Check the loaded object itself, not the Structure class
    if isinstance(struct, Molecule):
        print("cleave operation is only supported for periodic structure")
        return
    print('your choice ?')
    print('{} >>> {}'.format('1', 'cleave surface'))
    print('{} >>> {}'.format('2', 'cleave sphere cluster'))
    print('{} >>> {}'.format('3', 'cleave shell structure'))
    wait_sep()
    choice = int(read_nonempty_line())
    if choice == 1:
        print(
            " input the miller index, minimum size in angstroms of layers containing atomssupercell"
        )
        print(
            " and Minimize size in angstroms of layers containing vacuum like this:"
        )
        print(' 1 0 0 | 5 | 5')
        print(' it means miller index is [1,0,0]')
        print(" min_slab_size is 5 Ang ")
        print(" min_vacum_size is 5 Ang ")
        print(" or like this : ")
        print(' 2 | 5 | 5')
        print(' it will generate all slab with miller index less than 2')

        def generate_selected_slab(in_str):
            # "h k l | slab | vacuum": one slab for the given Miller index
            tmp_list = in_str.split('|')
            miller_index = [int(x) for x in tmp_list[0].strip().split()]
            min_slab_size = float(tmp_list[1])
            min_vac_size = float(tmp_list[2])
            slab = SlabGenerator(struct,
                                 miller_index,
                                 min_slab_size=min_slab_size,
                                 min_vacuum_size=min_vac_size,
                                 lll_reduce=True)
            slab_struct = slab.get_slab()
            slab_struct.sort()
            miller_str = [str(i) for i in miller_index]
            filename = '_'.join(miller_str) + '.vasp'
            slab_struct.to(filename=filename, fmt='POSCAR')

        def generate_all_slab(in_str):
            # "max | slab | vacuum": every slab up to the maximum index
            tmp_list = in_str.split('|')
            max_index = int(tmp_list[0])
            min_slab_size = float(tmp_list[1])
            min_vac_size = float(tmp_list[2])
            slabs = generate_all_slabs(struct,
                                       max_index=max_index,
                                       min_slab_size=min_slab_size,
                                       min_vacuum_size=min_vac_size,
                                       lll_reduce=True)
            for slab_struct in slabs:
                slab_struct.sort()
                miller_str = [str(i) for i in slab_struct.miller_index]
                filename = '_'.join(miller_str) + '.vasp'
                slab_struct.to(filename=filename, fmt='POSCAR')

        wait_sep()
        in_str = read_nonempty_line()
        # The number of tokens before the first '|' selects the mode
        len_para = len(in_str.split('|')[0].split())
        if len_para == 3:
            generate_selected_slab(in_str)
        elif len_para == 1:
            generate_all_slab(in_str)
        else:
            # os._exit needs an explicit status and skips stdio flushing,
            # so flush the message before terminating
            print("unknow format", flush=True)
            os._exit(1)

    elif choice == 2:
        print(
            " input the center atom index, sphere radius and vacuum layer thickness"
        )
        print(' 1 3.5 15')
        print(
            ' it means the sphere will be selected according to the 1st atom')
        print(
            " with the radius equals 5Ang, and vacuum layer thickness is 15 Ang"
        )
        wait_sep()
        para = read_nonempty_line().split()
        center_atom = int(para[0]) - 1  # user input is 1-based
        radius = float(para[1])
        vacuum = float(para[2])
        center_coord = struct[center_atom].coords
        sites = struct.get_neighbors_in_shell(center_coord, 0, radius)
        write_boxed_cluster(sites, vacuum, "sphere.vasp")
    elif choice == 3:
        print(
            " input the center atom index, start radius, shell thickness and")
        print(" vacuum layer thickness")
        print(' 1 5 10  15')
        print(
            ' it means the ball shell will be selected according to the 1st atom'
        )
        print(" with the 5< r <15Ang, and vacuum layer thickness is 15 Ang")
        wait_sep()
        para = read_nonempty_line().split()
        center_atom = int(para[0]) - 1  # user input is 1-based
        radius = float(para[1])
        shell = float(para[2])
        vacuum = float(para[3])
        center_coord = struct[center_atom].coords
        sites = struct.get_neighbors_in_shell(center_coord, radius, shell)
        write_boxed_cluster(sites, vacuum, "shell.vasp")

    else:
        print("unkown choice")
        return
Exemplo n.º 11
0
    def _parse(self, filename):
        start_patt = re.compile(" \(Enter \S+l101\.exe\)")
        route_patt = re.compile(" #[pPnNtT]*.*")
        charge_mul_patt = re.compile("Charge\s+=\s*([-\\d]+)\s+"
                                     "Multiplicity\s+=\s*(\d+)")
        num_basis_func_patt = re.compile("([0-9]+)\s+basis functions")
        pcm_patt = re.compile("Polarizable Continuum Model")
        stat_type_patt = re.compile("imaginary frequencies")
        scf_patt = re.compile("E\(.*\)\s*=\s*([-\.\d]+)\s+")
        mp2_patt = re.compile("EUMP2\s*=\s*(.*)")
        oniom_patt = re.compile("ONIOM:\s+extrapolated energy\s*=\s*(.*)")
        termination_patt = re.compile("(Normal|Error) termination")
        error_patt = re.compile(
            "(! Non-Optimized Parameters !|Convergence failure)")
        mulliken_patt = re.compile("^\s*Mulliken atomic charges")
        mulliken_charge_patt = re.compile('^\s+(\d+)\s+([A-Z][a-z]?)\s*(\S*)')
        end_mulliken_patt = re.compile(
            '(Sum of Mulliken )(.*)(charges)\s*=\s*(\D)')
        std_orientation_patt = re.compile("Standard orientation")
        end_patt = re.compile("--+")
        orbital_patt = re.compile("Alpha\s*\S+\s*eigenvalues --(.*)")
        thermo_patt = re.compile("(Zero-point|Thermal) correction(.*)="
                                 "\s+([\d\.-]+)")

        self.properly_terminated = False
        self.is_pcm = False
        self.stationary_type = "Minimum"
        self.structures = []
        self.corrections = {}
        self.energies = []
        self.pcm = None
        self.errors = []
        self.Mulliken_charges = {}

        coord_txt = []
        read_coord = 0
        read_mulliken = 0
        orbitals_txt = []
        parse_stage = 0
        num_basis_found = False
        terminated = False

        with zopen(filename) as f:
            for line in f:
                if parse_stage == 0:
                    if start_patt.search(line):
                        parse_stage = 1
                    elif route_patt.search(line):
                        self.route = {}
                        for tok in line.split():
                            sub_tok = tok.strip().split("=")
                            key = sub_tok[0].upper()
                            self.route[key] = sub_tok[1].upper() \
                                if len(sub_tok) > 1 else ""
                            m = re.match("(\w+)/([^/]+)", key)
                            if m:
                                self.functional = m.group(1)
                                self.basis_set = m.group(2)
                elif parse_stage == 1:
                    if charge_mul_patt.search(line):
                        m = charge_mul_patt.search(line)
                        self.charge = int(m.group(1))
                        self.spin_mult = int(m.group(2))
                        parse_stage = 2
                elif parse_stage == 2:

                    if self.is_pcm:
                        self._check_pcm(line)

                    if "FREQ" in self.route and thermo_patt.search(line):
                        m = thermo_patt.search(line)
                        if m.group(1) == "Zero-point":
                            self.corrections["Zero-point"] = float(m.group(3))
                        else:
                            key = m.group(2).strip(" to ")
                            self.corrections[key] = float(m.group(3))

                    if read_mulliken:
                        if not end_mulliken_patt.search(line):
                            mulliken_txt.append(line)
                        else:
                            m = end_mulliken_patt.search(line)
                            mulliken_charges = {}
                            for line in mulliken_txt:
                                if mulliken_charge_patt.search(line):
                                    m = mulliken_charge_patt.search(line)
                                    dict = {
                                        int(m.group(1)):
                                        [m.group(2),
                                         float(m.group(3))]
                                    }
                                    mulliken_charges.update(dict)
                            read_mulliken = 0
                            self.Mulliken_charges = mulliken_charges

                    if read_coord:
                        if not end_patt.search(line):
                            coord_txt.append(line)
                        else:
                            read_coord = (read_coord + 1) % 4
                            if not read_coord:
                                sp = []
                                coords = []
                                for l in coord_txt[2:]:
                                    toks = l.split()
                                    sp.append(Element.from_Z(int(toks[1])))
                                    coords.append(
                                        [float(i) for i in toks[3:6]])
                                self.structures.append(Molecule(sp, coords))
                    elif termination_patt.search(line):
                        m = termination_patt.search(line)
                        if m.group(1) == "Normal":
                            self.properly_terminated = True
                        terminated = True
                    elif error_patt.search(line):
                        error_defs = {
                            "! Non-Optimized Parameters !":
                            "Optimization error",
                            "Convergence failure": "SCF convergence error"
                        }
                        m = error_patt.search(line)
                        self.errors.append(error_defs[m.group(1)])
                    elif (not num_basis_found) and \
                            num_basis_func_patt.search(line):
                        m = num_basis_func_patt.search(line)
                        self.num_basis_func = int(m.group(1))
                        num_basis_found = True
                    elif (not self.is_pcm) and pcm_patt.search(line):
                        self.is_pcm = True
                        self.pcm = {}
                    elif "FREQ" in self.route and "OPT" in self.route and \
                            stat_type_patt.search(line):
                        self.stationary_type = "Saddle"
                    elif mp2_patt.search(line):
                        m = mp2_patt.search(line)
                        self.energies.append(
                            float(m.group(1).replace("D", "E")))
                    elif oniom_patt.search(line):
                        m = oniom_patt.matcher(line)
                        self.energies.append(float(m.group(1)))
                    elif scf_patt.search(line):
                        m = scf_patt.search(line)
                        self.energies.append(float(m.group(1)))
                    elif std_orientation_patt.search(line):
                        coord_txt = []
                        read_coord = 1
                    elif orbital_patt.search(line):
                        orbitals_txt.append(line)
                    elif mulliken_patt.search(line):
                        mulliken_txt = []
                        read_mulliken = 1
        if not terminated:
            raise IOError("Bad Gaussian output file.")
Exemplo n.º 12
0
"""
Dummy test systems
"""
from pymatgen.core import Composition, Structure, Lattice, Molecule

from ._data_conversion import to_array
from ._inspect import get_param_types

# Placeholder inputs keyed by lower-case type name. get_describer_dummy_obj
# indexes this table with ``describer_type.lower()``.
DUMMY_OBJECTS = {
    # Plain formula string.
    'str': 'H2O',
    # Composition built from the same formula string.
    'composition': Composition('H2O'),
    # Two Mo sites at (0, 0, 0) and (0.5, 0.5, 0.5) in a cubic lattice
    # created with Lattice.cubic(3.167).
    'structure': Structure(Lattice.cubic(3.167),
                           ['Mo', 'Mo'],
                           [[0, 0, 0], [0.5, 0.5, 0.5]]),
    # A C and an O site placed at x = 0 and x = 1.
    'molecule': Molecule(['C', 'O'], [[0, 0, 0], [1, 0, 0]])
}


def get_describer_dummy_obj(instance):
    """
    Look up a dummy input object for a describer's ``transform_one``.

    When the instance has a ``describer_type`` attribute that is not None,
    the DUMMY_OBJECTS entry keyed by its lower-cased value is returned.
    Otherwise the parameter types reported by ``get_param_types`` for
    ``instance.transform_one`` are inspected (presumably derived from the
    method's type hints -- confirm against ``get_param_types``).

    Args:
        instance (BaseDescriber): describers instance
    """
    obj_type = getattr(instance, "describer_type", None)
    if obj_type is not None:
        # An explicitly declared type wins; a value with no entry in
        # DUMMY_OBJECTS surfaces as a KeyError.
        return DUMMY_OBJECTS[obj_type.lower()]
    # No declared type: fall back to what get_param_types reports.
    arg_types = get_param_types(instance.transform_one)
    # Only the first reported value is considered.
    arg_type = list(arg_types.values())[0]
    # NOTE(review): nothing visible here uses arg_type or returns a value on
    # this path -- the rest of the function appears to be missing; confirm.
Exemplo n.º 13
0
    def _parse_job(self, output):
        """
        Parse the text of one NWChem job into a dictionary of results.

        The text is scanned line by line. Three multi-line sections are
        tracked with flags: the geometry listing (started by a line
        containing ``Geometry "geometry"`` and closed by an "Atomic Mass"
        line), the basis set summary (started by ``Summary of "ao basis"``
        and closed by a blank line) and the normal-mode table (started by a
        "P.Frequency" line and closed by the first blank line that follows
        displacement data). Every other line is tested against the
        single-line patterns for energies, preamble fields, the job banner
        and thermodynamic corrections.

        Args:
            output (str): Text of a single job.

        Returns:
            dict: Contains one lower-cased key per preamble field found
            (e.g. "natoms", "nelectrons", "charge", "spin_multiplicity",
            "scf_calculation_type") plus:

            * "job_type": first line starting with "NWChem", with " COSMO"
              appended for a DFT job whose text contains COSMO results.
            * "energies": total energies converted from Ha to eV. An entry
              becomes a dict with the keys "cosmo scf", "gas phase" and
              "sol phase" when COSMO energies follow it.
            * "corrections": corrections converted from kJ mol^-1 to
              eV atom^-1, keyed by their label in the output.
            * "molecules": one Molecule per geometry listing.
            * "basis_set": per-tag dict built from the basis set summary.
            * "errors" / "has_error": recognised error descriptions.
            * "frequencies": None, or a list of (frequency, displacements)
              tuples where displacements is a list of 3-tuples.
        """
        # Raw strings keep the regex escapes (\w, \s, \d, ...) from being
        # read as (invalid) Python string escapes.
        energy_patt = re.compile(r"Total \w+ energy\s+=\s+([\.\-\d]+)")

        # In cosmo solvation results; gas phase energy = -152.5044774212
        energy_gas_patt = re.compile(r"gas phase energy\s+=\s+([\.\-\d]+)")

        # In cosmo solvation results; sol phase energy = -152.5044774212
        energy_sol_patt = re.compile(r"sol phase energy\s+=\s+([\.\-\d]+)")

        coord_patt = re.compile(r"\d+\s+(\w+)\s+[\.\-\d]+\s+([\.\-\d]+)\s+"
                                r"([\.\-\d]+)\s+([\.\-\d]+)")
        corrections_patt = re.compile(r"([\w\-]+ correction to \w+)\s+="
                                      r"\s+([\.\-\d]+)")
        preamble_patt = re.compile(r"(No. of atoms|No. of electrons"
                                   r"|SCF calculation type|Charge|Spin "
                                   r"multiplicity)\s*:\s*(\S+)")
        # Horizontal rule lines inside the basis set summary.
        rule_patt = re.compile(r"\-+")
        error_defs = {
            "calculations not reaching convergence": "Bad convergence",
            "Calculation failed to converge": "Bad convergence",
            "geom_binvr: #indep variables incorrect": "autoz error",
            "dft optimize failed": "Geometry optimization failed"
        }

        data = {}
        energies = []
        frequencies = None
        corrections = {}
        molecules = []
        species = []
        coords = []
        errors = []
        basis_set = {}
        bset_header = []
        parse_geom = False
        parse_freq = False
        parse_bset = False
        job_type = ""
        for line in output.split("\n"):
            # Error markers are looked for on every line, whatever section
            # is currently being read.
            for marker, description in error_defs.items():
                if marker in line:
                    errors.append(description)

            # The geometry check is deliberately not part of the if/elif
            # chain below: a geometry line also falls through to it.
            if parse_geom:
                if line.strip() == "Atomic Mass":
                    molecules.append(Molecule(species, coords))
                    species = []
                    coords = []
                    parse_geom = False
                else:
                    m = coord_patt.search(line)
                    if m:
                        species.append(m.group(1).capitalize())
                        coords.append([
                            float(m.group(2)),
                            float(m.group(3)),
                            float(m.group(4))
                        ])

            if parse_freq:
                if not line.strip():
                    # A blank line before any displacement has been read is
                    # skipped; a blank line after them ends the table.
                    if not frequencies[-1][1]:
                        continue
                    parse_freq = False
                else:
                    # The first token is dropped; the remaining values go,
                    # in order, to the most recently added modes.
                    vibs = [float(vib) for vib in line.strip().split()[1:]]
                    num_vibs = len(vibs)
                    for mode, dis in zip(frequencies[-num_vibs:], vibs):
                        mode[1].append(dis)

            elif parse_bset:
                if line.strip() == "":
                    parse_bset = False
                else:
                    toks = line.split()
                    if toks[0] != "Tag" and not rule_patt.match(toks[0]):
                        basis_set[toks[0]] = dict(
                            zip(bset_header[1:], toks[1:]))
                    elif toks[0] == "Tag":
                        bset_header = toks
                        # Drop the fifth header token so that the header
                        # lines up with the data columns.
                        bset_header.pop(4)
                        bset_header = [h.lower() for h in bset_header]
            else:
                m = energy_patt.search(line)
                if m:
                    energies.append(Energy(m.group(1), "Ha").to("eV"))
                    continue

                m = energy_gas_patt.search(line)
                if m:
                    # Replace the last total energy by a dict that keeps it
                    # under "cosmo scf" next to the COSMO energies.
                    cosmo_scf_energy = energies[-1]
                    energies[-1] = {
                        "cosmo scf": cosmo_scf_energy,
                        "gas phase": Energy(m.group(1), "Ha").to("eV"),
                    }

                m = energy_sol_patt.search(line)
                if m:
                    energies[-1].update(
                        {"sol phase": Energy(m.group(1), "Ha").to("eV")})

                m = preamble_patt.search(line)
                if m:
                    try:
                        val = int(m.group(2))
                    except ValueError:
                        val = m.group(2)
                    # "No. of atoms" -> "natoms", "Spin multiplicity" ->
                    # "spin_multiplicity", ...
                    k = m.group(1).replace("No. of ", "n").replace(" ", "_")
                    data[k.lower()] = val
                elif "Geometry \"geometry\"" in line:
                    parse_geom = True
                elif "Summary of \"ao basis\"" in line:
                    parse_bset = True
                elif "P.Frequency" in line:
                    parse_freq = True
                    if not frequencies:
                        frequencies = []
                    frequencies.extend([(float(freq), [])
                                        for freq in line.strip().split()[1:]])
                elif job_type == "" and line.strip().startswith("NWChem"):
                    job_type = line.strip()
                    if job_type == "NWChem DFT Module" and \
                            "COSMO solvation results" in output:
                        job_type += " COSMO"
                else:
                    m = corrections_patt.search(line)
                    if m:
                        corrections[m.group(1)] = FloatWithUnit(
                            m.group(2), "kJ mol^-1").to("eV atom^-1")
        if frequencies:
            for freq, mode in frequencies:
                # Regroup the flat displacement list into 3-tuples, in
                # place; values left over after the last full triple are
                # dropped by zip.
                mode[:] = zip(*[iter(mode)] * 3)
        data.update({
            "job_type": job_type,
            "energies": energies,
            "corrections": corrections,
            "molecules": molecules,
            "basis_set": basis_set,
            "errors": errors,
            "has_error": bool(errors),
            "frequencies": frequencies
        })

        return data
Exemplo n.º 14
0
    def from_string(cls, string_input):
        """
        Build an NwInput by parsing the text of an input deck.

        Parsing has only been exercised on text written out by this class
        itself, so other decks may contain constructs it does not handle.

        Args:
            string_input: Text of the input deck to parse.

        Returns:
            NwInput object
        """
        pending = string_input.strip().split("\n")
        directives = []
        tasks = []
        theory_directives = {}
        charge = spin_multiplicity = title = basis_set = None
        geom_options = symmetry_options = memory_options = None

        while pending:
            text = pending.pop(0).strip()
            if not text:
                continue

            fields = text.split()
            keyword = fields[0].lower()

            if keyword == "geometry":
                geom_options = fields[1:]
                text = pending.pop(0).strip()
                fields = text.split()
                # An optional symmetry line may precede the atom list.
                if fields[0].lower() == "symmetry":
                    symmetry_options = fields[1:]
                    text = pending.pop(0).strip()
                # Atom lines run up to the closing "end".
                species = []
                coords = []
                while text.lower() != "end":
                    fields = text.split()
                    species.append(fields[0])
                    coords.append([float(value) for value in fields[1:]])
                    text = pending.pop(0).strip()
                mol = Molecule(species, coords)
            elif keyword == "charge":
                charge = int(fields[1])
            elif keyword == "title":
                # Drop the keyword itself, then surrounding blanks/quotes.
                title = text[5:].strip().strip("\"")
            elif keyword == "basis":
                # First token is the key, last token (unquoted) the value.
                text = pending.pop(0).strip()
                basis_set = {}
                while text.lower() != "end":
                    fields = text.split()
                    basis_set[fields[0]] = fields[-1].strip("\"")
                    text = pending.pop(0).strip()
            elif keyword in NwTask.theories:
                # Theory block: first token is the key, last the value.
                theory = keyword
                text = pending.pop(0).strip()
                theory_directives[theory] = {}
                while text.lower() != "end":
                    fields = text.split()
                    theory_directives[theory][fields[0]] = fields[-1]
                    if fields[0] == "mult":
                        spin_multiplicity = float(fields[1])
                    text = pending.pop(0).strip()
            elif keyword == "task":
                # A task picks up whatever charge, multiplicity, title and
                # basis set have been read so far.
                tasks.append(
                    NwTask(charge=charge,
                           spin_multiplicity=spin_multiplicity,
                           title=title,
                           theory=fields[1],
                           operation=fields[2],
                           basis_set=basis_set,
                           theory_directives=theory_directives.get(
                               fields[1])))
            elif keyword == "memory":
                memory_options = ' '.join(fields[1:])
            else:
                # Anything unrecognised is kept as a tokenised directive.
                directives.append(text.split())

        return NwInput(mol,
                       tasks=tasks,
                       directives=directives,
                       geometry_options=geom_options,
                       symmetry_options=symmetry_options,
                       memory_options=memory_options)
Exemplo n.º 15
0
    def opt_with_frequency_flattener(cls,
                                     qchem_command,
                                     multimode="openmp",
                                     input_file="mol.qin",
                                     output_file="mol.qout",
                                     qclog_file="mol.qclog",
                                     max_iterations=10,
                                     max_molecule_perturb_scale=0.3,
                                     check_connectivity=True,
                                     **QCJob_kwargs):
        """
        Optimize a structure and calculate vibrational frequencies to check
        if the structure is a true minimum. If a frequency is negative,
        iteratively perturb the geometry, optimize, and recalculate
        frequencies until all are positive, i.e. a true minimum has been
        found.

        This is a generator. Each iteration yields an optimization job and
        then a frequency job; the output of a job is read back only after
        the generator is resumed. The number of negative frequencies is
        recorded after every frequency job: if it did not change since the
        previous iteration the perturbation direction is reversed, and if
        it still does not change after reversing, an Exception is raised.

        Perturbations are tried from max_molecule_perturb_scale downwards
        (0.1 is the exclusive lower bound, in ten equal steps). With
        check_connectivity the first perturbed molecule whose molecule
        graph is isomorphic to that of the unperturbed molecule is used;
        without it the first (largest) perturbation is used as is.

        Args:
            qchem_command (str): Command to run QChem.
            multimode (str): Parallelization scheme, either openmp or mpi.
            input_file (str): Name of the QChem input file.
            output_file (str): Name of the QChem output file.
            qclog_file (str): Name of the log file passed on to QCJob.
            max_iterations (int): Number of perturbation -> optimization -> frequency
                iterations to perform. Defaults to 10.
            max_molecule_perturb_scale (float): The maximum scaled perturbation that
                can be applied to the molecule. Defaults to 0.3.
            check_connectivity (bool): Whether to check differences in connectivity
                introduced by structural perturbation. Defaults to True.
            **QCJob_kwargs: Passthrough kwargs to QCJob. See
                :class:`custodian.qchem.jobs.QCJob`.

        Yields:
            QCJob: alternately an optimization job (suffix ".opt_<i>") and
            a frequency job (suffix ".freq_<i>").

        Raises:
            AssertionError: If input_file does not exist or if a frequency
                job reports errors.
            Exception: If reversing the perturbation direction does not
                change the number of negative frequencies, or if no
                perturbation passes the connectivity check.
        """

        min_molecule_perturb_scale = 0.1
        scale_grid = 10
        perturb_scale_grid = (max_molecule_perturb_scale -
                              min_molecule_perturb_scale) / scale_grid

        if not os.path.exists(input_file):
            raise AssertionError('Input file must be present!')
        orig_opt_input = QCInput.from_file(input_file)
        orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
        orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
        orig_freq_rem["job_type"] = "freq"
        first = True
        reversed_direction = False
        num_neg_freqs = []

        for ii in range(max_iterations):
            yield (QCJob(qchem_command=qchem_command,
                         multimode=multimode,
                         input_file=input_file,
                         output_file=output_file,
                         qclog_file=qclog_file,
                         suffix=".opt_" + str(ii),
                         backup=first,
                         **QCJob_kwargs))
            first = False
            opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
            if (opt_outdata["structure_change"] == "unconnected_fragments"
                    and not opt_outdata["completion"]):
                print(
                    "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                )
                break

            # Run the frequency calculation on the optimized geometry.
            freq_QCInput = QCInput(molecule=opt_outdata.get(
                "molecule_from_optimized_geometry"),
                                   rem=orig_freq_rem,
                                   opt=orig_opt_input.opt,
                                   pcm=orig_opt_input.pcm,
                                   solvent=orig_opt_input.solvent)
            freq_QCInput.write_file(input_file)
            yield (QCJob(qchem_command=qchem_command,
                         multimode=multimode,
                         input_file=input_file,
                         output_file=output_file,
                         qclog_file=qclog_file,
                         suffix=".freq_" + str(ii),
                         backup=first,
                         **QCJob_kwargs))
            outdata = QCOutput(output_file + ".freq_" + str(ii)).data
            errors = outdata.get("errors")
            if len(errors) != 0:
                raise AssertionError(
                    'No errors should be encountered while flattening frequencies!'
                )
            frequencies = outdata.get('frequencies')
            # Only the first frequency is inspected here.
            if frequencies[0] > 0.0:
                print("All frequencies positive!")
                break

            num_neg_freqs.append(sum(1 for freq in frequencies if freq < 0))
            if len(num_neg_freqs) > 1:
                unchanged = num_neg_freqs[-1] == num_neg_freqs[-2]
                if unchanged and not reversed_direction:
                    # No progress: try perturbing the other way.
                    reversed_direction = True
                elif unchanged and reversed_direction:
                    if len(num_neg_freqs) < 3:
                        raise AssertionError(
                            "ERROR: This should only be possible after at least three frequency flattening iterations! Exiting..."
                        )
                    raise Exception(
                        "ERROR: Reversing the perturbation direction still could not flatten any frequencies. Exiting..."
                    )
                elif reversed_direction:
                    # The count changed again: back to the default direction.
                    reversed_direction = False

            negative_freq_vecs = outdata.get("frequency_mode_vectors")[0]
            structure_successfully_perturbed = False

            # The reference graph does not depend on the perturbation
            # scale, so it is built once per iteration.
            old_molgraph = None
            if check_connectivity:
                old_molgraph = MoleculeGraph.with_local_env_strategy(
                    outdata.get("initial_molecule"),
                    OpenBabelNN(),
                    reorder=False,
                    extend_structure=False)

            for molecule_perturb_scale in np.arange(
                    max_molecule_perturb_scale,
                    min_molecule_perturb_scale, -perturb_scale_grid):
                new_coords = perturb_coordinates(
                    old_coords=outdata.get("initial_geometry"),
                    negative_freq_vecs=negative_freq_vecs,
                    molecule_perturb_scale=molecule_perturb_scale,
                    reversed_direction=reversed_direction)
                new_molecule = Molecule(
                    species=outdata.get('species'),
                    coords=new_coords,
                    charge=outdata.get('charge'),
                    spin_multiplicity=outdata.get('multiplicity'))
                if not check_connectivity:
                    # Without the check nothing can reject a perturbation,
                    # so the first (largest) one is accepted. Previously
                    # the success flag was never set on this path and the
                    # method always raised below.
                    structure_successfully_perturbed = True
                    break
                new_molgraph = MoleculeGraph.with_local_env_strategy(
                    new_molecule,
                    OpenBabelNN(),
                    reorder=False,
                    extend_structure=False)
                if old_molgraph.isomorphic_to(new_molgraph):
                    structure_successfully_perturbed = True
                    break
            if not structure_successfully_perturbed:
                raise Exception(
                    "ERROR: Unable to perturb coordinates to remove negative frequency without changing the connectivity! Exiting..."
                )

            # The next iteration optimizes the perturbed molecule.
            new_opt_QCInput = QCInput(molecule=new_molecule,
                                      rem=orig_opt_rem,
                                      opt=orig_opt_input.opt,
                                      pcm=orig_opt_input.pcm,
                                      solvent=orig_opt_input.solvent)
            new_opt_QCInput.write_file(input_file)
Exemplo n.º 16
0
    def opt_with_frequency_flattener(cls,
                                     qchem_command,
                                     multimode="openmp",
                                     input_file="mol.qin",
                                     output_file="mol.qout",
                                     qclog_file="mol.qclog",
                                     max_iterations=10,
                                     max_molecule_perturb_scale=0.3,
                                     reversed_direction=False,
                                     ignore_connectivity=False,
                                     **QCJob_kwargs):
        """
        Alternate geometry optimizations and frequency calculations until
        the first reported frequency is positive.

        Each iteration yields an optimization job followed by a frequency
        job. While the first frequency is not positive, the geometry is
        displaced along the first frequency mode vector, trying scales from
        max_molecule_perturb_scale downwards (0.1 exclusive, ten equal
        steps), and the displaced molecule is written out as the input of
        the next optimization.

        Args:
            qchem_command (str): Command to run QChem.
            multimode (str): Parallelization scheme, either openmp or mpi.
            input_file (str): Name of the QChem input file.
            output_file (str): Name of the QChem output file.
            qclog_file (str): Name of the log file passed on to QCJob.
            max_iterations (int): Number of perturbation -> optimization -> frequency
                iterations to perform. Defaults to 10.
            max_molecule_perturb_scale (float): The maximum scaled perturbation that
                can be applied to the molecule. Defaults to 0.3.
            reversed_direction (bool): Whether to reverse the direction of the
                vibrational frequency vectors. Defaults to False.
            ignore_connectivity (bool): Whether to ignore differences in connectivity
                introduced by structural perturbation. Defaults to False.
            **QCJob_kwargs: Passthrough kwargs to QCJob. See
                :class:`custodian.qchem.jobs.QCJob`.
        """

        lowest_scale = 0.1
        scale_steps = 10
        scale_decrement = (max_molecule_perturb_scale -
                           lowest_scale) / scale_steps
        comparator = MoleculeStructureComparator()

        if not os.path.exists(input_file):
            raise AssertionError('Input file must be present!')
        template = QCInput.from_file(input_file)
        opt_rem = copy.deepcopy(template.rem)
        freq_rem = copy.deepcopy(template.rem)
        freq_rem["job_type"] = "freq"
        is_first_job = True

        for cycle in range(max_iterations):
            opt_suffix = ".opt_{}".format(cycle)
            freq_suffix = ".freq_{}".format(cycle)

            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=opt_suffix,
                        backup=is_first_job,
                        **QCJob_kwargs)
            is_first_job = False
            opt_result = QCOutput(output_file + opt_suffix).data
            if opt_result["structure_change"] == "unconnected_fragments":
                print(
                    "Unstable molecule broke into unconnected fragments! Exiting..."
                )
                break

            # Frequency job on the geometry the optimizer ended with.
            optimized = opt_result.get("molecule_from_optimized_geometry")
            QCInput(molecule=optimized,
                    rem=freq_rem,
                    opt=template.opt,
                    pcm=template.pcm,
                    solvent=template.solvent).write_file(input_file)
            yield QCJob(qchem_command=qchem_command,
                        multimode=multimode,
                        input_file=input_file,
                        output_file=output_file,
                        qclog_file=qclog_file,
                        suffix=freq_suffix,
                        backup=is_first_job,
                        **QCJob_kwargs)
            freq_result = QCOutput(output_file + freq_suffix).data
            if len(freq_result.get("errors")) != 0:
                raise AssertionError(
                    'No errors should be encountered while flattening frequencies!'
                )
            if freq_result.get('frequencies')[0] > 0.0:
                print("All frequencies positive!")
                break

            mode_vectors = freq_result.get("frequency_mode_vectors")[0]
            start_coords = freq_result.get("initial_geometry")
            start_molecule = freq_result.get("initial_molecule")

            # Largest displacement first; stop at the first one accepted.
            for scale in np.arange(max_molecule_perturb_scale,
                                   lowest_scale, -scale_decrement):
                displaced_coords = perturb_coordinates(
                    old_coords=start_coords,
                    negative_freq_vecs=mode_vectors,
                    molecule_perturb_scale=scale,
                    reversed_direction=reversed_direction)
                displaced_molecule = Molecule(
                    species=freq_result.get('species'),
                    coords=displaced_coords,
                    charge=freq_result.get('charge'),
                    spin_multiplicity=freq_result.get('multiplicity'))
                if comparator.are_equal(
                        start_molecule,
                        displaced_molecule) or ignore_connectivity:
                    break
            else:
                # Reached only when no scale was accepted.
                raise Exception(
                    "Unable to perturb coordinates to remove negative frequency without changing the bonding structure"
                )

            # Input for the optimization of the next iteration.
            QCInput(molecule=displaced_molecule,
                    rem=opt_rem,
                    opt=template.opt,
                    pcm=template.pcm,
                    solvent=template.solvent).write_file(input_file)
Exemplo n.º 17
0
    def opt_with_frequency_flattener(cls,
                                     qchem_command,
                                     multimode="openmp",
                                     input_file="mol.qin",
                                     output_file="mol.qout",
                                     qclog_file="mol.qclog",
                                     max_iterations=10,
                                     max_molecule_perturb_scale=0.3,
                                     check_connectivity=True,
                                     linked=True,
                                     **QCJob_kwargs):
        """
        Optimize a structure and calculate vibrational frequencies to check if the
        structure is in a true minima. If a frequency is negative, iteratively
        perturbe the geometry, optimize, and recalculate frequencies until all are
        positive, aka a true minima has been found.

        Args:
            qchem_command (str): Command to run QChem.
            multimode (str): Parallelization scheme, either openmp or mpi.
            input_file (str): Name of the QChem input file.
            output_file (str): Name of the QChem output file.
            max_iterations (int): Number of perturbation -> optimization -> frequency
                iterations to perform. Defaults to 10.
            max_molecule_perturb_scale (float): The maximum scaled perturbation that
                can be applied to the molecule. Defaults to 0.3.
            check_connectivity (bool): Whether to check differences in connectivity
                introduced by structural perturbation. Defaults to True.
            **QCJob_kwargs: Passthrough kwargs to QCJob. See
                :class:`custodian.qchem.jobs.QCJob`.
        """
        if not os.path.exists(input_file):
            raise AssertionError("Input file must be present!")

        if linked:

            energy_diff_cutoff = 0.0000001

            orig_input = QCInput.from_file(input_file)
            freq_rem = copy.deepcopy(orig_input.rem)
            freq_rem["job_type"] = "freq"
            opt_rem = copy.deepcopy(orig_input.rem)
            opt_rem["geom_opt_hessian"] = "read"
            opt_rem["scf_guess_always"] = True
            first = True
            energy_history = []

            for ii in range(max_iterations):
                yield (QCJob(qchem_command=qchem_command,
                             multimode=multimode,
                             input_file=input_file,
                             output_file=output_file,
                             qclog_file=qclog_file,
                             suffix=".opt_" + str(ii),
                             scratch_dir=os.getcwd(),
                             save_scratch=True,
                             save_name="chain_scratch",
                             backup=first,
                             **QCJob_kwargs))
                opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
                first = False
                if (opt_outdata["structure_change"] == "unconnected_fragments"
                        and not opt_outdata["completion"]):
                    print(
                        "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                    )
                    break
                else:
                    energy_history.append(opt_outdata.get("final_energy"))
                    freq_QCInput = QCInput(
                        molecule=opt_outdata.get(
                            "molecule_from_optimized_geometry"),
                        rem=freq_rem,
                        opt=orig_input.opt,
                        pcm=orig_input.pcm,
                        solvent=orig_input.solvent,
                        smx=orig_input.smx,
                    )
                    freq_QCInput.write_file(input_file)
                    yield (QCJob(qchem_command=qchem_command,
                                 multimode=multimode,
                                 input_file=input_file,
                                 output_file=output_file,
                                 qclog_file=qclog_file,
                                 suffix=".freq_" + str(ii),
                                 scratch_dir=os.getcwd(),
                                 save_scratch=True,
                                 save_name="chain_scratch",
                                 backup=first,
                                 **QCJob_kwargs))
                    outdata = QCOutput(output_file + ".freq_" + str(ii)).data
                    errors = outdata.get("errors")
                    if len(errors) != 0:
                        raise AssertionError(
                            "No errors should be encountered while flattening frequencies!"
                        )
                    if outdata.get("frequencies")[0] > 0.0:
                        print("All frequencies positive!")
                        break
                    elif (abs(outdata.get("frequencies")[0]) < 15.0
                          and outdata.get("frequencies")[1] > 0.0):
                        print(
                            "One negative frequency smaller than 15.0 - not worth further flattening!"
                        )
                        break
                    else:
                        if len(energy_history) > 1:
                            if (abs(energy_history[-1] - energy_history[-2]) <
                                    energy_diff_cutoff):
                                print("Energy change below cutoff!")
                                break
                        opt_QCInput = QCInput(
                            molecule=opt_outdata.get(
                                "molecule_from_optimized_geometry"),
                            rem=opt_rem,
                            opt=orig_input.opt,
                            pcm=orig_input.pcm,
                            solvent=orig_input.solvent,
                            smx=orig_input.smx,
                        )
                        opt_QCInput.write_file(input_file)
            if os.path.exists(os.path.join(os.getcwd(), "chain_scratch")):
                shutil.rmtree(os.path.join(os.getcwd(), "chain_scratch"))

        else:
            if not os.path.exists(input_file):
                raise AssertionError("Input file must be present!")
            orig_opt_input = QCInput.from_file(input_file)
            orig_opt_rem = copy.deepcopy(orig_opt_input.rem)
            orig_freq_rem = copy.deepcopy(orig_opt_input.rem)
            orig_freq_rem["job_type"] = "freq"
            first = True
            history = []

            for ii in range(max_iterations):
                yield (QCJob(qchem_command=qchem_command,
                             multimode=multimode,
                             input_file=input_file,
                             output_file=output_file,
                             qclog_file=qclog_file,
                             suffix=".opt_" + str(ii),
                             backup=first,
                             **QCJob_kwargs))
                opt_outdata = QCOutput(output_file + ".opt_" + str(ii)).data
                if first:
                    orig_species = copy.deepcopy(opt_outdata.get("species"))
                    orig_charge = copy.deepcopy(opt_outdata.get("charge"))
                    orig_multiplicity = copy.deepcopy(
                        opt_outdata.get("multiplicity"))
                    orig_energy = copy.deepcopy(
                        opt_outdata.get("final_energy"))
                first = False
                if (opt_outdata["structure_change"] == "unconnected_fragments"
                        and not opt_outdata["completion"]):
                    print(
                        "Unstable molecule broke into unconnected fragments which failed to optimize! Exiting..."
                    )
                    break
                else:
                    freq_QCInput = QCInput(
                        molecule=opt_outdata.get(
                            "molecule_from_optimized_geometry"),
                        rem=orig_freq_rem,
                        opt=orig_opt_input.opt,
                        pcm=orig_opt_input.pcm,
                        solvent=orig_opt_input.solvent,
                        smx=orig_opt_input.smx,
                    )
                    freq_QCInput.write_file(input_file)
                    yield (QCJob(qchem_command=qchem_command,
                                 multimode=multimode,
                                 input_file=input_file,
                                 output_file=output_file,
                                 qclog_file=qclog_file,
                                 suffix=".freq_" + str(ii),
                                 backup=first,
                                 **QCJob_kwargs))
                    outdata = QCOutput(output_file + ".freq_" + str(ii)).data
                    errors = outdata.get("errors")
                    if len(errors) != 0:
                        raise AssertionError(
                            "No errors should be encountered while flattening frequencies!"
                        )
                    if outdata.get("frequencies")[0] > 0.0:
                        print("All frequencies positive!")
                        if opt_outdata.get("final_energy") > orig_energy:
                            print(
                                "WARNING: Energy increased during frequency flattening!"
                            )
                        break
                    else:
                        hist = {}
                        hist["molecule"] = copy.deepcopy(
                            outdata.get("initial_molecule"))
                        hist["geometry"] = copy.deepcopy(
                            outdata.get("initial_geometry"))
                        hist["frequencies"] = copy.deepcopy(
                            outdata.get("frequencies"))
                        hist["frequency_mode_vectors"] = copy.deepcopy(
                            outdata.get("frequency_mode_vectors"))
                        hist["num_neg_freqs"] = sum(
                            1 for freq in outdata.get("frequencies")
                            if freq < 0)
                        hist["energy"] = copy.deepcopy(
                            opt_outdata.get("final_energy"))
                        hist["index"] = len(history)
                        hist["children"] = []
                        history.append(hist)

                        ref_mol = history[-1]["molecule"]
                        geom_to_perturb = history[-1]["geometry"]
                        negative_freq_vecs = history[-1][
                            "frequency_mode_vectors"][0]
                        reversed_direction = False
                        standard = True

                        # If we've found one or more negative frequencies in two consecutive iterations, let's dig in
                        # deeper:
                        if len(history) > 1:
                            # Start by finding the latest iteration's parent:
                            if history[-1]["index"] in history[-2]["children"]:
                                parent_hist = history[-2]
                                history[-1]["parent"] = parent_hist["index"]
                            elif history[-1]["index"] in history[-3][
                                    "children"]:
                                parent_hist = history[-3]
                                history[-1]["parent"] = parent_hist["index"]
                            else:
                                raise AssertionError(
                                    "ERROR: your parent should always be one or two iterations behind you! Exiting..."
                                )

                            # if the number of negative frequencies has remained constant or increased from parent to
                            # child,
                            if (history[-1]["num_neg_freqs"] >=
                                    parent_hist["num_neg_freqs"]):
                                # check to see if the parent only has one child, aka only the positive perturbation has
                                # been tried,
                                # in which case just try the negative perturbation from the same parent
                                if len(parent_hist["children"]) == 1:
                                    ref_mol = parent_hist["molecule"]
                                    geom_to_perturb = parent_hist["geometry"]
                                    negative_freq_vecs = parent_hist[
                                        "frequency_mode_vectors"][0]
                                    reversed_direction = True
                                    standard = False
                                    parent_hist["children"].append(
                                        len(history))
                                # If the parent has two children, aka both directions have been tried, then we have to
                                # get creative:
                                elif len(parent_hist["children"]) == 2:
                                    # If we're dealing with just one negative frequency,
                                    if parent_hist["num_neg_freqs"] == 1:
                                        make_good_child_next_parent = False
                                        if (history[parent_hist["children"]
                                                    [0]]["energy"] <
                                                history[-1]["energy"]):
                                            good_child = copy.deepcopy(history[
                                                parent_hist["children"][0]])
                                        else:
                                            good_child = copy.deepcopy(
                                                history[-1])
                                        if good_child["num_neg_freqs"] > 1:
                                            raise Exception(
                                                "ERROR: Child with lower energy has more negative frequencies! "
                                                "Exiting...")
                                        elif (good_child["energy"] <
                                              parent_hist["energy"]):
                                            make_good_child_next_parent = True
                                        elif (vector_list_diff(
                                                good_child[
                                                    "frequency_mode_vectors"]
                                            [0],
                                                parent_hist[
                                                    "frequency_mode_vectors"]
                                            [0],
                                        ) > 0.2):
                                            make_good_child_next_parent = True
                                        else:
                                            raise Exception(
                                                "ERROR: Good child not good enough! Exiting..."
                                            )
                                        if make_good_child_next_parent:
                                            good_child["index"] = len(history)
                                            history.append(good_child)
                                            ref_mol = history[-1]["molecule"]
                                            geom_to_perturb = history[-1][
                                                "geometry"]
                                            negative_freq_vecs = history[-1][
                                                "frequency_mode_vectors"][0]
                                    else:
                                        raise Exception(
                                            "ERROR: Can't deal with multiple neg frequencies yet! Exiting..."
                                        )
                                else:
                                    raise AssertionError(
                                        "ERROR: Parent cannot have more than two childen! Exiting..."
                                    )
                            # Implicitly, if the number of negative frequencies decreased from parent to child,
                            # continue normally.
                        if standard:
                            history[-1]["children"].append(len(history))

                        min_molecule_perturb_scale = 0.1
                        scale_grid = 10
                        perturb_scale_grid = (
                            max_molecule_perturb_scale -
                            min_molecule_perturb_scale) / scale_grid

                        structure_successfully_perturbed = False
                        for molecule_perturb_scale in np.arange(
                                max_molecule_perturb_scale,
                                min_molecule_perturb_scale,
                                -perturb_scale_grid,
                        ):
                            new_coords = perturb_coordinates(
                                old_coords=geom_to_perturb,
                                negative_freq_vecs=negative_freq_vecs,
                                molecule_perturb_scale=molecule_perturb_scale,
                                reversed_direction=reversed_direction,
                            )
                            new_molecule = Molecule(
                                species=orig_species,
                                coords=new_coords,
                                charge=orig_charge,
                                spin_multiplicity=orig_multiplicity,
                            )
                            if check_connectivity:
                                structure_successfully_perturbed = (
                                    check_for_structure_changes(
                                        ref_mol, new_molecule) == "no_change")
                                if structure_successfully_perturbed:
                                    break
                        if not structure_successfully_perturbed:
                            raise Exception(
                                "ERROR: Unable to perturb coordinates to remove negative frequency without changing "
                                "the connectivity! Exiting...")

                        new_opt_QCInput = QCInput(
                            molecule=new_molecule,
                            rem=orig_opt_rem,
                            opt=orig_opt_input.opt,
                            pcm=orig_opt_input.pcm,
                            solvent=orig_opt_input.solvent,
                            smx=orig_opt_input.smx,
                        )
                        new_opt_QCInput.write_file(input_file)
Exemplo n.º 18
0
def _get_SiteEnvironments(struct: PymatgenStructure,
                          cutoff: float,
                          PBC: List[bool],
                          get_permutations: bool = True,
                          eigen_tol: float = 1e-5) -> List[Dict[str, Any]]:
    """
    Collect the local environment around every 'A1' site of a structure.

    For each site whose ``"SiteTypes"`` site property equals ``'A1'``, the
    site itself and its neighbors within ``cutoff`` are gathered, and their
    Cartesian coordinates are shifted so that their mean is the origin.
    Optionally, the point group of that cluster is used to derive the
    permutations of the environment's members under each symmetry operation.

    Parameters
    ----------
    struct: PymatgenStructure
        Structure to analyse. It must carry a ``"SiteTypes"`` entry in its
        ``site_properties``.
    cutoff : float
        Cutoff distance passed to ``get_all_neighbors`` when collecting the
        neighbors of each site.
    PBC : List[bool]
        One flag per lattice direction. Along a direction whose flag is
        False, a neighbor is kept only if its fractional coordinate
        (rounded to 10 decimals) lies in [0, 1).
    get_permutations : bool (default True)
        Whether to compute the permuted neighbor lists.
    eigen_tol : float (default 1e-5)
        Passed as ``eigen_tolerance`` to pymatgen's ``PointGroupAnalyzer``.

    Returns
    -------
    site_envs : List[Dict[str, Any]]
        One dictionary per 'A1' site with the keys

        - ``'pos'``: mean-centered coordinates, central site first,
        - ``'sitetypes'``: site type of every member of the environment,
        - ``'env2config'``: index in ``struct`` of every member,
        - ``'permutations'``: one index list per point-group operation
          (empty if ``get_permutations`` is False),
        - ``'dist'``: distance of every member to the central site
          (0.0 for the central site itself).

    Raises
    ------
    ImportError
        If pymatgen cannot be imported.
    """
    try:
        from pymatgen.core import Molecule
        from pymatgen.symmetry.analyzer import PointGroupAnalyzer
    except ImportError as err:
        # Translate only a failed import; anything else (e.g. a
        # KeyboardInterrupt) must propagate unchanged.
        raise ImportError(
            "This class requires pymatgen to be installed.") from err

    # Force a boolean mask so that ``~pbc`` below is a logical NOT even when
    # the caller passes 0/1 integers instead of booleans.
    pbc = np.asarray(PBC, dtype=bool)
    structure = struct
    neighbors = structure.get_all_neighbors(cutoff, include_index=True)
    symbols = structure.species
    site_types = structure.site_properties['SiteTypes']
    site_idxs = [
        i for i, sitetype in enumerate(site_types) if sitetype == 'A1'
    ]
    # species -> site type; if a species occurs with several site types the
    # last occurrence in the structure wins.
    sym_site_map = {ele: site_types[i] for i, ele in enumerate(symbols)}

    site_envs = []
    for site_idx in site_idxs:
        # The central site is always the first member of its environment.
        local_env_sym = [symbols[site_idx]]
        local_env_xyz = [structure[site_idx].coords]
        local_env_dist = [0.0]
        local_env_sitemap = [site_idx]
        for n in neighbors[site_idx]:
            # n[0] is the neighbor site, n[1] its distance, n[2] its index.
            # Keep it only if it stays inside the cell along every
            # non-periodic direction.
            c = np.around(n[0].frac_coords, 10)
            withinPBC = np.logical_and(0 <= c, c < 1)
            if np.all(withinPBC[~pbc]):
                local_env_xyz.append(n[0].coords)
                local_env_sym.append(n[0].specie)
                local_env_dist.append(n[1])
                local_env_sitemap.append(n[2])
        local_env_xyz = np.subtract(local_env_xyz, np.mean(local_env_xyz, 0))

        perm = []
        if get_permutations:
            finder = PointGroupAnalyzer(Molecule(local_env_sym, local_env_xyz),
                                        eigen_tolerance=eigen_tol)
            pg = finder.get_pointgroup()
            for op in pg:
                # For every original position, find the closest position
                # after applying the symmetry operation.
                newpos = op.operate_multi(local_env_xyz)
                perm.append(
                    np.argmin(cdist(local_env_xyz, newpos), axis=1).tolist())

        site_env = {
            'pos': local_env_xyz,
            'sitetypes': [sym_site_map[s] for s in local_env_sym],
            'env2config': local_env_sitemap,
            'permutations': perm,
            'dist': local_env_dist
        }
        site_envs.append(site_env)
    return site_envs
Exemplo n.º 19
0
def parse_symmetry(pos):
    """
    Return the point-group symbol for a set of positions.

    Every position is labelled as a carbon atom before the analysis.

    Args:
        pos: Sequence of coordinates, one entry per point; handed unchanged
            to ``Molecule`` as its coordinates.

    Returns:
        The ``sch_symbol`` attribute of the ``PointGroupAnalyzer`` built for
        these points (presumably the Schoenflies symbol -- confirm against
        the analyzer's documentation).
    """
    carbon_labels = ["C"] * len(pos)
    analyzer = PointGroupAnalyzer(Molecule(carbon_labels, pos))
    return analyzer.sch_symbol
Exemplo n.º 20
0
    def extract_molecule(self, indices: List[int]) -> Molecule:
        """
        Build a Molecule from the sites selected by ``indices``.

        The selected sites are first turned into a structure, a structure
        graph is derived from that structure, and the coordinates obtained
        by walking the graph are combined with the structure's species.

        Args:
            indices: Site indices passed to
                ``get_molecular_structure_from_indices``.

        Returns:
            Molecule with the species of the selected structure and the
            coordinates returned by ``walk_graph_and_get_coords``.
        """
        fragment = self.get_molecular_structure_from_indices(indices)
        walked_coords = self.walk_graph_and_get_coords(
            self.get_structure_graph(fragment))
        return Molecule(species=fragment.species, coords=walked_coords)
Exemplo n.º 21
0
from pymatgen.io.vaspio.vasp_input import Incar, Poscar, Potcar, Kpoints
from pymatgen.core import Structure, Molecule


from mpinterfaces.interface import Interface, Ligand

        


# Inputs for building the ligand, the interface and the slab, starting from
# POSCAR files on disk.

# Bulk PbS, read from the vdW-relaxed POSCAR.
strt = Structure.from_file("POSCAR_PbS_bulk_with_vdw")
# DMF geometry, read from its own POSCAR file.
mol_struct = Structure.from_file("POSCAR_DMF")
mol = Molecule(mol_struct.species, mol_struct.cart_coords)
# Wrap the DMF molecule as a Ligand.
DMF = Ligand([mol])
supercell = [1, 1, 1]

# Slab thickness and vacuum are set by hand, for now, to converged values.
min_thick = 19
min_vac = 12
# Surface coverage is fixed at 0.014 ligand/sq.Angstrom for consistency
# (best ligand spacing at this coverage).
surface_coverage = 0.014

# hkl of the facet to reproduce
hkl = [1, 0, 0]
# species on the slab to adsorb over
slab_species = 'Pb'
# species on the ligand that serves as the bridge atom
adatom_on_ligand = 'O'
# initial adsorption distance in angstrom
ads_distance = 3.0
Exemplo n.º 22
0
    def from_string(cls, string_input):
        """
        Read a FiestaInput from a string. Currently tested to work with
        files generated from this class itself.

        The text is consumed strictly from top to bottom. Each section is one
        header/comment line, which is discarded, followed by one line of
        whitespace-separated values. Option values are stored as the raw
        string tokens; only the atom/species counts, the per-atom species
        index and the coordinates are converted to numbers.

        Args:
            string_input: string_input to parse.
        Returns:
            FiestaInput object
        """
        # Stored in reverse so that pending.pop() yields lines in file order.
        pending = string_input.strip().split("\n")[::-1]

        def read_fields():
            # Next line, stripped and split on whitespace.
            return pending.pop().strip().split()

        def read_section():
            # Drop the header/comment line, then return the data line's fields.
            pending.pop()
            return read_fields()

        # number of atoms and species
        counts = read_section()
        nat = counts[0]
        nsp = counts[1]

        # number of valence bands (read but not used further in this method)
        nvbands = read_section()[0]

        # correlation_grid
        # number of points and spacing in eV for correlation grid
        grid = read_section()
        correlation_grid = {'n_grid': grid[0], 'dE_grid': grid[1]}

        # Exc DFT
        # re-read=1 or recompute=0 Exc DFT
        Exc_DFT_option = {'rdVxcpsi': read_section()[0]}

        # COHSEX
        # number of COHSEX corrected occp and unoccp bands: C=COHSEX  H=HF
        cohsex_bands = read_section()
        COHSEX_options = {
            'nv_cohsex': cohsex_bands[0],
            'nc_cohsex': cohsex_bands[1],
            'eigMethod': cohsex_bands[2],
        }
        # number of COHSEX iter, scf on wfns, mixing coeff; V=RI-V  I=RI-D
        cohsex_iter = read_section()
        COHSEX_options.update({
            'nit_cohsex': cohsex_iter[0],
            'resMethod': cohsex_iter[1],
            'scf_cohsex_wf': cohsex_iter[2],
            'mix_cohsex': cohsex_iter[3],
        })

        # GW
        # number of GW corrected occp and unoccp bands
        gw_bands = read_section()
        GW_options = {'nv_corr': gw_bands[0], 'nc_corr': gw_bands[1]}
        # number of GW iterations
        GW_options['nit_gw'] = read_section()[0]

        # BSE
        # dumping for BSE and TDDFT
        bse_flags = read_section()
        BSE_TDDFT_options = {
            'do_bse': bse_flags[0],
            'do_tddft': bse_flags[1],
        }
        # number of occp. and virtual bands for BSE: nocore and up to 40 eVs
        bse_bands = read_section()
        BSE_TDDFT_options.update({
            'nv_bse': bse_bands[0],
            'nc_bse': bse_bands[1],
        })
        # number of excitations needed and number of iterations
        bse_iter = read_section()
        BSE_TDDFT_options.update({
            'npsi_bse': bse_iter[0],
            'nit_bse': bse_iter[1],
        })

        # Molecule
        # list of symbols in order: one header line, then one line per species
        pending.pop()
        atname = []
        species_left = int(nsp)
        while species_left != 0:
            atname.append(read_fields()[0])
            species_left -= 1

        # scaling factor
        # NOTE(review): the file format comment says the coordinates "will be
        # multiplied by scale", but the value is not applied here -- confirm
        # whether that is intended.
        scale = read_section()[0]

        # atoms x,y,z cartesian: one header line, then one line per atom
        pending.pop()
        species = []
        coords = []
        atoms_left = int(nat)
        while atoms_left != 0:
            atom = read_fields()
            coords.append([float(value) for value in atom[0:3]])
            # The fourth column is a 1-based index into the symbol list.
            species.append(atname[int(atom[3]) - 1])
            atoms_left -= 1

        mol = Molecule(species, coords)

        return FiestaInput(mol=mol, correlation_grid=correlation_grid, Exc_DFT_option=Exc_DFT_option, COHSEX_options=COHSEX_options,
                           GW_options=GW_options, BSE_TDDFT_options=BSE_TDDFT_options)