Exemplo n.º 1
0
    def test_align(self):
        """Align the phenylpropane conformers and prune redundant geometries.

        The last molecule of every ``test/static/phenylpropane*.out`` file is
        collected into one conformational ensemble, the ensemble is aligned to
        its first geometry on atoms 1-6, and ``eliminate_redundant`` is then
        checked with two different atom selections.  The resulting ensembles
        are written back to ``test/static`` as ``.gjf`` / ``.mol2`` files.
        """
        conformers = cctk.ConformationalEnsemble()
        for out_path in sorted(glob.glob("test/static/phenylpropane*.out")):
            parsed = cctk.GaussianFile.read_file(out_path).ensemble
            final_geometry = parsed.molecules[-1]
            conformers.add_molecule(
                final_geometry, parsed.get_properties_dict(final_geometry))

        atoms_to_compare = [1, 2, 3, 4, 5, 6]
        # np.int64 is passed on purpose: to_geometry must accept any integer
        # type, not just the builtin int.
        aligned, rmsd_before, rmsd_after = conformers.align(
            to_geometry=np.int64(0),
            comparison_atoms=atoms_to_compare,
            compute_RMSD=True,
        )
        # Only the post-alignment RMSD is asserted on; the pre-alignment
        # values are paired up but otherwise unused.
        for _, residual in zip(rmsd_before, rmsd_after):
            self.assertLess(residual, 0.0001)
        cctk.GaussianFile.write_ensemble_to_file(
            "test/static/phenylpropane_aligned.gjf", aligned, "#p")

        # Comparing on the "heavy" selection is expected to keep three
        # conformers.
        pruned_heavy, _ = aligned.eliminate_redundant(
            RMSD_cutoff=0.5, comparison_atoms="heavy", return_RMSD=True)
        self.assertEqual(len(pruned_heavy), 3)
        cctk.GaussianFile.write_ensemble_to_file(
            "test/static/phenylpropane_aligned2.gjf", pruned_heavy, "#p")

        # Comparing only on the atoms used for the alignment is expected to
        # leave a single conformer.
        pruned_selected = aligned.eliminate_redundant(
            RMSD_cutoff=0.5, comparison_atoms=atoms_to_compare)
        self.assertEqual(len(pruned_selected), 1)
        cctk.GaussianFile.write_ensemble_to_file(
            "test/static/phenylpropane_aligned3.gjf", pruned_selected, "#p")

        cctk.MOL2File.write_ensemble_to_file(
            "test/static/phenylpropane_aligned.mol2", aligned)
Exemplo n.º 2
0
 def as_ensemble(self):
     """Collect the molecule of every frame into a ``cctk.ConformationalEnsemble``.

     Each molecule is stored together with its frame's ``bath_temperature``
     and ``energy``.

     NOTE(review): ``idxs`` is not defined inside this method.  Presumably it
     is a module-level name or a parameter that was dropped from the
     signature -- confirm, otherwise the first iteration raises ``NameError``.

     Returns:
         the populated ``cctk.ConformationalEnsemble``
     """
     result = cctk.ConformationalEnsemble()
     # All frames are included; an earlier version stopped before the last
     # frame (``self.frames[:-1]``) for reasons that were never recorded.
     for snapshot in self.frames:
         geometry = snapshot.molecule(idxs)
         frame_properties = {
             "bath_temperature": snapshot.bath_temperature,
             "energy": snapshot.energy,
         }
         result.add_molecule(geometry, frame_properties)
     return result
Exemplo n.º 3
0
 def build_test_ensemble(self):
     """Build the phenylpropane ensemble shared by the tests.

     Reads every file matching ``test/static/phenylpropane*.out`` in sorted
     order and adds the last molecule of each file, together with its
     properties, to a fresh ``cctk.ConformationalEnsemble``.

     Returns:
         the populated ``cctk.ConformationalEnsemble``
     """
     collected = cctk.ConformationalEnsemble()
     for out_path in sorted(glob.glob("test/static/phenylpropane*.out")):
         parsed = cctk.GaussianFile.read_file(out_path).ensemble
         final_geometry = parsed.molecules[-1]
         collected.add_molecule(
             final_geometry, parsed.get_properties_dict(final_geometry))
     return collected
Exemplo n.º 4
0
    def read_ensemble(cls, filename, conformational=False):
        """
        Read ``filename`` with ``read_trajectory`` and gather the molecules
        of the resulting file objects into a single ensemble.

        Args:
            filename: passed unchanged to ``cls.read_trajectory``
            conformational (bool): when true a ``cctk.ConformationalEnsemble``
                is built, otherwise a plain ``cctk.Ensemble``

        Returns:
            the populated ensemble
        """
        parsed_files = cls.read_trajectory(filename)

        ensemble_cls = (
            cctk.ConformationalEnsemble if conformational else cctk.Ensemble
        )
        collected = ensemble_cls()
        for parsed in parsed_files:
            collected.add_molecule(parsed.molecule)

        return collected
Exemplo n.º 5
0
    def test_final_structure(self):
        """Two methane geometries must overlay after renumbering and alignment.

        The second molecule is renumbered to match the first, both are placed
        in a conformational ensemble, and the ensemble is aligned on all
        atoms.  The post-alignment RMSD of the second molecule has to fall
        below 0.005.
        """
        path1 = "test/static/methane_perturbed.gjf"
        path2 = "test/static/methane_perturbed_key.gjf"

        e = cctk.ConformationalEnsemble()

        reference = cctk.GaussianFile.read_file(
            path1).get_molecule().assign_connectivity()
        e.add_molecule(reference)

        # Renumber against the molecule already stored in the ensemble so the
        # atom ordering of both entries agrees before alignment.
        renumbered = cctk.GaussianFile.read_file(
            path2).get_molecule().assign_connectivity().renumber_to_match(
                e.molecules[0])
        e.add_molecule(renumbered)

        # Only the post-alignment RMSDs are inspected.
        _, _, after = e.align(comparison_atoms="all", compute_RMSD=True)

        # assertLess reports both operands on failure, unlike assertTrue(a < b).
        self.assertLess(after[1], 0.005)
Exemplo n.º 6
0
    def test_boltzmann_weighting(self):
        """Check ``boltzmann_average`` with explicit and with stored energies.

        First the phenylpropane test ensemble is averaged using hand-supplied
        energies in kcal/mol; then an ensemble built from the pentane output
        files is averaged over its ``"enthalpy"`` property.
        """
        phenylpropane = self.build_test_ensemble()

        averaged, weights = phenylpropane.boltzmann_average(
            "energy",
            energies=[1.36, 0, 1000, 1000, 1000, 1000],
            energy_unit="kcal_mol",
            return_weights=True,
        )
        # NOTE(review): these tolerance checks are one-sided (no abs()), so
        # they pass for ANY ratio below 0.11 and ANY average below 0.016252.
        # Confirm the intended values before tightening them.
        ratio = weights[0] / weights[1]
        self.assertTrue((ratio - 0.1 < 0.01))
        self.assertTrue(averaged - 0.016152 < 0.0001)

        pentane = cctk.ConformationalEnsemble()
        for out_path in glob.glob("test/static/pentane*.out"):
            parsed = cctk.GaussianFile.read_file(out_path).ensemble
            final_geometry = parsed.molecules[-1]
            pentane.add_molecule(
                final_geometry, parsed.get_properties_dict(final_geometry))

        mean_enthalpy = pentane.boltzmann_average("enthalpy")
        # NOTE(review): one-sided as well -- passes for any value below
        # 0.10732.
        self.assertTrue(mean_enthalpy - .10722 < 0.0001)
Exemplo n.º 7
0
    def test_ensemble_indexing2(self):
        """Property assignment and column indexing work for both ensemble types.

        The molecules parsed from ``gaussian_file.out`` are copied one by one
        into a fresh ensemble, each tagged with its position under
        ``"test_property"``.  The same checks are run for
        ``cctk.ConformationalEnsemble`` and for ``cctk.Ensemble``.
        """
        path = "test/static/gaussian_file.out"
        parsed = cctk.GaussianFile.read_file(path)
        mols = parsed.ensemble
        self.assertIsInstance(mols, cctk.ConformationalEnsemble)

        for ensemble_cls in (cctk.ConformationalEnsemble, cctk.Ensemble):
            # subTest names the failing ensemble class and still runs the
            # other one.
            with self.subTest(ensemble_cls=ensemble_cls.__name__):
                ensemble = ensemble_cls()
                for i, molecule in enumerate(mols.molecules):
                    ensemble.add_molecule(molecule)
                    ensemble[molecule, "test_property"] = i

                self.assertEqual(len(ensemble), 3)
                self.assertListEqual(
                    list(ensemble[:, "test_property"]), [0, 1, 2])
Exemplo n.º 8
0
    def test_write(self):
        """ORCA input written by both write paths must match the reference.

        The peptide geometry is read from an ``.xyz`` file and written out
        twice -- once through ``OrcaFile.write_molecule_to_file`` and once
        through an ``OrcaFile`` instance's ``write_file`` -- and each result
        is compared line by line with ``test_peptide.inp``.
        """
        read_path = "test/static/test_peptide.xyz"
        path = "test/static/test_peptide.inp"
        new_path = "test/static/test_peptide_copy.inp"

        def assert_matches_reference():
            # Remove the scratch file even when the comparison fails, so a
            # failing run does not leave stray files in test/static.
            try:
                with open(path) as old, open(new_path) as new:
                    self.assertListEqual(list(new), list(old))
            finally:
                if os.path.exists(new_path):
                    os.remove(new_path)

        file = cctk.XYZFile.read_file(read_path)
        self.assertIsInstance(file.molecule, cctk.Molecule)

        header = "! aug-cc-pVTZ aug-cc-pVTZ/C DLPNO-CCSD(T) TightSCF TightPNO MiniPrint"
        variables = {"maxcore": 4000}
        blocks = {"pal": ["nproc 4"], "mdci": ["density none"]}

        # Path 1: write a bare molecule through the class-level helper.
        cctk.OrcaFile.write_molecule_to_file(new_path, file.molecule, header,
                                             variables, blocks)
        assert_matches_reference()

        # Path 2: wrap the molecule in an ensemble and write via an instance.
        ensemble = cctk.ConformationalEnsemble()
        ensemble.add_molecule(file.molecule)

        orca_file = cctk.OrcaFile(job_types=[cctk.OrcaJobType.SP],
                                  ensemble=ensemble,
                                  header=header,
                                  blocks=blocks,
                                  variables=variables)
        orca_file.write_file(new_path)
        assert_matches_reference()
Exemplo n.º 9
0
import time, sys, glob
import numpy as np

sys.path.insert(0,'/Users/cwagen/code/cctk')
import cctk

#### 80.6 seconds before refactoring (6/6/2020) (I think)
#### 26.7 seconds after refactoring (6/6/2020)
# Benchmark section: times reading every Gaussian output file, assigning
# connectivity, and renumbering each molecule to match the first one loaded.

# NOTE(review): hard-coded, machine-specific input directory.
files = glob.glob("/Users/cwagen/code/Martin/ts/*.out")

# Both wall-clock and CPU time are recorded and reported below.
w_start = time.time()
p_start = time.process_time()

e = cctk.ConformationalEnsemble()
for path in files:
    f = cctk.GaussianFile.read_file(path)
    m = f.get_molecule().assign_connectivity()
    # Once the ensemble is non-empty, its first molecule is the numbering
    # reference for every later molecule.
    if len(e):
        m = m.renumber_to_match(e.molecules[0])
    e.add_molecule(m)

w_end = time.time()
p_end = time.process_time()
print(f"Elapsed time {w_end-w_start:.2f} s (CPU: {p_end-p_start:.2f} s)")

#### 18.6 seconds before refactoring (6/6/2020)
#### 3.88 seconds after refactoring (6/6/2020)

w_start = time.time()
p_start = time.process_time()
Exemplo n.º 10
0
def main():
    """Read output files, prune duplicate conformers, and write new inputs.

    Command line: ``parse_conformations.py [--cutoff C] [--rmsd_cutoff R]
    prefix files...``

    Steps:
        1. Read every file in parallel and add the last molecule of each
           file's ensemble to one conformational ensemble, tagging it with
           iteration count, termination count, and imaginary frequencies.
        2. Remove redundant conformers using ``--rmsd_cutoff``.
        3. Print a table of the survivors sorted by relative energy.
        4. Write ``<prefix>_cNNN.gjf`` for each conformer, in order of
           increasing relative energy, stopping at the first one whose
           relative energy exceeds ``--cutoff``.
    """
    # Conversion factor applied to energy differences before they are
    # compared with --cutoff (1 hartree = 627.509469 kcal/mol; assumes the
    # stored energies are in hartree -- TODO confirm).
    hartree_to_kcal_mol = 627.509469

    parser = argparse.ArgumentParser(prog="parse_conformations.py")
    parser.add_argument("--cutoff", "-c", type=float, default=15)
    parser.add_argument("--rmsd_cutoff", "-C", type=float, default=0.5)
    parser.add_argument("prefix", type=str)
    parser.add_argument("files", nargs='+')
    args = vars(parser.parse_args(sys.argv[1:]))

    ensemble = cctk.ConformationalEnsemble()

    print("\n\033[3mreading files:\033[0m")

    # The context manager shuts the worker pool down once all results have
    # been consumed, instead of leaving the processes to the interpreter.
    with mp.Pool(processes=16) as pool:
        for output_file in tqdm(pool.imap(read, args['files']),
                                total=len(args['files'])):
            molecule = output_file.get_molecule()
            # Add the last (molecule, properties) pair of the file's ensemble.
            ensemble.add_molecule(*list(output_file.ensemble.items())[-1])
            ensemble[molecule, "iters"] = len(output_file.ensemble)
            ensemble[molecule, "success"] = output_file.successful_terminations
            ensemble[molecule, "imaginary"] = output_file.imaginaries()

    if len(ensemble) == 0:
        print("no jobs to analyze!")
        sys.exit()

    print(f"{len(ensemble)} files read.")
    ensemble, rmsds = ensemble.eliminate_redundant(
        RMSD_cutoff=args['rmsd_cutoff'], return_RMSD=True)
    print(
        f"{len(ensemble)} distinct conformations identified (RMSD cutoff of {args['rmsd_cutoff']:.2f})."
    )

    print("\n\033[3manalysis:\033[0m")

    property_names = [
        "filename", "new_filename", "rmsd", "iters", "energy", "success",
        "imaginary"
    ]
    values = ensemble[:, property_names]
    # With a single conformer the lookup yields one flat row; wrap it so the
    # DataFrame still gets a list of rows.
    if not isinstance(values[0], list):
        values = [values]

    df = pd.DataFrame(values, columns=property_names).fillna(0)
    df["rmsd"] = rmsds
    df["new_filename"] = [
        f"{args['prefix']}_c{i:03}.gjf" for i in range(len(df))
    ]
    df["rel_energy"] = (df.energy - df.energy.min()) * hartree_to_kcal_mol
    # Sorting keeps the original row labels, which are the positions of the
    # conformers inside the ensemble; they are used below to look up the
    # matching molecule.
    df.sort_values("rel_energy", inplace=True)

    normal_col_names = copy.deepcopy(df.columns)

    # Temporarily wrap the headers in ANSI bold for display only.
    df.columns = [f"\033[1m{c}\033[0m" for c in df.columns]
    print(tabulate(df, headers="keys", tablefmt="presto", floatfmt=".5f"))
    df.columns = normal_col_names

    template_file = read(args['files'][0])

    print("\n\033[3moutput:\033[0m")
    molecules = ensemble.molecule_list()
    # Walk the rows in sorted order and fetch each row's own molecule through
    # its label; zipping the unsorted molecule list against the sorted frame
    # would pair molecules with the wrong filename and energy.
    for row_label, new_name, rel_energy in zip(df.index, df["new_filename"],
                                               df["rel_energy"]):
        if rel_energy <= args["cutoff"]:
            template_file.write_file(new_name, molecule=molecules[row_label])
            print(f"Wrote {new_name}")
        else:
            print(
                f"Skipping {new_name} and all subsequent rows: relative energy of {rel_energy:.2f} exceeds cutoff of {args['cutoff']:.2f}"
            )
            break