Exemplo n.º 1
0
def main(**kwargs):
    """
    Extract one molecule, or selected conformers of it, from a molecule file
    and write the match(es) out in mol2 format.

    Settings are read from a mapping named ``opt`` that must be defined
    outside this function, with keys:
        'infn'   - input file name
        'suffix' - string appended to the input base name for the output file
        'title'  - title of the molecule to extract
        'sdtag'  - SD tag to match on; "" writes all conformers of the molecule
        'value'  - SD tag value a conformer must have to be written

    NOTE(review): ``kwargs`` is accepted but never read; presumably callers
    invoke ``main(**opt)`` so both hold the same data -- confirm before
    switching the body over to ``kwargs``.
    """
    outfn = os.path.splitext(opt['infn'])[0] + '_' + opt['suffix'] + '.mol2'
    success = False

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(opt['infn']):
        oechem.OEThrow.Warning("Unable to open %s for reading" % opt['infn'])
        return

    # try/finally guarantees the stream is closed on the early return below
    # as well as on any exception raised while writing.
    try:
        for mol in ifs.GetOEMols():
            if mol.GetTitle() != opt['title']:
                continue
            # write out all confs in this mol if no SD tag is specified
            if opt['sdtag'] == "":
                write_conf_mol(outfn, mol)
                return
            # look for the conformer(s) in this mol with specific SD tag value
            for conf in mol.GetConfs():
                if oechem.OEGetSDData(conf, opt['sdtag']) == opt['value']:
                    success = True
                    write_conf_mol(outfn, conf)
    finally:
        ifs.close()

    if not success:
        print("\n** Found no confs matching your criteria. **")
Exemplo n.º 2
0
def extractXY(fname, tag):
    """
    Collect (molecule title, SD tag value) pairs for every conformer in a file.

    Parameters
    ----------
    fname : str
        Name of the molecule file to read.
    tag : str
        SD tag whose value is converted to float for each conformer.

    Returns
    -------
    xlist : list of int
        Molecule title (converted to int) for each conformer with usable data.
    ylist : numpy array
        Tag values relative to the first collected value, multiplied by
        627.5095 (Hartree to kcal/mol, per the original comment).
        Empty if the file could not be opened or no conformer had data.

    Raises
    ------
    ValueError
        If a molecule title cannot be converted to int.
    """
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(fname):
        oechem.OEThrow.Warning("Unable to open %s for reading" % fname)
        # Nothing can be read; return empty results instead of failing later
        # on ylist[0].
        return [], np.array([])

    xlist = []
    ylist = []

    try:
        for mol in ifs.GetOEMols():
            for j, conf in enumerate(mol.GetConfs()):
                # Title conversion errors are deliberately not swallowed.
                xval = int(mol.GetTitle())
                try:
                    yval = float(oechem.OEGetSDData(conf, tag))
                except ValueError as e:
                    print("Missing tag data for mol {}, conf {}! Skipping.".format(
                        mol.GetTitle(), j))
                    print(e)
                    continue
                # Append both together so the lists can never get out of step.
                xlist.append(xval)
                ylist.append(yval)
    finally:
        ifs.close()

    ### convert to numpy array, take relative e, convert to kcal/mol
    ylist = np.array(ylist)
    if ylist.size:
        ylist = ylist - ylist[0]
        ylist = 627.5095 * ylist
    return xlist, ylist
Exemplo n.º 3
0
def convertSDFfile(reffile, filtfile, writeout):
    """
    Write each conformer of ``filtfile`` using the molecule read from
    ``reffile`` as the template: the reference molecule's coordinates are
    overwritten with each conformer's coordinates and the result is written
    to ``writeout``.

    Parameters
    ----------
    reffile : str
        Name of the reference molecule file (its own conformers are not used).
    filtfile : str
        Name of the file whose conformer coordinates are transferred.
    writeout : str
        Name of the output file. If it already exists nothing is written.

    Returns
    -------
    None
    """
    refifs = oechem.oemolistream()
    filtifs = oechem.oemolistream()
    ofs = oechem.oemolostream()

    ### Read in reference file, but don't need its old conformers
    if not refifs.open(reffile):
        oechem.OEThrow.Warning("Unable to open %s for reading" % reffile)
        return

    ### Read in filtered file and distinguish each molecule's conformers
    filtifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not filtifs.open(filtfile):
        oechem.OEThrow.Warning("Unable to open %s for reading" % filtfile)
        refifs.close()  # do not leak the already-open reference stream
        return

    ### Open outstream file.
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (writeout))
        refifs.close()
        filtifs.close()
        return
    if not ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    ### Loop and write molecules. (though refifs should only have ONE mol)
    # NOTE(review): filtifs is read once, so presumably any reference
    # molecule after the first finds the filtered stream already consumed.
    try:
        for rmol in refifs.GetOEMols():
            for fmol in filtifs.GetOEMols():
                for conf in fmol.GetConfs():
                    rmol.SetCoords(conf.GetCoords())
                    oechem.OEWriteConstMolecule(ofs, rmol)
    finally:
        refifs.close()
        filtifs.close()
        ofs.close()
Exemplo n.º 4
0
 def load_file(fname):
     """
     Open ``fname`` as a molecule input stream, using the absolute conformer
     test, and return the result of the stream's ``GetOEMols()`` call.
     A fatal OEChem error is raised if the file cannot be opened.
     """
     stream = oechem.oemolistream()
     stream.SetConfTest(oechem.OEAbsoluteConfTest())
     opened = stream.open(fname)
     if not opened:
         oechem.OEThrow.Fatal("Unable to open %s for reading" % fname)
     return stream.GetOEMols()
Exemplo n.º 5
0
def main(argv=[__name__]):
    """
    Render every molecule of the "-in" file into one cell of a report and
    write the report to the "-out" file, which must have a "pdf" extension.

    Parameters
    ----------
    argv : list
        Command-line arguments handed to OEParseCommandLine.

    Returns
    -------
    int
        1 if the command line could not be parsed, otherwise 0.
    """
    interface = oechem.OEInterface(InterfaceData)
    for configure in (oedepict.OEConfigureReportOptions,
                      oedepict.OEConfigurePrepareDepictionOptions,
                      oedepict.OEConfigure2DMolDisplayOptions):
        configure(interface)

    if not oechem.OEParseCommandLine(interface, argv):
        return 1

    # Input stream; the absolute conformer test is set before opening.
    in_name = interface.GetString("-in")
    mol_stream = oechem.oemolistream()
    mol_stream.SetConfTest(oechem.OEAbsoluteConfTest())
    if not mol_stream.open(in_name):
        oechem.OEThrow.Fatal("Cannot open input file!")

    # Output stream; only PDF output is accepted.
    out_name = interface.GetString("-out")
    out_ext = oechem.OEGetFileExtension(out_name)
    if out_ext != "pdf":
        oechem.OEThrow.Fatal("Output must be PDF format.")

    report_stream = oechem.oeofstream()
    if not report_stream.open(out_name):
        oechem.OEThrow.Fatal("Cannot open output file!")

    # Optional user-supplied ring dictionary.
    if interface.HasString("-ringdict"):
        ring_dict_name = interface.GetString("-ringdict")
        if not oechem.OEInit2DRingDictionary(ring_dict_name):
            oechem.OEThrow.Warning("Cannot use user-defined ring dictionary!")

    # Report layout.
    report_opts = oedepict.OEReportOptions()
    oedepict.OESetupReportOptions(report_opts, interface)
    report_opts.SetFooterHeight(25.0)
    report = oedepict.OEReport(report_opts)

    # Depiction preparation and display options, sized to one report cell.
    prep_opts = oedepict.OEPrepareDepictionOptions()
    oedepict.OESetupPrepareDepictionOptions(prep_opts, interface)

    display_opts = oedepict.OE2DMolDisplayOptions()
    oedepict.OESetup2DMolDisplayOptions(display_opts, interface)
    cell_width = report.GetCellWidth()
    cell_height = report.GetCellHeight()
    display_opts.SetDimensions(cell_width, cell_height,
                               oedepict.OEScale_AutoScale)

    # One cell per molecule; iterating GetOEMols (not GetOEGraphMols).
    for mol in mol_stream.GetOEMols():
        print(mol.GetTitle())
        target_cell = report.NewCell()
        oedepict.OEPrepareDepiction(mol, prep_opts)
        display = oedepict.OE2DMolDisplay(mol, display_opts)
        oedepict.OERenderMolecule(target_cell, display)

    # "Page i of N" footer on every page.
    footer_font = oedepict.OEFont(oedepict.OEFontFamily_Default,
                                  oedepict.OEFontStyle_Bold, 12,
                                  oedepict.OEAlignment_Center, oechem.OEBlack)
    for page, footer in enumerate(report.GetFooters(), start=1):
        footer_text = "Page %d of %d" % (page, report.NumPages())
        oedepict.OEDrawTextToCenter(footer, footer_text, footer_font)

    oedepict.OEWriteReport(report_stream, out_ext, report)

    return 0
Exemplo n.º 6
0
def filter_confs(rmsdfile, tag, rmsdout):
    """
    Read in OEMols (and each of their conformers) in 'rmsdfile'.
    For each molecule:
        rough filter conformers based on energy differences specified by 'tag',
        fine filter conformers based on RMSD values.

    The number of conformers kept per molecule is appended to "numConfs.txt"
    in the current working directory.

    Parameters
    ----------
    rmsdfile : str
        Name of SDF file with conformers to be filtered
    tag : str
        SD tag name with the energy value to roughly screen conformers before RMSD
        Screening works by removing conformers of very similar energies, where
        "similar" is defined by thresE parameter. Examples:
        - "QM Psi4 Final Opt. Energy (Har) mp2/def-sv(p)"
        - "QM Psi4 Final Single Pt. Energy (Har) mp2/def-sv(p)"
    rmsdout : str
        Name of the output file with filtered conformers. If it already
        exists, filtering is skipped.

    Returns
    -------
    None
    """
    # Parameters for distinguishing cutoff of conformer similarity
    thresE = 5.E-4  # declare confs diff & skip RMSD comparison above this threshold
    thresRMSD = 0.2  # above this threshold (Angstrom), confs are "diff" minima

    fname = os.path.basename(rmsdfile)

    # The context manager closes the log on every exit path, including the
    # "output already exists" early return.
    with open(os.path.join(os.getcwd(), "numConfs.txt"), 'a') as numConfsF:
        numConfsF.write("\n{}\n".format(tag))

        # Open file to be processed.
        rmsd_ifs = oechem.oemolistream()
        if not rmsd_ifs.open(rmsdfile):
            oechem.OEThrow.Fatal("Unable to open %s for reading" % rmsdfile)
        rmsd_ifs.SetConfTest(oechem.OEAbsoluteConfTest())

        # Open outstream file.
        if os.path.exists(rmsdout):
            print("%s output file already exists in %s. Skip filtering.\n" %
                  (rmsdout, os.getcwd()))
            rmsd_ifs.close()
            return
        rmsd_ofs = oechem.oemolostream()
        if not rmsd_ofs.open(rmsdout):
            oechem.OEThrow.Fatal("Unable to open %s for writing" % rmsdout)

        # Identify minima and write output file.
        try:
            for mol in rmsd_ifs.GetOEMols():
                if identify_minima(mol, tag, thresE, thresRMSD):
                    numConfsF.write("%s\t%s\n" % (mol.GetTitle(), mol.NumConfs()))
                    oechem.OEWriteConstMolecule(rmsd_ofs, mol)
                else:
                    numConfsF.write("%s\t0\n" % (mol.GetTitle()))
        finally:
            rmsd_ifs.close()
            rmsd_ofs.close()

    print("Done filtering %s to %s.\n" % (fname, rmsdout))
Exemplo n.º 7
0
def timeAvg(titles, sdfRef, method, basis, tag):
    """
    For an SDF file with all confs of all mols, get the average (and standard
    deviation) of a per-conformer value for each molecule, append a summary
    to "timeAvgs.txt", and record the result in ``titles``.

    Parameters
    ----------
    titles : dict
        Dictionary (empty or not) keyed by molecule title. Each value is a
        list to which [mean, std] for this file is appended:
        [[qm1_avg, qm1_std], [qm2_avg, qm2_std] ... ]. Modified in place.
    sdfRef : str
        Path+name of SDF file with times for all confs of all mols.
    method : str
        QM method, passed on to pt.GetSDList and written to the summary.
    basis : str
        QM basis set, passed on to pt.GetSDList and written to the summary.
    tag : str
        Tag identifier passed on to pt.GetSDList.
        (presumably selects the timing SD tag -- confirm against pt.GetSDList)

    Returns
    -------
    dict
        The same ``titles`` dictionary that was passed in.
    """
    # Open reference file.
    print("Opening SDF file %s" % sdfRef)
    ifsRef = oechem.oemolistream()
    ifsRef.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifsRef.open(sdfRef):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)

    try:
        with open("timeAvgs.txt", 'a') as timeF:
            timeF.write("\nFile: {}\n".format(sdfRef))
            timeF.write(
                "Average [{}/{}] [{}s] over all confs for each molecule\n".format(
                    method, basis, tag))

            for rmol in ifsRef.GetOEMols():
                tmol = np.fromiter(pt.GetSDList(rmol, tag, 'Psi4', method, basis),
                                   dtype=np.float64)

                # exclude conformers for which job did not finish (nan)
                tmol = tmol[~np.isnan(tmol)]

                if tmol.size:
                    meantime = np.mean(tmol)
                    stdtime = np.std(tmol)
                else:
                    # Same values numpy would yield for an empty array, but
                    # without the "mean of empty slice" RuntimeWarning.
                    meantime = stdtime = np.nan

                name = rmol.GetTitle()
                timeF.write("%s\t%d confs\t\t%.3f +- %.3f\n" %
                            (name, tmol.size, meantime, stdtime))
                titles.setdefault(name, []).append([meantime, stdtime])
    finally:
        ifsRef.close()
    return titles
Exemplo n.º 8
0
def read_mol(infile, many=False):
    """
    Read molecule data from ``infile``.

    Parameters
    ----------
    infile : str
        Name of the molecule file to open.
    many : bool
        True to get the stream's ``GetOEMols()`` result (multiple conformers);
        False to read a single molecule into an OEGraphMol.

    Returns
    -------
    The ``GetOEMols()`` result when ``many`` is True (the stream is left
    open), otherwise the OEGraphMol that was read (the stream is closed).
    """
    stream = oechem.oemolistream()
    if not stream.open(infile):
        oechem.OEThrow.Fatal("Unable to open {} for reading".format(infile))
    stream.SetConfTest(oechem.OEAbsoluteConfTest())

    if not many:
        # single conformer / single molecule case
        single = oechem.OEGraphMol()
        oechem.OEReadMolecule(stream, single)
        stream.close()
        return single

    return stream.GetOEMols()
Exemplo n.º 9
0
    def prelim(sdfRef,spe):
        """
        Open ``sdfRef`` for reading and pick the SD tag keyword for energies.

        ``spe`` is a string; when it equals 'true' (case-insensitive) the
        keyword is "QM spe", otherwise "QM opt energy".
        Returns the stream's ``GetOEMols()`` result and the keyword.
        """
        # Open the input stream with the absolute conformer test.
        stream = oechem.oemolistream()
        stream.SetConfTest( oechem.OEAbsoluteConfTest() )
        opened = stream.open(sdfRef)
        if not opened:
            oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)
        mol_iter = stream.GetOEMols()

        # Select the SD tag keyword from which to obtain energy.
        tagword = "QM spe" if spe.lower() == 'true' else "QM opt energy"
        return mol_iter, tagword
Exemplo n.º 10
0
def plotSDF(infile, tag, figname='lineplot.png'):
    """
    Scatter-plot a relative SD tag value against molecule title for every
    conformer in a file, save the figure, and show it.

    Parameters
    ----------
    infile : str
        Name of the molecule file to read.
    tag : str
        SD tag whose value is converted to float for each conformer.
    figname : str
        File name under which the figure is saved.

    Returns
    -------
    None. Nothing is plotted if the file cannot be opened or no conformer
    has usable data.
    """

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        oechem.OEThrow.Warning("Unable to open %s for reading" % infile)
        return

    xlist = []
    ylist = []

    try:
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            for conf in mol.GetConfs():
                # Convert both values before appending either, so a failed
                # conversion can never leave the two lists with different
                # lengths.
                try:
                    yval = float(oechem.OEGetSDData(conf, tag))
                    xval = int(mol.GetTitle())
                except ValueError:
                    continue  # mols not converged may not have tag
                ylist.append(yval)
                xlist.append(xval)
    finally:
        ifs.close()

    if not ylist:
        # Avoid an IndexError on ylist[0] below.
        print("No conformers with usable '%s' data found in %s; nothing to plot."
              % (tag, infile))
        return

    ### convert to numpy array, take relative e, convert to kcal/mol
    ylist = np.array(ylist)
    ylist = ylist - ylist[0]
    ylist = 627.5095 * ylist

    ### Plot.
    xlabel = 'conformation number'
    ylabel = "Relative energy (kcal/mol)"

    plt.figure()

    plt.ylabel(ylabel, fontsize=14)
    plt.xlabel(xlabel, fontsize=14)
    plt.scatter(xlist, ylist)
    plt.ylim(-1, 16)
    plt.grid()

    plt.savefig(figname, bbox_inches='tight')
    plt.show()
Exemplo n.º 11
0
def read_mols_tag(insdf, calctype):
    """
    Open ``insdf`` and choose the SD tag keyword matching ``calctype``.

    Parameters
    ----------
    insdf : str
        Name of the molecule file to open.
    calctype : str
        Either 'opt' or 'spe'; anything else exits the program.

    Returns
    -------
    The stream's ``GetOEMols()`` result, and the keyword "QM spe" for 'spe'
    or "QM opt energy" otherwise.
    """
    allowed = {'opt', 'spe'}
    if calctype not in allowed:
        sys.exit("Specify a valid calculation type for {}.".format(insdf))

    # Open the input stream with the absolute conformer test.
    stream = oechem.oemolistream()
    stream.SetConfTest(oechem.OEAbsoluteConfTest())
    opened = stream.open(insdf)
    if not opened:
        oechem.OEThrow.Fatal("Unable to open %s for reading" % insdf)
    mol_iter = stream.GetOEMols()

    # Select the SD tag keyword from which to obtain energy.
    tagword = "QM spe" if calctype.lower() == 'spe' else "QM opt energy"
    return mol_iter, tagword
Exemplo n.º 12
0
def confs2turb(insdf):
    """
    For every conformer of every molecule in ``insdf``, create the directory
    <cwd>/<molecule title>/<conformer number>/, write the files 'options' and
    'input.xyz' there (contents from GetMolDetails), and run ``x2t`` to
    produce 'coord'.

    The working directory is changed while each conformer is processed and
    is restored to its starting value before returning.

    Parameters
    ----------
    insdf:  string - PATH+name of SDF file

    """
    homedir = os.getcwd()
    # NOTE(review): this runs in its own subshell, so it likely does not
    # change the environment seen by the x2t call below -- confirm whether
    # the module must be loaded before starting Python instead.
    sp.call('module load turbomole/7.1/intel', shell=True)

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    ### For each molecule: for each conf, generate input
    try:
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            for confnum, conf in enumerate(mol.GetConfs(), start=1):
                # change into subdirectory to use x2t
                subdir = os.path.join(homedir,
                                      "%s/%s" % (mol.GetTitle(), confnum))
                if not os.path.isdir(subdir):
                    os.makedirs(subdir)
                os.chdir(subdir)

                # write out relevant files
                label = mol.GetTitle() + '_' + str(confnum)
                optinfo, xinfo = GetMolDetails(conf, label)
                with open('options', 'w') as ofile:
                    ofile.write(optinfo)
                with open('input.xyz', 'w') as xfile:
                    xfile.write(xinfo)

                # run x2t (shell redirection is required, hence shell=True)
                sp.call('x2t input.xyz > coord', shell=True)
    finally:
        ifs.close()
        # Do not leave the caller stranded in the last conformer directory.
        os.chdir(homedir)
Exemplo n.º 13
0
def quan2modsem(infile, pfile):
    """
    Run the modified Seminario method for every conformer of every molecule
    in ``infile``, using Hessians stored in a pickle file.

    Parameters
    ----------
    infile : str
        Name of the molecule file. Its directory is the root under which
        per-conformer directories <title>/<conformer number>/ are addressed.
    pfile : str
        Name of a pickle file holding a dictionary indexed as
        hdict[molecule title][conformer number] (1-based).

    Returns
    -------
    None
    """
    hdir = os.path.dirname(infile)

    # read in sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(infile):
        sys.exit("Unable to open %s for reading" % infile)

    try:
        # open quanformer-generated pickle file with dictionary of hessians
        # SECURITY: unpickling can execute arbitrary code; only load pickle
        # files from a trusted source.
        with open(pfile, 'rb') as handle:
            hdict = pickle.load(handle)

        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for j, conf in enumerate(mol.GetConfs()):

                # set file locations; dir for modsem needs / at end of string
                datadir = os.path.join(hdir, "%s/%s/" % (mol.GetTitle(), j + 1))

                # extract hessian from the quanformer-generated dictionary (get_psi_results)
                hessian = hdict[mol.GetTitle()][j + 1]

                # run modsem
                # NOTE(review): prep_hess receives ``mol`` rather than
                # ``conf``; confirm this uses the intended conformer's
                # coordinates.
                bond_list, angle_list, coords, N, hessian, atom_names = prep_hess(
                    mol, hessian)
                modified_Seminario_method_vtl2.modified_Seminario_method(
                    bond_list,
                    angle_list,
                    coords,
                    N,
                    hessian,
                    atom_names,
                    datadir,
                    datadir,
                    vibrational_scaling=1)

                # check to make sure files were generated, and note the ones that didn't work
                # TODO
    finally:
        ifs.close()
Exemplo n.º 14
0
def confs2psi(insdf, method, basis, spe=False, memory=None):
    """
    Write a Psi4 "input.dat" for every conformer of every molecule in
    ``insdf``, under <cwd>/<molecule title>/<conformer number>/.
    Conformers whose "input.dat" already exists are skipped.

    Parameters
    ----------
    insdf:  string - PATH+name of SDF file
    method: string - method. E.g. "mp2"
    basis:  string - basis set. E.g. "def2-sv(p)"
    spe:    boolean. True for single point energy calcns, False for geom opt.
            default option is False.
    memory: string - memory specification. Psi4 default is 256 Mb. E.g. "1.5 Gb"

    """
    base_dir = os.getcwd()

    ### Open the .sdf file, distinguishing each molecule's conformers
    stream = oechem.oemolistream()
    stream.SetConfTest( oechem.OEAbsoluteConfTest() )
    if not stream.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    ### One input file per conformer of each molecule
    for mol in stream.GetOEMols():
        title = mol.GetTitle()
        print(title, mol.NumConfs())
        if not title:
            sys.exit("ERROR: OEMol must have title assigned! Exiting.")

        for conf_num, conf in enumerate(mol.GetConfs(), start=1):
            # target directory ./mol/conf/
            conf_dir = os.path.join(base_dir, "%s/%s" % (title, conf_num))
            if not os.path.isdir(conf_dir):
                os.makedirs(conf_dir)

            input_path = os.path.join(conf_dir, 'input.dat')
            if os.path.exists(input_path):
                print("Input file (\"input.dat\") already exists. Skipping.\n")
                continue

            label = title + '_' + str(conf_num)
            with open(input_path, 'w') as handle:
                handle.write(
                    make_psi_input(conf, label, method, basis, spe, memory))

    stream.close()
Exemplo n.º 15
0
def filterConfs(rmsdfile, tag, suffix):
    """
    Read in OEMols (and each of their conformers) in 'rmsdfile'.
    For each molecule:
        rough filter conformers based on energy differences specified by 'tag',
        fine filter conformers based on RMSD values.

    The number of conformers kept per molecule is appended to "numConfs.txt"
    in the current working directory.

    Parameters
    ----------
    rmsdfile: string - PATH+full name of to-be-filtered SDF file.
    tag:      string - describing the SD tag with the energy value to rough
        filter conformers. A very small energy difference is considered
        to be the same conformer (see thresE). Above this energy difference,
        RMSD comparison is evaluated to distinguish if two confs are diff.
        Ex. QM Psi4 Final Opt. Energy (Har) mp2/def-sv(p)
            QM Psi4 Single Pt. Energy (Har) mp2/def-sv(p)
    suffix:   string - string appended to the basename of rmsdfile to distinguish
        that this file has been filtered.
        Ex. if rmsdfile=/some/dir/basename-210.sdf and suffix=220 then the
            output file is named basename-220.sdf
        NOTE(review): the output name carries no directory, so it is
        resolved against the current working directory -- confirm that is
        intended rather than the directory of rmsdfile.

    Returns
    -------
    None. If the output file already exists, filtering is skipped.
    """
    # Parameters for distinguishing cutoff of conformer similarity
    thresE = 5.E-4  # declare confs diff & skip RMSD comparison above this threshold
    thresRMSD = 0.2  # above this threshold (Angstrom), confs are "diff" minima

    fname = os.path.basename(rmsdfile)

    # The context manager closes the log on every exit path, including the
    # "output already exists" early return.
    with open(os.path.join(os.getcwd(), "numConfs.txt"), 'a') as numConfsF:
        numConfsF.write(tag + "\n")

        # Open file to be processed.
        rmsd_ifs = oechem.oemolistream()
        if not rmsd_ifs.open(rmsdfile):
            oechem.OEThrow.Fatal("Unable to open %s for reading" % rmsdfile)
        rmsd_ifs.SetConfTest(oechem.OEAbsoluteConfTest())

        # Open outstream file.
        rmsdout = ("%s-%s.sdf" %
                   (fname.replace('-', '.').split('.')[0], str(suffix)))
        if os.path.exists(rmsdout):
            print("%s output file already exists in %s. Skip filtering.\n" %
                  (rmsdout, os.getcwd()))
            rmsd_ifs.close()
            return
        rmsd_ofs = oechem.oemolostream()
        if not rmsd_ofs.open(rmsdout):
            oechem.OEThrow.Fatal("Unable to open %s for writing" % rmsdout)

        # Identify minima and write output file.
        try:
            for mol in rmsd_ifs.GetOEMols():
                if IdentifyMinima(mol, tag, thresE, thresRMSD):
                    numConfsF.write("%s\t%s\n" % (mol.GetTitle(), mol.NumConfs()))
                    oechem.OEWriteConstMolecule(rmsd_ofs, mol)
                else:
                    numConfsF.write("%s\t0\n" % (mol.GetTitle()))
        finally:
            rmsd_ifs.close()
            rmsd_ofs.close()

    print("Done filtering %s to %s.\n" % (fname, rmsdout))
Exemplo n.º 16
0
def confs_to_psi(insdf,
                 method,
                 basis,
                 calctype='opt',
                 memory=None,
                 via_json=False):
    """
    Read in molecule(s) (and conformers, if present) in insdf file. Create
    Psi4 input calculations for each structure, one per conformer, under
    <cwd>/<molecule title>/<conformer number>/. A conformer is skipped if
    the input file that would be written for it already exists.

    Parameters
    ----------
    insdf: string
        Name of the molecule file for which to create Psi4 input file.
        SDF format can contain multiple molecules and multiple conformers per
        molecule in a single file.
    method: string
        Name of the method as understood by Psi4. Example: "mp2"
    basis : string
        Name of the basis set as understood by Psi4. Example: "def2-sv(p)"
    calctype : string
        What kind of Psi4 calculation to run. Supported inputs are:
        'opt' for geometry optimization,
        'spe' for single point energy calculation, and
        'hess' for Hessian calculation.
    memory : string
        How much memory each Psi4 job should take. If not specified, the
        default in Psi4 is 500 Mb. Examples: "2000 MB" "1.5 GB"
        http://www.psicode.org/psi4manual/master/psithoninput.html
    via_json : Boolean
        If True, use JSON wrapper for Psi4 input and output.
        - Psi4 input would be in "input.py", called with python
        - Psi4 output would be in "output.json"
        If False, use normal text files for Psi4 input and output.
        - Psi4 input would be in "input.dat"
        - Psi4 output would be in "output.dat"
    """
    wdir = os.getcwd()
    # Name of the file this call writes; also the one the skip check tests.
    input_name = 'input.py' if via_json else 'input.dat'

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(insdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
        return

    ### For each molecule: for each conf, generate input
    try:
        for mol in ifs.GetOEMols():
            print(mol.GetTitle(), mol.NumConfs())
            if not mol.GetTitle():
                sys.exit("ERROR: OEMol must have title assigned! Exiting.")
            for i, conf in enumerate(mol.GetConfs()):
                # target subdirectory ./mol/conf/
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    os.makedirs(subdir)

                # Test the file actually being written: previously
                # 'input.dat' was checked even in JSON mode, so an existing
                # 'input.py' was silently overwritten.
                inpath = os.path.join(subdir, input_name)
                if os.path.exists(inpath):
                    print("Input file already exists. Skipping.\n{}\n".format(
                        inpath))
                    continue

                label = mol.GetTitle() + '_' + str(i + 1)
                with open(inpath, 'w') as ofile:
                    if via_json:
                        ofile.write(
                            "# molecule {}\n\nimport numpy as np\nimport psi4"
                            "\nimport json\n\njson_data = ".format(label))
                        json.dump(make_psi_json(conf, label, method, basis,
                                                calctype, memory),
                                  ofile,
                                  indent=4,
                                  separators=(',', ': '))
                        ofile.write(
                            "\njson_ret = psi4.json_wrapper.run_json(json_data)\n\n")
                        ofile.write(
                            "with open(\"output.json\", \"w\") as ofile:\n\t"
                            "json.dump(json_ret, ofile, indent=2)\n\n")
                    else:
                        ofile.write(
                            make_psi_input(conf, label, method, basis, calctype,
                                           memory))
    finally:
        ifs.close()
Exemplo n.º 17
0
def getPsiResults(origsdf,
                  finsdf,
                  spe=False,
                  psiout="output.dat",
                  timeout="timer.dat"):
    """
    Read in OEMols (and each of their conformers) in origsdf file,
        get results from Psi4 calculations found under the current working
        directory, and write out results into finsdf file.
    Directory layout is <cwd>/molName/confNumber/outputfiles .

    Parameters
    ----------
    origsdf:  string - PATH+full name of orig pre-opt SDF file.
    finsdf:   string - name of final SDF file with optimized results,
        written in the current working directory.
    spe:     Boolean - are the Psi4 results of a single point energy calcn?
    psiout:   string - name of the Psi4 output files. Default is "output.dat"
    timeout: string - name of the Psi4 timer files. Default is "timer.dat"

    Returns
    -------
    method: string - QM method from Psi4 calculations
    basisset: string - QM basis set from Psi4 calculations

    (None, None) is returned if the function returns early (e.g., if output
       file already exists), if no conformer was processed, or if the data
       of the last processed conformer lacks 'method'/'basis'.

    """
    wdir = os.getcwd()

    # Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % origsdf)
        # sys.exit() instead of quit(): quit is only injected by the site
        # module and is not guaranteed to exist.
        sys.exit()

    # Open outstream file.
    writeout = os.path.join(wdir, finsdf)
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    write_ofs = oechem.oemolostream()
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # Defined up front so the final lookup cannot hit an unbound name when
    # the input yields no conformers.
    props = {}

    # For each conformer, process output file and write new data to SDF file
    try:
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for j, conf in enumerate(mol.GetConfs()):

                # GET DETAILS FOR SD TAGS
                # dictionary of data for this conformer
                props = {'package': "Psi4", 'missing': False}

                # change into subdirectory ./mol/conf/
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), j + 1))
                if not os.path.isdir(subdir):
                    print("*** No directory found for %s ***" % (subdir))
                    continue
                os.chdir(subdir)

                # Get wall clock time of the job
                try:
                    props['time'] = get_psi_time(timeout)
                except IOError:
                    props['time'] = "Timer output file not found"

                # process output and get dictionary results
                props = process_psi_out(psiout, props, spe)
                # if output was missing, move on
                if props['missing']:
                    continue

                # every finished job must provide these entries
                try:
                    for key in ('numSteps', 'finalEnergy', 'coords'):
                        props[key]
                except KeyError:
                    sys.exit(
                        "ERROR: Psi4 job was incomplete in {}".format(subdir))

                # Set last coordinates from optimization. skip if empty.
                if len(props['coords']) != 0:
                    conf.SetCoords(oechem.OEFloatArray(props['coords']))

                # SET DETAILS TO WRITE MOLECULE
                # Set SD tags for this molecule
                pt.SetOptSDTags(conf, props, spe)
                # Write output file
                oechem.OEWriteConstMolecule(write_ofs, conf)
    finally:
        # Runs on errors too, so streams are closed and the caller's working
        # directory is always restored.
        ifs.close()
        write_ofs.close()
        os.chdir(wdir)

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
Exemplo n.º 18
0
def get_psi_results(origsdf,
                    finsdf,
                    calctype='opt',
                    psiout="output.dat",
                    timeout="timer.dat"):
    """
    Read in OEMols (and each of their conformers) in origsdf file,
        get results from Psi4 calculations in the same directory as origsdf,
        and write out results into finsdf file.
    Directory layout is .../maindir/molName/confNumber/outputfiles ,
    where maindir is the directory of origsdf. finsdf (and, for Hessian
    calculations, a '<finsdf base>.hess.pickle' file) is written in the
    current working directory.

    Parameters
    ----------
    origsdf:  string - original SDF file of input structures of QM calculation
    finsdf:   string - full name of final SDF file with optimized results.
    calctype: string; one of 'opt','spe','hess' for geometry optimization,
        single point energy calculation, or Hessian calculation
    psiout:   string - name of the Psi4 output files. Default is "output.dat"
    timeout: string - name of the Psi4 timer files. Default is "timer.dat"

    Returns
    -------
    (method, basis) taken from the data of the last conformer processed.

    (None, None) is returned if the function returns early (e.g., if output
       file already exists), if no conformer was processed, or if that data
       lacks 'method'/'basis'.

    """

    hdir = os.path.dirname(origsdf)
    wdir = os.getcwd()

    # check that specified calctype is valid
    if calctype not in {'opt', 'spe', 'hess'}:
        sys.exit("Specify a valid calculation type.")

    # read in sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        sys.exit("Unable to open %s for reading" % origsdf)

    # open outstream file
    writeout = os.path.join(wdir, finsdf)
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    write_ofs = oechem.oemolostream()
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # Hessian dictionary, where hdict['molTitle'][confIndex] has the hessian
    # (only filled and written when calctype == 'hess')
    hdict = {}

    # Defined up front so the final lookup cannot hit an unbound name when
    # the input yields no conformers.
    props = {}

    # for each conformer, process output file and write new data to SDF file
    try:
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            if calctype == 'hess':
                hdict[mol.GetTitle()] = {}

            for j, conf in enumerate(mol.GetConfs()):

                props = initiate_dict()

                # set file locations
                timef = os.path.join(
                    hdir, "%s/%s/%s" % (mol.GetTitle(), j + 1, timeout))
                outf = os.path.join(
                    hdir, "%s/%s/%s" % (mol.GetTitle(), j + 1, psiout))

                # process output and get dictionary results
                props = get_conf_data(props, calctype, timef, outf)

                # if output was missing or are missing calculation details
                # move on to next conformer
                if props['missing'] or (calctype == 'opt' and not all(
                        key in props
                        for key in ['numSteps', 'finalEnergy', 'coords'])):
                    print(
                        "ERROR reading {}\nEither Psi4 job was incomplete OR wrong calctype specified\n"
                        .format(outf))
                    continue

                # add data to oemol
                conf = set_conf_data(conf, props, calctype)

                # if hessian, append to dict bc does not go to SD tag
                if calctype == 'hess':
                    hdict[mol.GetTitle()][j + 1] = props['hessian']

                # check mol title
                conf = check_title(conf, origsdf)

                # write output file
                oechem.OEWriteConstMolecule(write_ofs, conf)
    finally:
        # close file streams, also when processing fails
        ifs.close()
        write_ofs.close()

    # if hessian, write hdict out to separate file
    if calctype == 'hess':
        hfile = os.path.join(wdir,
                             os.path.splitext(finsdf)[0] + '.hess.pickle')
        # context manager so the pickle is flushed and closed deterministically
        with open(hfile, 'wb') as handle:
            pickle.dump(hdict, handle)

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
Exemplo n.º 19
0
def getTurbResults(origsdf, theory, finsdf, spe=False):
    """
    Gather Turbomole results for each conformer of each molecule in an SDF
    file and write the updated conformers to a new SDF file.

    For conformer number N (1-based) of a molecule titled T, results are
    read from the subdirectory ./T/N of the current working directory.

    Parameters
    ----------
    origsdf : string, path of the input SDF file
    theory : string, level of theory in the form method/basis
    finsdf : string, name of the output SDF file, created in the current
        working directory; nothing is done if it already exists
    spe : bool, forwarded to process_turb_out and pt.SetOptSDTags
        (presumably True for single point energy jobs -- confirm)

    Returns
    -------
    (method, basis) taken from the last processed conformer, or
    (None, None) if the input could not be opened, the output file already
    exists, or no conformer was processed.

    Notes
    -----
    Calls sys.exit if an expected conformer subdirectory is missing.
    The working directory is restored before returning.

    """
    wdir = os.getcwd()
    # NOTE(review): this runs in its own child shell, so it presumably does
    # not change the environment seen by the later 't2x' call -- confirm.
    sp.call('module load turbomole/7.1/intel', shell=True)

    method = theory.split('/')[0]
    basisset = theory.split('/')[1]

    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifs.open(origsdf):
        oechem.OEThrow.Warning("Unable to open %s for reading" % origsdf)
        # same shape as every other return so callers can always unpack
        return (None, None)

    ### Open outstream file.
    writeout = os.path.join(wdir, finsdf)
    write_ofs = oechem.oemolostream()
    if os.path.exists(writeout):
        print("File already exists: %s. Skip getting results.\n" % (finsdf))
        ifs.close()
        return (None, None)
    if not write_ofs.open(writeout):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % writeout)

    # defined up front so the final lookup is safe when no conformer is read
    props = {}
    try:
        for mol in ifs.GetOEMols():
            print("===== %s =====" % (mol.GetTitle()))
            for i, conf in enumerate(mol.GetConfs()):
                # dictionary of data for this conformer
                props = {
                    'package': "Turbomole",
                    'method': method,
                    'basis': basisset,
                }

                # change into subdirectory
                subdir = os.path.join(wdir, "%s/%s" % (mol.GetTitle(), i + 1))
                if not os.path.isdir(subdir):
                    sys.exit(
                        "No subdirectories found, are you in the right dir?")
                os.chdir(subdir)

                # get time and final coordinates
                props['time'] = GetTime()
                if not os.path.exists('coord'):
                    print("Error: the 'coord' file does not exist!")
                    continue
                # shell is needed for the output redirection
                sp.call('t2x -c > final.xyz', shell=True)

                # read in xyz file into another mol and transfer coords
                xfs = oechem.oemolistream()
                xfs.SetConfTest(oechem.OEAbsoluteConfTest())
                if xfs.open('final.xyz'):
                    # an empty final.xyz yields no molecule at all
                    xmol = next(xfs.GetOEMols(), None)
                    if xmol is None:
                        oechem.OEThrow.Warning(
                            "No molecule found in 'final.xyz'")
                    else:
                        conf.SetCoords(xmol.GetCoords())
                else:
                    oechem.OEThrow.Warning(
                        "Unable to open 'final.xyz' for reading")
                xfs.close()

                # process output and get dictionary results
                # NOTE(review): 'cosmo' is not defined in this function; it
                # must exist at module level or this raises NameError.
                props = process_turb_out(props, spe, cosmo)
                pt.SetOptSDTags(conf, props, spe)
                oechem.OEWriteConstMolecule(write_ofs, conf)
    finally:
        # release streams and undo the chdir even on sys.exit or errors
        ifs.close()
        write_ofs.close()
        os.chdir(wdir)

    try:
        return props['method'], props['basis']
    except KeyError:
        return None, None
Exemplo n.º 20
0
Arquivo: am1wib.py Projeto: vtlim/misc
def am1wib(insdf, outdat, plotout=None):
    """
    For every conformer in an SDF file, assign AM1-BCC partial charges and
    report, for each invertible nitrogen, the sum of the three angles
    around it and the Wiberg bond order of each of its bonds.

    Parameters
    ----------
    insdf: string, name of SDF file
    outdat: string, name of the text file the report is written to
        (created or truncated even when insdf cannot be opened)
    plotout: string or None, optional name of a tab-separated file that
        receives one line per nitrogen: running index, angle sum, label
        (label is molName_confIndex_atomIndex)

    Raises
    ------
    RuntimeError if the partial charge assignment reports failure.

    """
    ### Read in .sdf file and distinguish each molecule's conformers
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsoluteConfTest())

    angList = []  # for plotting
    labelList = []  # for plotting

    # the context manager guarantees the report file is closed on every path
    with open(outdat, 'w') as outf:
        if not ifs.open(insdf):
            oechem.OEThrow.Warning("Unable to open %s for reading" % insdf)
            return

        try:
            for mol in ifs.GetOEMols():
                molName = mol.GetTitle()
                outf.write('\n\n>>> Molecule: %s\tNumConfs: %d' %
                           (molName, mol.NumConfs()))
                for i, conf in enumerate(mol.GetConfs()):

                    ### AM1-BCC charge calculation
                    charged_copy = oechem.OEMol(mol)
                    status = oequacpac.OEAssignPartialCharges(
                        charged_copy, oequacpac.OECharges_AM1BCCSym, False,
                        False)
                    if not status:
                        raise RuntimeError(
                            "OEAssignPartialCharges returned error code %s" %
                            status)

                    ### Our copy has the charges we want but not the right
                    ### conformation. Copy charges over. Also copy over
                    ### Wiberg bond orders (assumes the copy and the
                    ### original iterate atoms/bonds in the same order).
                    partial_charges = [
                        atom.GetPartialCharge()
                        for atom in charged_copy.GetAtoms()
                    ]
                    for atom, charge in zip(mol.GetAtoms(), partial_charges):
                        atom.SetPartialCharge(charge)
                    partial_bondorders = [
                        bond.GetData("WibergBondOrder")
                        for bond in charged_copy.GetBonds()
                    ]
                    for bond, order in zip(mol.GetBonds(),
                                           partial_bondorders):
                        bond.SetData("WibergBondOrder", order)

                    ### Sum angles around each invertible N, and get Wiberg
                    ### bond order.
                    for atom in conf.GetAtoms(oechem.OEIsInvertibleNitrogen()):
                        aidx = atom.GetIdx()
                        # indexing below assumes at least three neighbors
                        nbors = list(atom.GetAtoms())
                        ang1 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[0], atom, nbors[1]))
                        ang2 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[1], atom, nbors[2]))
                        ang3 = math.degrees(
                            oechem.OEGetAngle(conf, nbors[2], atom, nbors[0]))
                        ang_sum = math.fsum([ang1, ang2, ang3])
                        outf.write(
                            "\n\n%s: sum of angles for N, index %d: %f" %
                            (molName, aidx, ang_sum))
                        angList.append(ang_sum)
                        labelList.append("{}_{}_{}".format(molName, i, aidx))

                        for bond in atom.GetBonds():
                            nidx = bond.GetNbr(atom).GetIdx()
                            nbor_wib = bond.GetData('WibergBondOrder')
                            outf.write(
                                "\n{}: wiberg bond order for indices {} {}: {}"
                                .format(molName, aidx, nidx, nbor_wib))
        finally:
            ifs.close()

    if plotout is not None:
        with open(plotout, 'w') as f:
            for idx, (ang, label) in enumerate(zip(angList, labelList)):
                f.write("{0}\t{1}\t{2}\n".format(idx, ang, label))
Exemplo n.º 21
0
def getRMSD(sdfRef, theory, rmsdict, package='Psi4'):
    """
    Perform RMSD calculation from an SDF file for molecule and its conformers.

    For each molecule, the initial and final optimization energies of its
    conformers are made relative to the first conformer, and the RMSD of
    (final - initial) is computed with an n-1 denominator and converted from
    Hartree to kcal/mol. Results are appended to RMSD.txt,
    energies_breakdown.txt and maxenergies.txt in the working directory.

    sdfRef: string, pathname of the SDF file with energies of opt 1 and opt 2
    theory: string, level of theory in format of mp2/6-31G*
    rmsdict: dictionary (can be empty) which will be populated in form of
             rmsdict[theory][molName] = 0.000  if the RMSD of before/after energies are 0.000
    package: string, name of software package used for QM calculation. only Psi4 currently supported

    Returns the same rmsdict object, updated in place. The stored RMSD is nan
    when fewer than two conformers have usable (non-nan) energies, because
    the n-1 denominator is then undefined.

    Calls sys.exit if a molecule has no initial or no final energies.

    """

    parts = theory.split('/')
    method, basis = parts[0].strip(), parts[1].strip()

    # create a molecule read in stream
    print("Opening SDF file %s" % sdfRef)
    ifsRef = oechem.oemolistream()
    ifsRef.SetConfTest(oechem.OEAbsoluteConfTest())
    if not ifsRef.open(sdfRef):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % sdfRef)

    try:
        # output files: RMSD per molecule, initial/final energies per
        # conformer, and the conformers with the largest energy differences
        with open("RMSD.txt", 'a') as RMSD, \
                open("energies_breakdown.txt", 'a') as energies, \
                open("maxenergies.txt", "a") as maximum:

            RMSD.write("\nAnalyzing file: %s\n# Level of theory: %s\n" %
                       (sdfRef, theory))

            # Grab energies, perform RMSD calculation, write data to txt files.
            for rmol in ifsRef.GetOEMols():
                molName = rmol.GetTitle()
                # NOTE(review): the energy tags are looked up with a
                # hard-coded 'Psi4' while the index tag below uses `package`.
                final = np.asarray(pt.get_sd_list(rmol, 'QM opt energy',
                                                  'Psi4', method, basis),
                                   dtype=float)
                initial = np.asarray(pt.get_sd_list(rmol,
                                                    'QM opt energy initial',
                                                    'Psi4', method, basis),
                                     dtype=float)

                if final.size == 0 or initial.size == 0:
                    sys.exit("No energies found for {} {}/{}! Check that "
                             "data is stored in tags. Exiting.".format(
                                 molName, method, basis))

                # subtract conformer[0] energies from all conformers
                rel_final = final - final[0]
                rel_initial = initial - initial[0]

                # squared (final - initial) of relative energies, nan dropped
                sq_diff = np.subtract(rel_final, rel_initial)
                sq_diff = np.square(sq_diff[~np.isnan(sq_diff)])

                # root of the sum over (n-1); undefined for fewer than two
                # values, then convert from Hartree to Kcal/mol
                if sq_diff.size > 1:
                    average = math.sqrt(
                        math.fsum(sq_diff) / (sq_diff.size - 1)) * 627.5095
                else:
                    average = float('nan')

                # puts RMSD values into .txt file, and store in dict for
                # plotting. setdefault lets callers pass an empty dict.
                RMSD.write("#%s\t%.5f RMSD(Kcal/mol)\n" % (molName, average))
                rmsdict.setdefault(theory, {})[molName] = average

                # store energies of initial and final for molecules conformers
                energies.write(
                    "\n#%s\n#%s\n#RMSD = %.5f(y)\t\t(x=Hartree, y=kcal/mol)\n#conf. init. Energy(x)  \t final Energy(x) \t diff.(x)\tdiff. (y) \n"
                    % (theory, molName, average))

                # get list of conformer indices to identify high RMSD ones;
                # plain ints keep %r output free of NumPy scalar wrappers
                conflist = pt.get_sd_list(rmol, "original index", package,
                                          method, basis)
                conformer = [int(item.split(',')[0]) for item in conflist]

                diff_hartree = final - initial
                difference = diff_hartree * 627.5095
                for i, e_final in enumerate(final):
                    energies.write(
                        "%r \t %5.9f \t %5.9f \t %5.9f\t%5.9f \n" %
                        (conformer[i], initial[i], e_final, diff_hartree[i],
                         difference[i]))

                # find max 3 confs with highest absolute energy differences
                try:
                    difference = np.absolute(difference)
                    top = []
                    for _ in range(3):
                        idx = np.nanargmax(difference)
                        # set max conf to zero to find next highest
                        difference[idx] = 0
                        top.append(idx)
                    max1, max2, max3 = (conformer[idx] for idx in top)
                except ValueError:
                    # nanargmax raises ValueError when every entry is nan
                    # TODO don't plot this mol for all nan's
                    print("All RMSDs in list for file {} mol {} are nan!!!".
                          format(sdfRef, molName))
                    max1 = max2 = max3 = -1

                energies.write(
                    "#*** Max energy differences are conformers (hi-->low): %r, %r, %r ***\n\n"
                    % (max1, max2, max3))
                maximum.write("%s, %s : %r, %r, %r\n" %
                              (theory, molName, max1, max2, max3))
    finally:
        ifsRef.close()

    return rmsdict