def align2d(file1, file2):
    """Render molecules from two files as aligned 2D pairs in a PDF report.

    Molecules are consumed pairwise from the two inputs (iteration stops
    when either input is exhausted). For every pair, the molecule from
    ``file2`` is given 2D coordinates and used as the substructure query
    against which the depiction of the molecule from ``file1`` is aligned.
    Both molecules are then drawn into two consecutive cells of the report,
    which is finally written to ``output.pdf``.

    Parameters
    ----------
    file1 : string
        name of the molecule file whose depictions get aligned
    file2 : string
        name of the molecule file supplying the alignment templates

    Notes
    -----
    The depiction options and the report object come from
    ``prep_pdf_writer()``, which is defined outside this function.
    """
    # atoms match on element + ring membership, bonds on ring membership only
    atomexpr = oechem.OEExprOpts_AtomicNumber | oechem.OEExprOpts_RingMember
    bondexpr = oechem.OEExprOpts_RingMember

    # open both inputs, then configure conformer handling on each of them
    in_streams = [oechem.oemolistream(path) for path in (file1, file2)]
    for stream in in_streams:
        stream.SetConfTest(oechem.OEAbsCanonicalConfTest())

    popts, dopts, report = prep_pdf_writer()

    target_mols, query_mols = (stream.GetOEMols() for stream in in_streams)
    for target, query in zip(target_mols, query_mols):
        for mol in (target, query):
            oechem.OESuppressHydrogens(mol)

        # the query needs its own 2D layout before it can act as a template
        oechem.OEGenerate2DCoordinates(query)
        pattern = oechem.OESubSearch(query, atomexpr, bondexpr)

        oechem.OEPrepareSearch(target, pattern)
        alignment = oedepict.OEPrepareAlignedDepiction(target, pattern)
        if not alignment.IsValid():
            oechem.OEThrow.Error(
                "Substructure is not found in input molecule!")

        # reserve both cells first so the pair lands in adjacent cells
        cells = (report.NewCell(), report.NewCell())
        for cell, mol in zip(cells, (target, query)):
            oedepict.OEPrepareDepiction(mol, popts)
            display = oedepict.OE2DMolDisplay(mol, dopts)
            oedepict.OERenderMolecule(cell, display)

    pdf_out = oechem.oeofstream()
    if not pdf_out.open('output.pdf'):
        oechem.OEThrow.Fatal("Cannot open output file!")
    oedepict.OEWriteReport(pdf_out, "pdf", report)
def find_string_tag(infile):
    """Print substring counts of the 'SMILES QCArchive' tag per conformer.

    For every conformer of every molecule in ``infile`` the SD tag
    ``'SMILES QCArchive'`` is read and the number of occurrences of the
    substrings ``'S'``, ``'P'``, ``'C#N'`` and ``'N/N'`` is printed on one
    tab-separated line, preceded by the conformer title.

    Parameters
    ----------
    infile : string
        name of the molecule file to scan

    Notes
    -----
    If the file cannot be opened only a warning is issued through
    ``oechem.OEThrow.Warning``; no exception is raised here.
    """
    # substrings counted, in the order of the printed columns
    patterns = ('S', 'P', 'C#N', 'N/N')

    # read input file
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())
    opened = ifs.open(infile)
    if not opened:
        oechem.OEThrow.Warning("Unable to open input file for reading")

    # loop through and evaluate tags
    for mol in ifs.GetOEMols():
        for conf in mol.GetConfs():
            mytag = oechem.OEGetSDData(conf, 'SMILES QCArchive')
            count1, count2, count3, count4 = (
                mytag.count(pattern) for pattern in patterns)
            print(f"{conf.GetTitle()}\t{count1}\t{count2}\t{count3}\t{count4}")
def main(infile):
    """Try to assign charges to each molecule, repairing stereo on failure.

    Each molecule of ``infile`` gets its chirality perceived and MDL
    aromaticity assigned, then ``charge_mol`` is called. If that raises
    ``RuntimeError``, stereochemistry is re-derived from the 3D coordinates
    and charging is attempted once more. A successful retry is reported as
    'fixed stereo'; a second failure calls ``find_unspecified_stereochem``
    and prints a message with the molecule title and its
    'SMILES QCArchive' tag.

    Parameters
    ----------
    infile : string
        name of the multi-molecule, multi-conformer input file

    Raises
    ------
    FileNotFoundError
        if ``infile`` cannot be opened for reading
    """
    # open multi-molecule, multi-conformer file
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")

    for mol in ifs.GetOEMols():
        # perceive stereochemistry for mol
        oechem.OEPerceiveChiral(mol)
        oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_MDL)

        # first attempt: charge the molecule as read
        # (the original author notes chg_mol does NOT have conformers)
        try:
            chg_mol = charge_mol(mol)
        except RuntimeError:
            pass
        else:
            continue

        # first attempt failed: rebuild stereo information from coordinates
        oechem.OE3DToInternalStereo(mol)
        # reset perceived and call OE3DToBondStereo, since it may be missed
        # by OE3DToInternalStereo if it thinks mol is flat
        mol.ResetPerceived()
        oechem.OE3DToBondStereo(mol)

        # second and last attempt
        try:
            chg_mol = charge_mol(mol)
            print(f'fixed stereo: {mol.GetTitle()}')
        except RuntimeError:
            find_unspecified_stereochem(mol)
            title = mol.GetTitle()
            smilabel = oechem.OEGetSDData(mol, "SMILES QCArchive")
            print(' >>> Charge assignment failed due to unspecified '
                  f'stereochemistry {title} {smilabel}')
def main(infile, ffxml):
    """Run ``min_ffxml`` on every conformer of every molecule in a file.

    Chirality is perceived and MDL aromaticity assigned once per molecule;
    each conformer then has its stereochemistry derived from its own 3D
    coordinates before being handed to ``min_ffxml`` together with
    ``ffxml``.

    Parameters
    ----------
    infile : string
        name of the multi-molecule, multi-conformer input file
    ffxml : passed through unchanged to ``min_ffxml``
        presumably the force field XML file name -- confirm against
        ``min_ffxml``, which is defined outside this function

    Raises
    ------
    FileNotFoundError
        if ``infile`` cannot be opened for reading
    """
    # open multi-molecule, multi-conformer file
    in_stream = oechem.oemolistream()
    in_stream.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not in_stream.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")

    for molecule in in_stream.GetOEMols():
        # molecule-level perception
        oechem.OEPerceiveChiral(molecule)
        oechem.OEAssignAromaticFlags(molecule, oechem.OEAroModel_MDL)

        for conformer in molecule.GetConfs():
            # conformer-level stereo comes from that conformer's coordinates
            oechem.OE3DToInternalStereo(conformer)
            min_ffxml(conformer, ffxml)
def master():
    """Hand out molecules to MPI workers on request, then shut them down.

    Molecules are read from the module-level ``input_smiles_file``. For each
    one, the function blocks until any rank sends a ``WORKTAG`` message and
    answers that rank with ``(position, molecule copy, title)``. Once the
    input is exhausted, an empty list tagged ``DIETAG`` is sent to every
    rank from 1 up to ``world_size - 1`` and a barrier is entered.

    Relies on names defined outside this function: ``input_smiles_file``,
    ``comm``, ``MPI``, ``WORKTAG``, ``DIETAG``, ``world_size`` and ``args``.
    """
    mol = oechem.OEMol()
    ifs = oechem.oemolistream(input_smiles_file)
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())

    for pos, mol in enumerate(ifs.GetOEMols()):
        # copy the molecule before it is shipped to a worker
        mol_copy = oechem.OEMol(mol)
        ligand_name = mol_copy.GetTitle()

        # wait for whichever worker asks for work next
        request = MPI.Status()
        comm.recv(source=MPI.ANY_SOURCE, tag=WORKTAG, status=request)
        worker_rank = request.Get_source()

        comm.send((pos, mol_copy, ligand_name), dest=worker_rank, tag=WORKTAG)

        # progress report every 1000 jobs when verbosity is exactly 1
        if args.v == 1 and pos % 1000 == 0:
            print("sent", pos, "jobs", flush=True)

    # tell every worker rank that no more work is coming
    for rank in range(1, world_size):
        comm.send([], dest=rank, tag=DIETAG)

    comm.Barrier()
def _resolve_mol_indices(mol_slice, num_mols):
    """Translate ``mol_slice`` into a sorted list of unique valid indices.

    Parameters
    ----------
    mol_slice : int, slice, or tuple/list of ints and slices
        selection in the style produced by ``numpy.s_``; negative ints and
        negative slice bounds count from the end
    num_mols : int
        total number of molecules the selection refers to

    Returns
    -------
    list of int
        ascending, duplicate-free indices within ``range(num_mols)``

    Raises
    ------
    ValueError
        if ``mol_slice`` (or one of its items) is neither a slice nor an int
    """
    def expand(item):
        # slice.indices clips the slice to num_mols, which matches
        # list(range(num_mols))[item] without building the full list
        if isinstance(item, slice):
            return range(*item.indices(num_mols))
        if isinstance(item, int):
            return (item + num_mols if item < 0 else item,)
        return None

    selected = set()
    if isinstance(mol_slice, (tuple, list)):
        for s in mol_slice:
            expanded = expand(s)
            if expanded is None:
                raise ValueError(
                    f"ERROR in parsing 'mol_slice' from {mol_slice}"
                    f" due to {s} being neither slice nor int")
            selected.update(expanded)
    else:
        expanded = expand(mol_slice)
        if expanded is None:
            raise ValueError(f"ERROR in parsing 'mol_slice' from {mol_slice}")
        selected.update(expanded)

    # indices outside the file can never match a molecule; always sort so
    # the caller can rely on the last entry being the largest index
    return sorted(i for i in selected if 0 <= i < num_mols)


def read_mols(in_file, mol_slice=None):
    """
    Open a molecule file and return molecules and conformers as OEMols.
    Provide option to slice the mols to return only a chunk from the
    specified indices.

    Parameters
    ----------
    in_file : string
        name of input file with molecules
    mol_slice : int, slice, or numpy slice object (tuple of ints/slices)
        The resulting integers are numerically sorted and duplicates removed.
        e.g., slices = np.s_[0, 3:5, 6::3] would be parsed to return
        [0, 3, 4, 6, 9, 12, 15, 18, ...]
        Can also parse from end: [-3:] gets the last 3 molecules, and
        [-2:-1] is the same as [-2] to get just next to last molecule.

    Returns
    -------
    mols : OEMols
        the molecule generator of the input stream when ``mol_slice`` is
        None, otherwise a list holding copies of the selected molecules
        in file order

    Raises
    ------
    FileNotFoundError
        if ``in_file`` cannot be opened for reading
    ValueError
        if ``mol_slice`` cannot be interpreted as ints and/or slices
    """
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not ifs.open(in_file):
        raise FileNotFoundError(f"Unable to open {in_file} for reading")
    mols = ifs.GetOEMols()

    if mol_slice is None:
        return mols

    try:
        # set max number of molecules for decoding slices
        # TODO: how to get num_mols without re-reading file and loading all mols
        ifs2 = oechem.oemolistream()
        ifs2.SetConfTest(oechem.OEAbsCanonicalConfTest())
        if not ifs2.open(in_file):
            raise FileNotFoundError(f"Unable to open {in_file} for reading")
        try:
            num_mols = sum(1 for _ in ifs2.GetOEMols())
        finally:
            ifs2.close()

        idx_to_keep = _resolve_mol_indices(mol_slice, num_mols)
        if not idx_to_keep:
            return []

        wanted = set(idx_to_keep)   # O(1) membership test per molecule
        last_idx = idx_to_keep[-1]  # list is sorted, so this is the maximum

        # go through the generator and retrieve the specified molecules
        mlist = []
        for i, m in enumerate(mols):
            if i in wanted:
                # append a copy else still linked to orig generator
                mlist.append(copy.copy(m))
                # nothing left to collect after the largest index
                if i == last_idx:
                    break
        return mlist
    finally:
        # only copies are returned on this path, so the stream can go
        ifs.close()
def main(infile, outfile, ffxml, minimizer):
    """Charge every conformer of an input file and minimize it.

    For each molecule, chirality is perceived and MDL aromaticity assigned,
    then ``charge_mol`` is called. If that raises ``RuntimeError``, stereo
    information is re-derived from the 3D coordinates and charging is tried
    once more; molecules that fail twice are reported and skipped. Every
    conformer of a charged molecule is passed through ``charge_conf`` and
    then to the minimization routine selected by ``minimizer``, which also
    receives the output stream.

    Parameters
    ----------
    infile : string
        name of the multi-molecule, multi-conformer input file
    outfile : string
        name of the output molecule file; must not exist yet
    ffxml : passed through unchanged to ``min_ffxml``
        only used when ``minimizer`` is 'ffxml'
    minimizer : string
        one of 'ffxml', 'mmff94', 'mmff94s', 'gaff', 'gaff2'; for any
        other value the conformers are charged but no minimizer is called

    Raises
    ------
    FileNotFoundError
        if ``infile`` cannot be opened for reading
    FileExistsError
        if ``outfile`` already exists
    """
    # open multi-molecule, multi-conformer file
    ifs = oechem.oemolistream()
    ifs.SetConfTest(oechem.OEAbsCanonicalConfTest())
    if not ifs.open(infile):
        raise FileNotFoundError(f"Unable to open {infile} for reading")
    mols = ifs.GetOEMols()

    # open an outstream file, refusing to overwrite an existing one
    ofs = oechem.oemolostream()
    if os.path.exists(outfile):
        raise FileExistsError("Output file {} already exists in {}".format(
            outfile, os.getcwd()))
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    # minimizer name -> call performing that minimization on one conformer
    dispatch = {
        # parsley (charges set by ff not used from conf)
        'ffxml': lambda c: min_ffxml(c, ofs, ffxml),
        'mmff94': lambda c: min_mmff94x(c, ofs, mmff94s=False),
        'mmff94s': lambda c: min_mmff94x(c, ofs, mmff94s=True),
        'gaff': lambda c: min_gaffx(c, ofs, gaff2=False),
        'gaff2': lambda c: min_gaffx(c, ofs, gaff2=True),
    }

    for mol in mols:
        # perceive stereochemistry for mol
        oechem.OEPerceiveChiral(mol)
        oechem.OEAssignAromaticFlags(mol, oechem.OEAroModel_MDL)

        # assign charges to copy of mol
        # note that chg_mol does NOT have conformers
        try:
            chg_mol = charge_mol(mol)
            needs_stereo_fix = False
        except RuntimeError:
            needs_stereo_fix = True

        if needs_stereo_fix:
            oechem.OE3DToInternalStereo(mol)
            # reset perceived and call OE3DToBondStereo, since it may be
            # missed by OE3DToInternalStereo if it thinks mol is flat
            mol.ResetPerceived()
            oechem.OE3DToBondStereo(mol)

            try:
                chg_mol = charge_mol(mol)
                print(f'fixed stereo: {mol.GetTitle()}')
            except RuntimeError:
                title = mol.GetTitle()
                smilabel = oechem.OEGetSDData(mol, "SMILES QCArchive")
                print(' >>> Charge assignment failed due to unspecified '
                      f'stereochemistry {title} {smilabel}')
                continue

        for conf in mol.GetConfs():
            # perceive stereochemistry for conf coordinates
            oechem.OE3DToInternalStereo(conf)

            # assign charges to the conf itself
            chg_conf = charge_conf(chg_mol, conf)

            run_minimizer = dispatch.get(minimizer)
            if run_minimizer is not None:
                run_minimizer(chg_conf)

    ifs.close()
    ofs.close()