def main():
    """Extract a subset of molecules and copy the matching conformer files.

    Reads ``data/sdf/structures.sdf.gz`` and ``data/sdf/properties.csv``,
    hands both to ``search_molcules`` and writes the selected molecules and
    properties to ``data/sdf/subset_structures.sdf`` and
    ``data/sdf/subset_properties.csv``. For every selected index the files
    ``<idx>.sdf`` and ``<idx>.energies.npy`` are then copied from
    ``_tmp_ensemble_/`` to ``_tmp_subset_/conformers/`` under the new,
    consecutive index.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="dir", default="tmp2/")
    parser.add_argument('--randomseed', action='store', help='random seed', metavar="int", default=666)
    parser.add_argument('--sdf', action='store', help='', metavar="file")
    parser.add_argument('-j', '--cpu', action='store', help='pararallize', metavar="int", default=0)

    # NOTE(review): none of the options is consumed below; parsing is kept so
    # that --help and argument validation still work.
    parser.parse_args()

    molecules = cheminfo.read_sdffile('data/sdf/structures.sdf.gz')

    # Context managers make sure the handles are released even if the search
    # or the conversion raises.
    with open('data/sdf/properties.csv', 'r') as properties:
        sub_mol, sub_prop, idxs = search_molcules(molecules, properties)

    with open('data/sdf/subset_structures.sdf', 'w') as fm, \
            open('data/sdf/subset_properties.csv', 'w') as fp:
        for mol, prop in zip(sub_mol, sub_prop):
            fm.write(cheminfo.molobj_to_sdfstr(mol))
            fm.write("$$$$\n")
            fp.write(str(prop) + "\n")

    from_dir = "_tmp_ensemble_/"
    to_dir = "_tmp_subset_/conformers/"

    for i, idx in enumerate(idxs):
        # Structure file first, energies second (the last command is echoed)
        for suffix in (".sdf", ".energies.npy"):
            cmd = "cp {:}{:}{:} {:}{:}{:}".format(
                from_dir, str(idx), suffix, to_dir, str(i), suffix)
            # list() drains whatever misc.shell yields
            # (presumably lazy output lines - confirm)
            list(misc.shell(cmd))
        print(cmd)

    return
def merge_sdfs(filenames):
    """Merge the conformers of several sdf files into one list.

    Every file is loaded with ``generate_sdf``. The conformers of the first
    readable file are taken as they are; for each following file only the
    conformers for which ``merge_asymmetric`` returns an empty index list are
    appended (presumably those without a match among the conformers merged so
    far - confirm against ``merge_asymmetric``).

    Files that cannot be loaded are skipped. The function reads the
    module-level ``args`` object (``args.debug`` for progress output,
    ``args.dump`` to print the merged structures as sdf) and returns nothing.

    Args:
        filenames: iterable of sdf filenames accepted by ``generate_sdf``.
    """
    molobjs = []
    energies = []
    coordinates = []
    representations = []
    atoms = []
    n_total = 0

    for filename in filenames:

        try:
            molobjs_next, energies_next, coordinates_next, representations_next = generate_sdf(
                filename)
        except Exception:
            # Best effort: unreadable files are skipped, but Ctrl-C and
            # SystemExit are no longer swallowed.
            continue

        if not molobjs_next:
            # Nothing to merge (and no first molecule to read the atoms from)
            continue

        if not molobjs:
            # First readable file: adopt everything, remember the atom list
            atoms, _ = cheminfo.molobj_to_xyz(molobjs_next[0])
            energies += energies_next
            coordinates += coordinates_next
            representations += representations_next
            molobjs += molobjs_next
            n_total += len(molobjs_next)
            continue

        if args.debug:
            print(" {:} = {:} confs".format(filename, len(molobjs_next)))

        idxs = merge_asymmetric(
            atoms,
            energies_next,
            energies,
            representations_next,
            representations)

        n_new = 0
        for i, idxl in enumerate(idxs):

            # A non-empty index list means this conformer is not added
            if len(idxl) > 0:
                continue

            energies.append(energies_next[i])
            coordinates.append(coordinates_next[i])
            representations.append(representations_next[i])
            molobjs.append(molobjs_next[i])
            n_new += 1

        # Keep the running total correct independent of the debug flag
        n_total += n_new

        if args.debug:
            print(" - new", n_new)
            print("total", n_total)

    if args.dump:
        sdfstr = "".join(cheminfo.molobj_to_sdfstr(molobj) for molobj in molobjs)
        print(sdfstr)

    return
def get_conformations(line, scr="_tmp_ensemble_/", **kwargs):
    """Generate conformers for one molecule and store them on disk.

    The energies returned by ``generate_conformers`` are saved with
    ``misc.save_npy`` under ``<scr><index>.energies`` and the molecule is
    written as sdf to ``<scr><index>.sdf``. Mean and standard deviation of
    the energies are printed.

    Args:
        line: ``(index, molecule)`` pair.
        scr: path prefix for the output files; it is concatenated verbatim,
            so a folder needs its trailing slash.
        **kwargs: accepted for call compatibility and ignored.
    """
    im, molecule = line

    energies = generate_conformers(molecule)

    prefix = scr + str(im)
    misc.save_npy(prefix + ".energies", energies)

    # Serialise before opening so a conversion error leaves no empty file
    txtsdf = cheminfo.molobj_to_sdfstr(molecule)
    with open(prefix + ".sdf", 'w') as fsdf:
        fsdf.write(txtsdf)

    # "smi" is a literal placeholder; no SMILES is computed here
    print(im, "{:} {:5.2f} {:5.2f}".format("smi", energies.mean(), energies.std()))

    return
def dump_sdf(molobj, energies, coordinates, costs):
    """Print one sdf record per conformer of *molobj* to stdout.

    The molecule title is cleared once. For every ``(energy, coord, cost)``
    triple the coordinates are set on *molobj*, the ``Energy`` and ``Cost``
    properties are updated, and the molecule is serialised with
    ``cheminfo.molobj_to_sdfstr``. All records are printed in one go.

    Note that *molobj* is modified in place and keeps the coordinates and
    properties of the last triple.

    Args:
        molobj: molecule object providing ``SetProp``.
        energies: iterable of energies, one per conformer.
        coordinates: iterable of coordinate sets, one per conformer.
        costs: iterable of costs, one per conformer.
    """
    molobj.SetProp('_Name', '')

    records = []
    for energy, coord, cost in zip(energies, coordinates, costs):

        # Set coordinates
        cheminfo.molobj_set_coordinates(molobj, coord)

        molobj.SetProp('Energy', str(energy))
        molobj.SetProp('Cost', str(cost))

        records.append(cheminfo.molobj_to_sdfstr(molobj))

    print("".join(records))

    return
def ajax_submitquantum(request):
    """
    Setup quantum calculation

    Validates the posted sdf, builds the molecule, and either reports an
    already stored calculation or starts the GAMESS pipeline.

    Args:
        request: request object providing ``POST`` (mapping with the key
            ``"sdf"``) and ``dbsession``.

    Returns:
        dict: ``{'error': ..., 'message': ...}`` when the input is rejected,
        ``{'hashkey': ...}`` when a calculation with the same hash exists,
        otherwise whatever ``pipelines.gamess_quantum_pipeline`` returns.
    """

    if not request.POST:
        return {
            'error': 'Error 128 - empty post',
            'message': "Error. Empty post."
        }

    # .get() so that a missing key gives the error response, not a KeyError
    sdf_post = request.POST.get("sdf")

    if not sdf_post:
        return {
            'error': 'Error 132 - sdf key error',
            'message': "Error. Missing information."
        }

    # Get coordinates from request
    sdfstr = sdf_post.encode('utf-8')

    # Get rdkit
    molobj, status = cheminfo.sdfstr_to_molobj(sdfstr)

    if molobj is None:
        # Only the text after the last "]" of the status is shown to the user
        status = status.split("]")
        status = status[-1]
        return {'error': 'Error 141 - rdkit error', 'message': status}

    try:
        # Called only for validation: raises ValueError without a conformer
        molobj.GetConformer()
    except ValueError:
        # Error
        return {
            'error': 'Error 141 - rdkit error',
            'message': "Error. Server was unable to generate conformations for this molecule"
        }

    # If hydrogens not added, assume graph and optimize with forcefield
    atoms = cheminfo.molobj_to_atoms(molobj)
    if 1 not in atoms:
        molobj = cheminfo.molobj_add_hydrogens(molobj)
        cheminfo.molobj_optimize(molobj)

    # TODO Check lengths of atoms
    # TODO Define max in settings

    # Fix sdfstr: replace the first three lines by empty ones
    sdfstr = sdfstr.decode('utf8')
    for _ in range(3):
        i = sdfstr.index('\n')
        sdfstr = sdfstr[i + 1:]
    sdfstr = "\n" * 3 + sdfstr

    # hash on sdf (conformer)
    # NOTE(review): the hash is taken from the posted sdf, also when
    # hydrogens were added to molobj above.
    hashkey = hashlib.md5(sdfstr.encode()).hexdigest()

    calculation = request.dbsession.query(models.GamessCalculation) \
        .filter_by(hashkey=hashkey).first()

    if calculation is not None:
        # Known molecule: refresh the timestamp and point to the stored result
        calculation.created = datetime.datetime.now()
        return {'hashkey': hashkey}

    print("new:", hashkey)

    molecule_info = {"sdfstr": sdfstr, "molobj": molobj, "hashkey": hashkey}

    msg = pipelines.gamess_quantum_pipeline(request, molecule_info)

    return msg
def _gamess_worker(conn, reader, *args, **kwargs):
    """Run one GAMESS calculation and send the parsed properties over *conn*.

    ``None`` is sent when the calculation or the parsing fails, so the parent
    receives exactly one object per worker. Defined at module level so that
    it can be used as a ``Process`` target under every start method.
    """
    try:
        stdout, status = gamess.calculate(*args, **kwargs)
        properties = reader(stdout)
    except Exception:
        # TODO Error reading properties
        properties = None

    conn.send(properties)
    conn.close()


def _receive_properties(conn):
    """Return the object sent by a worker, or None if it ended without sending."""
    try:
        return conn.recv()
    except EOFError:
        return None


def gamess_quantum_pipeline(request, molinfo):
    """
    Assumed that rdkit understands the molecule

    Optimizes the molecule with GAMESS, runs the vibration, orbital and
    solvation calculations in three parallel processes, and stores the
    results as a new ``models.GamessCalculation`` in ``request.dbsession``.
    The ``models.Counter`` row of the molecule's smiles is created or
    incremented.

    The calculations run with the working directory set to
    ``<module folder>/data/<hashkey>``; afterwards (also on failure) the
    working directory is the module folder.

    Args:
        request: request object providing ``dbsession``.
        molinfo: dict with the keys ``"molobj"`` and ``"sdfstr"``.

    Returns:
        dict: ``{"smiles": ..., "hashkey": ...}`` on success, or
        ``{'error': ..., 'message': ...}`` when a GAMESS step failed. In the
        error case nothing is added to the database session.
    """

    # TODO Read gamess settings from ini

    # Read input
    molobj = molinfo["molobj"]
    sdfstr = molinfo["sdfstr"]

    # Get that smile on your face
    smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)

    # hash on sdf (conformer)
    hashkey = hashlib.md5(sdfstr.encode()).hexdigest()

    # Start respond message
    msg = {"smiles": smiles, "hashkey": hashkey}

    # Create new calculation
    calculation = models.GamessCalculation()

    # Make sure the scratch folder of this molecule exists
    here = os.path.abspath(os.path.dirname(__file__)) + "/"
    datahere = here + "data/"
    scratch = datahere + hashkey
    os.makedirs(scratch, exist_ok=True)

    # NOTE(review): the line breaks inside the headers were reconstructed;
    # each group is kept on one line with "$" in column 2 - confirm against
    # the GAMESS input rules and the original file.
    vibheader = """
 $basis gbasis=PM3 $end
 $contrl scftyp=RHF runtyp=hessian icharg={:} maxit=60 $end
"""

    orbheader = """
 $contrl coord=cart units=angs scftyp=rhf icharg={:} maxit=60 $end
 $basis gbasis=sto ngauss=3 $end
"""

    solheader = """
 $system mwords=125 $end
 $basis gbasis=PM3 $end
 $contrl scftyp=RHF runtyp=energy icharg={:} $end
 $pcm solvnt=water mxts=15000 icav=1 idisp=1 $end
 $tescav mthall=4 ntsall=60 $end
"""

    headers = [vibheader, orbheader, solheader]
    readers = [
        gamess.read_properties_vibration,
        gamess.read_properties_orbitals,
        gamess.read_properties_solvation
    ]

    # TODO Add error messages when gamess fails
    # TODO add timeouts for all gamess calls

    gmsargs = {
        "scr": scratch,
        "autoclean": True,
        "debug": False,
    }

    os.chdir(scratch)

    # try/finally: every exit path, including the error returns, has to
    # leave the scratch folder again.
    try:

        # Optimize molecule
        properties = gamess.calculate_optimize(molobj, **gmsargs)

        if properties is None:
            return {
                'error': 'Error g-80 - gamess optimization error',
                'message': "Error. Server was unable to optimize molecule"
            }

        print(smiles, list(properties.keys()))

        # Save and set coordinates
        coord = properties["coord"]
        calculation.coordinates = save_array(coord)
        calculation.enthalpy = properties["h"]
        cheminfo.molobj_set_coordinates(molobj, coord)

        # Optimization is finished, do other calculation async-like
        procs = []
        conns = []

        for header, reader in zip(headers, readers):
            parent_conn, child_conn = Pipe()
            p = Process(
                target=_gamess_worker,
                args=(child_conn, reader, molobj, header),
                kwargs=gmsargs)
            p.start()

            # The child holds its own handle now. Closing ours makes recv()
            # report EOF instead of blocking forever if the child dies.
            child_conn.close()

            procs.append(p)
            conns.append(parent_conn)

        # Receive before joining: a worker blocks in send() until a large
        # payload has been read, so joining first can deadlock.
        properties_vib, properties_orb, properties_sol = [
            _receive_properties(conn) for conn in conns
        ]

        for proc in procs:
            proc.join()

        # Vibrate molecule
        if properties_vib is None:
            return {
                'error': 'Error g-104 - gamess vibration error',
                'message': "Error. Server was unable to vibrate molecule"
            }

        print(smiles, list(properties_vib.keys()))

        calculation.islinear = properties_vib["linear"]
        calculation.vibjsmol = properties_vib["jsmol"]
        calculation.vibfreq = save_array(properties_vib["freq"])
        calculation.vibintens = save_array(properties_vib["intens"])
        calculation.thermo = save_array(properties_vib["thermo"])

        # Orbitals
        if properties_orb is None:
            return {
                'error': 'Error g-128 - gamess orbital error',
                'message': "Error. Server was unable to orbital the molecule"
            }

        print(smiles, list(properties_orb.keys()))

        calculation.orbitals = save_array(properties_orb["orbitals"])
        calculation.orbitalstxt = properties_orb["stdout"]

        # Solvation
        if properties_sol is None:
            return {
                'error': 'Error g-159 - gamess solvation error',
                'message': "Error. Server was unable to run solvation calculation"
            }

        # 'charges', 'solvation_total', 'solvation_polar', 'solvation_nonpolar',
        # 'surface', 'total_charge', 'dipole', 'dipole_total'
        print(smiles, list(properties_sol.keys()))

        charges = properties_sol["charges"]
        calculation.charges = save_array(charges)
        calculation.soltotal = properties_sol["solvation_total"]
        calculation.solpolar = properties_sol["solvation_polar"]
        calculation.solnonpolar = properties_sol["solvation_nonpolar"]
        calculation.solsurface = properties_sol["surface"]
        calculation.soldipole = save_array(properties_sol["dipole"])
        calculation.soldipoletotal = properties_sol["dipole_total"]

    finally:
        os.chdir(here)

    # Saveable sdf and reset title: replace the first two lines by empty ones
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    sdfstr = str(sdfstr)
    for _ in range(2):
        i = sdfstr.index('\n')
        sdfstr = sdfstr[i + 1:]
    sdfstr = "\n\n" + sdfstr

    # Save mol2 fmt
    calculation.mol2 = cheminfo.molobj_to_mol2(molobj, charges=charges)

    # Get a 2D Picture
    # TODO Compute 2D coordinates
    svgstr = cheminfo.molobj_to_svgstr(molobj, removeHs=True)

    # Success, setup database
    calculation.smiles = smiles
    calculation.hashkey = hashkey
    calculation.sdf = sdfstr
    calculation.svg = svgstr
    calculation.created = datetime.datetime.now()

    # Add calculation to the database
    request.dbsession.add(calculation)

    # Add smiles to counter
    countobj = request.dbsession.query(models.Counter) \
        .filter_by(smiles=smiles).first()

    if countobj is None:
        counter = models.Counter()
        counter.smiles = smiles
        counter.count = 1
        request.dbsession.add(counter)
    else:
        countobj.count += 1

    return msg
def main():
    """Filter a molecule/property dataset and write it to the scratch folder.

    Molecules from ``--sdf`` are paired line by line with ``--properties``.
    Pairs rejected by ``molobjfilter`` (molecule) or ``valuefilter``
    (property line) are dropped. The remaining ones are written to
    ``<scratch>/structures.sdf.gz`` and ``<scratch>/properties.csv``, and a
    smiles -> values dict is stored with ``misc.save_json`` and
    ``misc.save_obj`` under ``<scratch>/molecules``.

    The values of a line are all whitespace separated columns after the
    first one, converted to float. Molecules with identical smiles overwrite
    each other in the dict.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--scratch', action='store', help='', metavar="DIR", default="_tmp_")
    parser.add_argument('--sdf', action='store', help='', metavar="FILE")
    parser.add_argument('--properties', action='store', help='', metavar="FILE")
    parser.add_argument('-j', '--procs', action='store', help='pararallize', metavar="int", default=0, type=int)
    args = parser.parse_args()

    # Paths are built by concatenation, so a folder needs its trailing slash.
    # An empty value is left alone (current folder) instead of failing on [-1].
    if args.scratch and not args.scratch.endswith("/"):
        args.scratch += "/"

    moledict = {}

    # Context managers release all three handles even when a filter or a
    # conversion raises half way through.
    with gzip.open(args.scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(args.scratch + "properties.csv", 'w') as fprop, \
            open(args.properties, 'r') as properties:

        molecules = cheminfo.read_sdffile(args.sdf)

        for molobj, line in zip(molecules, properties):

            if not molobjfilter(molobj):
                continue

            if not valuefilter(line):
                continue

            smiles = cheminfo.molobj_to_smiles(molobj, remove_hs=True)
            print(smiles)

            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            fsdf.write(sdfstr.encode())

            # The line still carries its newline
            fprop.write(line)

            moledict[smiles] = [float(x) for x in line.split()[1:]]

    misc.save_json(args.scratch + "molecules", moledict)
    misc.save_obj(args.scratch + "molecules", moledict)

    return
def calculate_forcefield(molobj, conformer, torsions, origin_angles, delta_angles,
    ffprop=None,
    ff=None,
    delta=10**-7,
    coord_decimals=6,
    grad_threshold=100):
    """
    Set the given torsions on a copy of the molecule, relax the copy with the
    torsions constrained, then relax the original conformer without
    constraints starting from the (rounded) constrained coordinates.

    Disclaimer: lots of hacks, sorry. Let me know if you have an alternative.

    Note: There is an artifact where, if delta < 10**-16, the FF will find an
    *extremely* local minimum with very high energy (un-physical).
    Setting delta to 10**-6 (numerical noise) should fix this.
    NOTE(review): the default used here is 10**-7, not 10**-6.

    Note: rdkit forcefield restrained optimization will optimize to a *very*
    local and very unphysical minimum which the global optimizer cannot get
    out from. Truncating the digits of the coordinates to six is a crude but
    effective way to slightly move the molecule out of this in a
    reproducible way.

    Args:
        molobj: molecule; it is copied through an sdf round trip.
        conformer: conformer that receives the result coordinates
            (presumably a conformer of molobj that ``ff`` operates on -
            confirm against the callers).
        torsions: sequence of atom-index tuples, one per torsion.
        origin_angles: start angle per torsion, in degrees.
        delta_angles: offset per torsion, added to the start angle.
        ffprop, ff: forcefield properties and forcefield; both are created
            with ``get_forcefield(molobj)`` if either one is None.
        delta: half width of the allowed interval around each set angle.
        coord_decimals: decimals kept when rounding the constrained
            coordinates.
        grad_threshold: gradient norm above which the result is flagged.

    Returns:
        (energy, positions, status): energy from ``ff.CalcEnergy()``, the
        positions of ``conformer``, and the status of
        ``run_forcefield_prime``, which is replaced by 4 when that status is
        0 but the gradient norm exceeds ``grad_threshold``.
    """

    if ffprop is None or ff is None:
        ffprop, ff = get_forcefield(molobj)

    # Work on an independent copy so the constraints never touch molobj
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    molobj_prime, status = cheminfo.sdfstr_to_molobj(sdfstr)
    conformer_prime = molobj_prime.GetConformer()

    # Setup constrained forcefield
    # ffprop_prime, ffc = get_forcefield(molobj_prime)
    ffc = ChemicalForceFields.MMFFGetMoleculeForceField(molobj_prime, ffprop)

    # Set angles and constrains for all torsions
    for i, angle in enumerate(delta_angles):

        set_angle = origin_angles[i] + angle

        # Set clockwork angle
        # Best effort: if the angle cannot be set directly, the constraint
        # below is still added.
        try: Chem.rdMolTransforms.SetDihedralDeg(conformer_prime, *torsions[i], set_angle)
        except: pass

        # Set forcefield constrain
        ffc.MMFFAddTorsionConstraint(*torsions[i], False,
            set_angle-delta, set_angle+delta, 1.0e10)

    # minimize constrains
    status = run_forcefield(ffc, 500)

    # Set result
    coordinates = conformer_prime.GetPositions()
    coordinates = np.round(coordinates, coord_decimals) # rdkit hack, see the second note in the docstring
    cheminfo.conformer_set_coordinates(conformer, coordinates)

    # minimize global
    status = run_forcefield_prime(ff, 700, force=1e-4)

    # Get current energy
    energy = ff.CalcEnergy()

    # A run that reports status 0 is still flagged (4) when the remaining
    # gradient is large
    if status == 0:

        grad = ff.CalcGrad()
        grad = np.array(grad)
        grad_norm = linalg.norm(grad)

        if grad_norm > grad_threshold:
            status = 4

    # Developer diagnostics. debug is hard-coded to False, so the block below
    # never runs; when enabled it dumps two structures to _test_dumpsdf.sdf
    # and terminates the program.
    debug = False
    if energy > 1000 and debug:

        print(torsions, origin_angles, delta_angles)
        print(energy, status)

        print("id")
        print(id(molobj_prime))
        print(id(molobj))

        molobj_test, status = cheminfo.sdfstr_to_molobj(sdfstr)
        coordinates = conformer.GetPositions()
        cheminfo.molobj_set_coordinates(molobj_test, coordinates)
        ffprop_t, ff_t = get_forcefield(molobj)
        run_forcefield(ff_t, 500)

        print(coordinates)

        for idxs in torsions:
            angle = Chem.rdMolTransforms.GetDihedralDeg(conformer, *idxs)
            print("ANGLE 1", angle)

        f = open("_test_dumpsdf.sdf", 'w')
        sdf = cheminfo.save_molobj(molobj)
        f.write(sdf)

        # prop, ff = get_forcefield(molobj)
        # status = run_forcefield(ff, 500)
        conformer = molobj_test.GetConformer()

        for idxs in torsions:
            angle = Chem.rdMolTransforms.GetDihedralDeg(conformer, *idxs)
            print("ANGLE 2",angle)

        print(energy, status)

        sdf = cheminfo.save_molobj(molobj_test)
        f.write(sdf)

        f.close()
        quit()

    # Get current positions
    pos = conformer.GetPositions()

    return energy, pos, status
def calculate_mopac(molobj, conformer, torsions, origin_angles, delta_angles,
    delta=10**-7,
    coord_decimals=6,
    atoms=None,
    ffprop=None,
    reference_smiles=None):
    """
    Set the given torsions on a copy of the molecule, relax the copy with
    the torsions constrained in the MMFF forcefield, then optimize the
    resulting coordinates with ``quantum.optmize_conformation``.

    Args:
        molobj: molecule; it is copied through an sdf round trip and not
            modified.
        conformer: not used by this function; kept for a signature parallel
            to ``calculate_forcefield``.
        torsions: sequence of atom-index tuples, one per torsion.
        origin_angles: start angle per torsion, in degrees.
        delta_angles: offset per torsion, added to the start angle.
        delta: half width of the allowed interval around each set angle.
        coord_decimals: decimals kept when rounding the constrained
            coordinates.
        atoms: atoms handed to the ``quantum`` functions.
        ffprop: forcefield properties, passed straight to
            ``MMFFGetMoleculeForceField``.
        reference_smiles: if given, the smiles of the optimized structure is
            compared against it.

    Returns:
        (energy, coordinates, status) where status is
        0 - optimization succeeded,
        5 - succeeded, but the smiles differs from ``reference_smiles``,
        4 - the quantum step raised; energy is 0.0 and the coordinates are
        the rounded forcefield coordinates.
    """

    # Work on an independent copy so the constraints never touch molobj
    sdfstr = cheminfo.molobj_to_sdfstr(molobj)
    molobj_prime, _ = cheminfo.sdfstr_to_molobj(sdfstr)
    conformer_prime = molobj_prime.GetConformer()

    # Setup constrained forcefield
    ffc = ChemicalForceFields.MMFFGetMoleculeForceField(molobj_prime, ffprop)

    # Set angles and constrains for all torsions
    for i, angle in enumerate(delta_angles):

        set_angle = origin_angles[i] + angle

        # Set clockwork angle. Best effort: if the angle cannot be set
        # directly, the constraint below is still added.
        try:
            Chem.rdMolTransforms.SetDihedralDeg(conformer_prime, *torsions[i], set_angle)
        except Exception:
            pass

        # Set forcefield constrain
        ffc.MMFFAddTorsionConstraint(*torsions[i], False,
            set_angle - delta, set_angle + delta, 1.0e10)

    # minimize constrains (the returned status is superseded below)
    run_forcefield(ffc, 500)

    # Set result. Rounding is the rdkit hack that nudges the structure out
    # of very local constrained minima in a reproducible way.
    coordinates = conformer_prime.GetPositions()
    coordinates = np.round(coordinates, coord_decimals)

    try:
        energy, coordinates_opt = quantum.optmize_conformation(atoms, coordinates)
        coordinates = coordinates_opt
        status = 0

        if reference_smiles is not None:
            new_smiles = quantum.get_smiles(atoms, coordinates)
            if new_smiles != reference_smiles:
                status = 5

    except Exception:
        # Any failure of the quantum step is reported through the status;
        # Ctrl-C and SystemExit are no longer swallowed.
        energy = 0.0
        status = 4

    return energy, coordinates, status
def main_folder():
    """Merge the conformer files of a cost-combination scan into one set.

    The first ``--sdf`` value is used as filename prefix; files are expected
    as ``<prefix><a>_<b>.sdf``. The file for combination ``(1, 1)`` is the
    starting set, then the first 15 combinations from
    ``clockwork.generate_costlist()`` are merged in. Combinations whose file
    cannot be loaded are skipped. A conformer is added when
    ``merge_asymmetric`` returns an empty index list for it.

    Output, all in ``_tmp_apentane_cum/``: ``all.sdf`` (merged structures),
    ``costs.csv`` (the combination each conformer came from) and an
    ``energies`` plot.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('-v', '--version', action='version', version="1.0")
    parser.add_argument('--sdf', nargs="+", action='store', help='', metavar='FILE')
    args = parser.parse_args()

    # TODO Merge results from redis

    if args.sdf is None:
        print("error: actually we need sdfs to merge")
        # quit() is only available when the site module is loaded, and it
        # exits with status 0; signal the failure explicitly.
        raise SystemExit(1)

    dumpdir = "_tmp_apentane_cum/"

    filename = args.sdf[0] + "{:}_{:}" + ".sdf"

    molobjs, energies, coordinates, representations = generate_sdf(
        filename.format(1, 1))

    atoms, _ = cheminfo.molobj_to_xyz(molobjs[0])

    # costcombos, costs = clockwork.generate_costlist(total_torsions=28)
    costcombos, costs = clockwork.generate_costlist()

    n_total = len(molobjs)
    molcosts = [(1, 1)] * n_total

    print("start", n_total)

    for combo in costcombos[:15]:

        try:
            molobjs_new, energies_new, coordinates_new, representations_new = generate_sdf(
                filename.format(*combo))
        except Exception:
            # Best effort: a missing or unreadable file is skipped
            continue

        print(" merge", len(molobjs_new))

        idxs = merge_asymmetric(
            atoms,
            energies_new,
            energies,
            representations_new,
            representations)

        n_new = 0
        for i, idxl in enumerate(idxs):

            # A non-empty index list means this conformer is not added
            if len(idxl) > 0:
                continue

            energies.append(energies_new[i])
            coordinates.append(coordinates_new[i])
            representations.append(representations_new[i])
            molobjs.append(molobjs_new[i])
            n_new += 1

        molcosts += [combo] * n_new
        n_total += n_new

        print(" - new", n_new)
        print("total", n_total, combo)

    sdfstr = "".join(cheminfo.molobj_to_sdfstr(molobj) for molobj in molobjs)
    with open(dumpdir + "all.sdf", 'w') as f:
        f.write(sdfstr)

    # One "<a> <b>" line per merged conformer, in the order of all.sdf
    hellodump = "".join("{:} {:}\n".format(*combo) for combo in molcosts)
    with open(dumpdir + "costs.csv", 'w') as f:
        f.write(hellodump)

    plt.plot(energies, 'k.')
    plt.yscale("log")
    plt.savefig(dumpdir + "energies")

    return
def set_structures(datadict, scratch, procs=0):
    """
    take dict of smiles->value and generate sdf from smiles.

    Put structures in scratch/structures.sdf.gz
    Put values in scratch/properties.csv, one line per molecule:
    "<result index> <value> <value> ..."

    Args:
        datadict: dict mapping smiles to the value(s) of the molecule.
        scratch: output path prefix; it is concatenated verbatim, so a
            folder needs its trailing slash.
        procs: 0 prepares the structures serially with
            ``prepare_sdf_and_csv(smiles, value)``; a positive number uses a
            process pool of that size with
            ``prepare_sdf_and_csv_procs((smiles, value))``. A negative number
            prepares nothing and leaves both files empty.

    Results that are None are skipped, but they still consume an index, so
    the index column can have gaps.
    """

    # no mp
    if procs == 0:
        # Lazy: each structure is prepared while the output is being written
        results = (
            prepare_sdf_and_csv(smi, value)
            for smi, value in datadict.items()
        )

    # scale it out
    elif procs > 0:
        import multiprocessing.util as util
        util.log_to_stderr(util.SUBDEBUG)

        # map() blocks until all results are in, so the pool can be torn
        # down right away instead of lingering until interpreter exit.
        with Pool(procs) as pool:
            results = pool.map(prepare_sdf_and_csv_procs, list(datadict.items()))

    else:
        results = []

    print("wating for results")

    # Context managers release both handles even if a conversion raises
    with gzip.open(scratch + "structures.sdf.gz", 'w') as fsdf, \
            open(scratch + "properties.csv", 'w') as fprop:

        for i, result in enumerate(results):

            if result is None:
                continue

            molobj, values = result

            mean = np.mean(values)
            prtstr = np.round(values, decimals=1)
            print("save {:4.2f}".format(mean), "-", prtstr)

            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            fsdf.write(sdfstr.encode())

            valuesstr = " ".join([str(x) for x in values])
            fprop.write(f"{i} " + valuesstr + "\n")

    return
def main(datafile, procs=0, scr="_tmp_"):
    """Generate structures for every entry of a stored smiles -> value dict.

    The dict is loaded with ``misc.load_obj``. Structures are written to
    ``data/sdf/structures.sdf.gz`` and the values to
    ``data/sdf/properties.csv``, one line per molecule:
    "<result index> <value> <value> ...".

    Args:
        datafile: name handed to ``misc.load_obj``.
        procs: 0 prepares the structures serially with
            ``prepare_sdf_and_csv(smiles, value)``; any other value hands
            ``(smiles, value)`` pairs to ``misc.parallel`` with
            ``prepare_sdf_and_csv_procs``.
        scr: not used; the output paths are fixed.
    """

    db = misc.load_obj(datafile)

    keys = db.keys()

    print("total keys:", len(keys))

    if procs == 0:
        def get_results():
            for smi in keys:
                result = prepare_sdf_and_csv(smi, db[smi])
                if result is None:
                    continue
                yield result

        results = get_results()

    else:
        def workpackages():
            for smi in keys:
                yield smi, db[smi]

        lines = workpackages()

        results = misc.parallel(lines, prepare_sdf_and_csv_procs, [], {}, procs=procs)

    print("streaming results")

    # Write results. Context managers release both handles even if a
    # conversion raises.
    with gzip.open("data/sdf/structures.sdf.gz", 'w') as fsdf, \
            open("data/sdf/properties.csv", 'w') as fprop:

        for i, result in enumerate(results):

            if result is None:
                continue

            molobj, values = result

            # Terminate every record the same way the other writers of
            # structures.sdf.gz do, so the file can be split again.
            sdfstr = cheminfo.molobj_to_sdfstr(molobj)
            sdfstr += "$$$$\n"
            fsdf.write(sdfstr.encode())

            # str() because join() only accepts strings; one row per molecule
            valuesstr = " ".join([str(x) for x in values])
            fprop.write(f"{i} " + valuesstr + "\n")

    return