def write_mols_to_sdf(unique_list, sdf_filename, min_rmsd, out_filename):
    """Write every molecule of ``unique_list`` into one SDF file.

    The file written is ``out_filename`` when that argument is truthy,
    otherwise ``sdf_filename``. ``min_rmsd`` is accepted but not used by
    this function.

    Returns the ``filename`` attribute reported by the pybel output object.
    """
    target = out_filename if out_filename else sdf_filename
    writer = pybel.Outputfile('sdf', target)
    for pymol in unique_list:
        writer.write(pymol)
    written_to = writer.filename
    writer.close()
    return written_to
def generate_report(results, outfile='dw_scores.sdf', vendors_list=None, zinc=True):
    """Write scored molecules to an SDF file, one record per result.

    Args:
        results: iterable of tuples. With ``zinc`` true each tuple is
            ``(smiles, xid, label, vendors, zincid)``; otherwise it is
            ``(smiles, xid, label)``.
        outfile: output file name; prefixed with ``"ltd_"`` when
            ``vendors_list`` is given.
        vendors_list: optional list of regex patterns. When given, a
            molecule is only written if at least one of its vendor names
            matches one of the patterns.
        zinc: selects which tuple layout ``results`` uses.
    """
    # NOTE(review): in the source the receivers of the subscript assignments
    # and the regex test were missing; they are restored here as
    # ``mol.data[...]`` and a search of each compiled pattern -- confirm.
    target = "ltd_" + outfile if vendors_list else outfile
    sd = pybel.Outputfile('sdf', target, overwrite=True)
    try:
        if zinc:
            # Compile the vendor patterns once instead of once per molecule.
            patterns = ([re.compile(vend) for vend in vendors_list]
                        if vendors_list else [])
            for j, (smiles, xid, label, vendors, zincid) in enumerate(results):
                mol = pybel.readstring('smi', str(smiles))
                mol.data['ZINCID'] = zincid
                mol.data['Smiles'] = smiles
                mol.data['Rank'] = j + 1
                mol.data['x*'] = xid
                mol.data['Label'] = label
                mol.data['log P'] = mol.calcdesc(descnames=['logP'])['logP']

                # ``vendors`` is iterated as a sequence of mappings; the
                # keys of all of them are pooled into one set of names.
                names = set()
                for entry in vendors:
                    names.update(entry.keys())

                if vendors_list:
                    approved = set(
                        co for co in names
                        if any(patt.search(co) for patt in patterns))
                    joined = '\n'.join(approved)
                    mol.data['vendors'] = joined
                    # Molecules without any approved vendor are skipped.
                    if len(joined) > 0:
                        sd.write(mol)
                else:
                    mol.data['vendors'] = '\n'.join(names)
                    sd.write(mol)
        else:
            for j, (smiles, xid, label) in enumerate(results):
                mol = pybel.readstring('smi', str(smiles))
                mol.data['x*'] = xid
                mol.data['Label'] = label
                sd.write(mol)
    finally:
        # Close the writer even when a record fails part-way through.
        sd.close()
def rotateGroupSingleMol(self, xyzName, modList):
    """Rotate a group of atoms of an xyz file and rewrite the file in place.

    Args:
        xyzName: path of the xyz file; its first molecule is read.
        modList: ``[angle, groupA, groupB]``. Atoms in ``groupA`` are moved
            with ``self.rotateAroundAxis`` about an axis that is the
            normalised cross product of two vectors spanned by the first
            three atoms of ``groupB`` and that is anchored at the mean
            position of ``groupB``. Indices are 0-based (``i + 1`` is what
            is handed to ``OBMol.GetAtom``).

    Returns:
        ``True``; when ``groupA`` is empty the file is left untouched.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    myMol = next(pybel.readfile('xyz', xyzName))
    statusTotal = True
    coords = [iatom.coords for iatom in myMol.atoms]
    angle, groupA, groupB = modList
    if len(groupA) < 1:
        print("Nothing to rotate along direction ")
        return statusTotal

    # Centre of set B.
    vcenter = np.array([0.0, 0.0, 0.0])
    for i in groupB:
        vcenter += np.array(coords[i])
    vcenter /= len(groupB)

    # Unit normal of the plane defined by the first three atoms of set B.
    v1 = np.array(coords[groupB[0]]) - np.array(coords[groupB[1]])
    v2 = np.array(coords[groupB[2]]) - np.array(coords[groupB[1]])
    axis = np.cross(v1, v2)
    axis /= np.linalg.norm(axis)

    # Rotate every atom of set A, always starting from the original coords.
    for i in groupA:
        oldcoord = np.array(coords[i])
        newcoord = self.rotateAroundAxis(oldcoord, vcenter, axis, angle)
        newcoordV3 = pybel.ob.vector3(newcoord[0], newcoord[1], newcoord[2])
        myMol.OBMol.GetAtom(i + 1).SetVector(newcoordV3)

    # Delete the original xyz file; the writer below is opened without
    # overwrite, so the file has to be gone first.
    # NOTE(review): the callee of this ``rm`` invocation was missing in the
    # source; restored as subprocess.call -- confirm.
    subprocess.call(['rm', '-r', xyzName])

    if statusTotal:
        output = pybel.Outputfile("xyz", xyzName)
        try:
            output.write(myMol)
        finally:
            output.close()
    return statusTotal
def moveGroupSingleMol(self, xyzName, modList):
    """Translate groups of atoms of an xyz file and rewrite the file in place.

    Args:
        xyzName: path of the xyz file; its first molecule is read.
        modList: iterable of ``[dist, d1, d2, groupAtoms]``. Every atom in
            ``groupAtoms`` is shifted by ``dist`` along the unit vector
            pointing from atom ``d1`` to atom ``d2``. Indices are 0-based
            (``i + 1`` is what is handed to ``OBMol.GetAtom``).

    Returns:
        ``True``.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    myMol = next(pybel.readfile('xyz', xyzName))
    statusTotal = True
    # Snapshot of the coordinates as read: all shifts are relative to it.
    coords = [iatom.coords for iatom in myMol.atoms]
    for dist, d1, d2, groupAtoms in modList:
        if len(groupAtoms) < 1:
            print("Nothing to move along direction  %s --> %s" % (d1, d2))
            # Nothing would be moved; skip the direction computation.
            continue
        v = np.array(coords[d2]) - np.array(coords[d1])
        v /= np.linalg.norm(v)
        for i in groupAtoms:
            newcoord = np.array(coords[i]) + dist * v
            newcoordV3 = pybel.ob.vector3(newcoord[0], newcoord[1], newcoord[2])
            myMol.OBMol.GetAtom(i + 1).SetVector(newcoordV3)

    # Delete the original xyz file; the writer below is opened without
    # overwrite, so the file has to be gone first.
    # NOTE(review): the callee of this ``rm`` invocation was missing in the
    # source; restored as subprocess.call -- confirm.
    subprocess.call(['rm', '-r', xyzName])

    if statusTotal:
        output = pybel.Outputfile("xyz", xyzName)
        try:
            output.write(myMol)
        finally:
            output.close()
    return statusTotal
def modifyBondsSingleMol(self, xyzName, bondList):
    """Set bond lengths in an xyz file and rewrite the file.

    Args:
        xyzName: path of the xyz file; its first molecule is read.
        bondList: iterable of ``[bondLength, i0, i1]`` with 0-based atom
            indices. A bond that does not exist yet is created with bond
            order 1 before its length is set; atom ``i0`` is the one
            passed to ``SetLength`` as the fixed atom.

    Returns:
        ``True`` when every request was applied, ``False`` when a missing
        bond could not be added. The original file is deleted in both
        cases and only written back on success.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    myMol = next(pybel.readfile('xyz', xyzName))
    statusTotal = True
    for bondLength, i0, i1 in bondList:
        bondToChange = myMol.OBMol.GetBond(i0 + 1, i1 + 1)
        if bondToChange is None:
            print("WARNING! The specified bond  %s - %s  does not exist"
                  % (i0, i1))
            print("Creating bond  %s - %s  and then process the bondlength "
                  "modification request" % (i0, i1))
            bondOrder = 1
            addBondStatus = myMol.OBMol.AddBond(i0 + 1, i1 + 1, bondOrder)
            if not addBondStatus:
                print("Failed to add specified bond  %s - %s" % (i0, i1))
                statusTotal = False
                break
            bondToChange = myMol.OBMol.GetBond(i0 + 1, i1 + 1)
        atomToFix = myMol.OBMol.GetAtom(i0 + 1)
        bondToChange.SetLength(atomToFix, bondLength)

    # NOTE(review): the source lost its indentation, so it is not certain
    # whether this optimisation ran once after the loop (as here) or after
    # every bond -- confirm. The equality test is kept so that truthy
    # non-bool values still do not trigger it.
    if self.localopt_ == True:
        myMol.localopt(steps=500)

    # Delete the original xyz file (also on failure, as before).
    # NOTE(review): the callee of this ``rm`` invocation was missing in the
    # source; restored as subprocess.call -- confirm.
    subprocess.call(['rm', '-r', xyzName])

    # Only a fully successful modification is written back.
    if statusTotal:
        output = pybel.Outputfile("xyz", xyzName)
        try:
            output.write(myMol)
        finally:
            output.close()
    return statusTotal
def addHydrogenSingleMol(self, xyzName, HPositionList):
    """Add hydrogens to the molecule of an xyz file and rewrite the file.

    Args:
        xyzName: path of the xyz file; its first molecule is read.
        HPositionList: iterable of requests. ``HPosition[0]`` is the
            hydrogen type; only ``"sp2"`` is handled, by passing the whole
            request to ``self.addHydrogenSp2``.

    Returns:
        ``True`` when every hydrogen was added, ``False`` on the first
        failure or unsupported type. The original file is deleted in both
        cases and only written back on success.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    myMol = next(pybel.readfile('xyz', xyzName))
    statusTotal = True
    for HPosition in HPositionList:
        Htype = HPosition[0]
        if Htype != "sp2":
            # TODO: implement this part
            print("sp3 H adding not implemented yet")
            statusTotal = False
            break

        status = self.addHydrogenSp2(myMol, HPosition)
        if status == False:
            # The previous message printed i0, i1, i2 and bondlength, none
            # of which is defined in this function, so the failure path
            # raised NameError. The raw request is reported instead.
            # NOTE(review): presumably those values are fields of
            # HPosition; restore the detailed, self.indexed_-aware message
            # once its layout is confirmed.
            print('Failed to add sp2 H atom to molecule:  %s , request: %s'
                  % (xyzName, HPosition))
            statusTotal = False
            break

    # Delete the original xyz file (also on failure, as before).
    # NOTE(review): the callee of this ``rm`` invocation was missing in the
    # source; restored as subprocess.call -- confirm.
    subprocess.call(['rm', '-r', xyzName])

    # Only a fully successful modification is written back.
    if statusTotal:
        output = pybel.Outputfile("xyz", xyzName, overwrite=True)
        try:
            output.write(myMol)
        finally:
            output.close()
    return statusTotal
def main():
    """Copy an SDF file, replacing each title by one of its metadata values.

    Command line: ``--infile``, ``--outfile`` and ``--key`` are required;
    ``--random`` appends ``__`` plus 13 random lowercase/digit characters
    to the new title. Molecules without the key are written unchanged.
    """
    parser = argparse.ArgumentParser(
        description="Change the title from a molecule file to metadata "
                    "value of a given-id of the same molecule file.",
    )
    parser.add_argument('--infile', '-i', required=True,
                        help="path to the input file")
    parser.add_argument('--outfile', '-o', required=True,
                        help="path to the output file")
    parser.add_argument('--key', '-k', required=True,
                        help="the metadata key from the sdf file which should inlcude the new title")
    parser.add_argument('--random', '-r', action="store_true",
                        help="Add random suffix to the title.")
    args = parser.parse_args()

    output = pybel.Outputfile("sdf", args.outfile, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", args.infile):
            # NOTE(review): the ``mol.data`` operands of this test and
            # lookup were missing in the source and are restored here; the
            # suffix is assumed to apply only to retitled molecules --
            # confirm both.
            if args.key in mol.data:
                mol.title = mol.data[args.key]
                if args.random:
                    alphabet = string.ascii_lowercase + string.digits
                    suffix = ''.join(random.choice(alphabet) for _ in range(13))
                    mol.title += '__%s' % suffix
            output.write(mol)
    finally:
        # Close the writer even when reading or writing fails.
        output.close()
def align(self):
    """Concatenate the effective conformers and superpose them with mdtraj.

    The first ``self.numEffectMin_`` files named in ``self.nameEnergyList_``
    are concatenated into ``self.mdXyzFileName_``; the first of them is
    converted to PDB to serve as topology; the trajectory is superposed on
    frame 0 using ``self.alignedAtoms_`` and stored in ``self.t_aligned_``.
    ``self.step_`` is incremented at the end.
    """
    # Local import so the block does not depend on a file-level import.
    import subprocess

    print('%s Align all effective conformers to the lowest-energy conformer'
          % (self.step_,))
    checkAndRemoveFile(self.mdXyzFileName_)
    print('Wring out effective conformers to file: %s'
          % (self.mdXyzFileName_,))
    with open(self.mdXyzFileName_, 'a') as mdXyzFile:
        for i in range(0, self.numEffectMin_):
            name = self.nameEnergyList_[i][0]
            # NOTE(review): the callee of this ``cat`` invocation was
            # missing in the source; restored as subprocess.call -- confirm.
            subprocess.call(['cat', name], stdout=mdXyzFile)

    # Topology file to be used in mdtraj: the first conformer as PDB.
    topXyzName = self.nameEnergyList_[0][0]
    topPdbName = topXyzName.replace('xyz', 'pdb')
    checkAndRemoveFile(topPdbName)
    mol = next(pybel.readfile('xyz', topXyzName))
    output = pybel.Outputfile('pdb', topPdbName, overwrite=True)
    try:
        output.write(mol)
    finally:
        output.close()

    # Load conformers into mdtraj and superpose them on the first frame.
    t = md.load(self.mdXyzFileName_, top=topPdbName)
    self.t_aligned_ = t.superpose(t, frame=0, atom_indices=self.alignedAtoms_)
    # NOTE(review): both file names below are empty strings in the source;
    # the original literal looks lost -- restore the intended output name.
    checkAndRemoveFile('')
    self.t_aligned_.save_xyz('')
    printLine()
    self.step_ += 1
def format_conversion(self):
    '''
    Convert the ligand files to mol2 with hydrogens and partial charges.

    Each ligand is read with pybel using its file extension as the input
    format, hydrogens are added, charges are computed with
    self.charge_model and the result is appended to 'lig_pybel.mol2'.
    A depiction is saved as PNG when self.draw_key is set.
    Charge models named by the original author:
    (eem, eem2015ba, eem2015bm, eem2015bn, eem2015ha, eem2015hm, eem2015hn,
    eqeq, fromfile, gasteiger, mmff94, none, qeq, qtpie).
    '''
    import os, sys
    try:
        import pybel
    except Exception as exc:
        # Report the import problem and stop with an installation hint.
        sys.stdout.write('\n\033[1;31mThere is a problem:\033[0m\t%s\n' % exc)
        sys.exit(
            '\n\033[1;36mPlease use pip/conda to install it or append its path to PYTHONPATH.\033[0m\n'
        )
    # Record in the illustration file which charge model produced the output.
    with open(self.temp_illustration, 'a') as tmp_f0:
        tmp_f0.write(
            '# ligand with charge from pybel(charge model: %s).\n\t%s\n' %
            (self.charge_model, 'lig_pybel.mol2'))
    with open('lig_pybel.mol2', 'w') as lig_object:
        # NOTE(review): the iterable below is damaged in the source -- the
        # expression that was split on ',' is missing and the parentheses no
        # longer balance. Restore the original comma-separated ligand list.
        for lig_file in list(',')):
            in_format = lig_file.split('.')[-1]
            in_put = lig_file.strip()
            out_put = pybel.Outputfile('mol2', 'lig_pybel_tmp.mol2')
            # Only the first molecule of each ligand file is used.
            mol = list(pybel.readfile(in_format, in_put))[0]
            mol.addh()
            if self.draw_key:
                mol.draw(show=False, filename='%s.png' % lig_file.split('.')[0])
            mol.calccharges(model=self.charge_model)
            out_put.write(mol)
            # NOTE(review): out_put is never closed before the temporary
            # file is read back, and the handle opened for reading is not
            # closed either -- confirm the data is flushed at this point.
            lig_object.writelines([_ for _ in open('lig_pybel_tmp.mol2')])
            # NOTE(review): indentation was lost in the source; the unlink is
            # assumed to run per ligand, since the temporary writer above is
            # opened without overwrite.
            os.unlink('lig_pybel_tmp.mol2')
def main(input_ext, inputfile, output_ext, outputfilename, nconfs, rmsd_cutoff, energy_cutoff):
    """Generate diverse conformers for every input molecule and write them.

    Args:
        input_ext: pybel format of ``inputfile``.
        inputfile: path of the molecules to read.
        output_ext: pybel format of the output.
        outputfilename: output path (overwritten).
        nconfs, rmsd_cutoff, energy_cutoff: forwarded to the MMFF94 force
            field's ``DiverseConfGen(rmsd_cutoff, nconfs, energy_cutoff)``.

    Every conformer of every molecule is written to the output file and
    progress/timing lines are printed per molecule.
    """
    ff = pybel._forcefields['mmff94']
    outputfile = pybel.Outputfile(output_ext, outputfilename, overwrite=True)
    try:
        for i, mol in enumerate(pybel.readfile(input_ext, inputfile)):
            t = time.time()
            print("**Molecule %d\n..title = %s" % (i, mol.title))
            print("..number of rotatable bonds = %d" % mol.OBMol.NumRotors())
            mol.addh()
            ff.Setup(mol.OBMol)
            ff.DiverseConfGen(rmsd_cutoff, nconfs, energy_cutoff)
            ff.GetConformers(mol.OBMol)
            confdata = pybel.ob.toConformerData(
                mol.OBMol.GetData(pybel.ob.ConformerData))
            energies = confdata.GetEnergies()
            N = mol.OBMol.NumConformers()
            # Sanity check: one stored energy per conformer.
            assert N == len(energies)
            # The count was previously missing, so a literal "%d" was printed.
            print("..generated %d conformers" % N)
            u = time.time()
            # A separate index name keeps the molecule counter ``i`` intact.
            for conf_index in range(N):
                mol.OBMol.SetConformer(conf_index)
                outputfile.write(mol)
            print("..(overall time = %.1fs writing results = %.1fs)" % (
                time.time() - t, time.time() - u))
            print("\n")
    finally:
        # Close the writer even when conformer generation fails.
        outputfile.close()
def filter_by_name(args):
    """Write the molecules whose title appears in a list of names.

    Reads SDF molecules from ``args.input`` and writes to ``args.output``
    (format ``args.oformat``) each molecule whose stripped title equals a
    stripped line of ``args.list_of_names``. A name listed several times
    makes the molecule be written that many times, as before.
    """
    outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    try:
        # Read the names once (the file used to be reopened, and never
        # closed, for every molecule) and remember how often each occurs.
        occurrences = {}
        with open(args.list_of_names) as names_file:
            for line in names_file:
                name = line.strip()
                occurrences[name] = occurrences.get(name, 0) + 1

        for mol in pybel.readfile('sdf', args.input):
            for _ in range(occurrences.get(mol.title.strip(), 0)):
                outfile.write(mol)
    finally:
        outfile.close()
def main():
    """Split every SDF file of a folder into one ``pose<N>.sdf`` per molecule.

    ``sys.argv[1]`` is the folder to read SDF files from and
    ``sys.argv[2]`` the folder to store the poses in. Entries whose name
    starts with ``.`` are ignored. ``N`` is the lowest number, counting up
    from the previous one, for which no pose file exists yet.

    The paths are made absolute up front and joined explicitly: the former
    repeated ``os.chdir`` calls failed for relative arguments once the
    working directory had moved to the store folder. The working directory
    is no longer changed.
    """
    readPath = os.path.abspath(str(sys.argv[1]))
    storePath = os.path.abspath(str(sys.argv[2]))

    # All visible file names inside of the read folder.
    nameList = [filename for filename in os.listdir(readPath)
                if not filename.startswith('.')]

    i = 0  # running number used for the output file names
    for filename in nameList:
        for mol in pybel.readfile('sdf', os.path.join(readPath, filename)):
            # Advance i to the next file name that is still free.
            target = os.path.join(storePath, 'pose%s.sdf' % i)
            while os.path.exists(target):
                i += 1
                target = os.path.join(storePath, 'pose%s.sdf' % i)
            out = pybel.Outputfile('sdf', target)
            try:
                out.write(mol)
            finally:
                out.close()
def main():
    """Copy an SDF file, replacing each title by one of its metadata values.

    Command line: ``--infile``, ``--outfile`` and ``--key`` are required.
    Molecules that do not carry the key are written unchanged.
    """
    parser = argparse.ArgumentParser(
        description="Change the title from a molecule file to metadata "
                    "value of a given-id of the same molecule file.",
    )
    parser.add_argument('--infile', '-i', required=True,
                        help="path to the input file")
    parser.add_argument('--outfile', '-o', required=True,
                        help="path to the output file")
    parser.add_argument(
        '--key', '-k', required=True,
        help="the metadata key from the sdf file which should inlcude the new title")
    args = parser.parse_args()

    output = pybel.Outputfile("sdf", args.outfile, overwrite=True)
    try:
        for mol in pybel.readfile("sdf", args.infile):
            # NOTE(review): the ``mol.data`` operands of this test and
            # lookup were missing in the source and are restored here --
            # confirm.
            if args.key in mol.data:
                mol.title = mol.data[args.key]
            output.write(mol)
    finally:
        # Close the writer even when reading or writing fails.
        output.close()
def saveMol(prot, outPref, outFmt):
    """Patch atom types and save ``prot`` as ``<outPref>.<outFmt>`` in the cwd.

    Patches applied before writing:
      * every atom with atomic number 1 gets type ``'H'``;
      * inside residues named ``HEM``, type ``'FE'`` becomes ``'Fe'`` and
        type ``'Du'`` is replaced by the first character of the atom's
        left-stripped residue atom id.

    Returns the path of the written file.
    """
    workDir = os.getcwd()

    # adjust H
    for atom in prot:
        if atom.atomicnum == 1:
            atom.OBAtom.SetType('H')

    # adjust heme atoms
    for residue in ob.OBResidueIter(prot.OBMol):
        if residue.GetName() != 'HEM':
            continue
        for heme_atom in ob.OBResidueAtomIter(residue):
            atom_type = heme_atom.GetType()
            if atom_type == 'FE':
                heme_atom.SetType('Fe')
            elif atom_type == 'Du':
                atom_id = residue.GetAtomID(heme_atom).lstrip()
                heme_atom.SetType(atom_id[0])

    print("save mol2 for docking")
    templfn = os.path.join(workDir, "%s.%s" % (outPref, outFmt))
    writer = pb.Outputfile(outFmt, templfn, overwrite=True)
    writer.write(prot)
    writer.close()
    return templfn
def main(workdir):
    """
    Main method for generating many kinbot runs to get a large dataset of
    initial ts structures.

    ``workdir`` is user-expanded and used as a plain string prefix (it is
    concatenated directly with file and folder names). ``smi.dat`` in it is
    loaded as module ``par``; every entry of ``par.smiles`` is written to
    ``species.sdf``, gets its own folder with an ``input.inp``, and is
    queued as a job for ``run_threads``.
    """
    # ``dir`` used to shadow the builtin of the same name.
    base_dir = os.path.expanduser(workdir)

    # Read the .dat file; the handle is now closed once it has been loaded.
    # NOTE(review): the path argument of load_source is an empty string in
    # the source; the original literal looks lost -- confirm.
    with open('{}smi.dat'.format(base_dir)) as f:
        par = imp.load_source('par', '', f)

    # Make a sdf file for visualization.
    output = pybel.Outputfile("sdf", base_dir + "species.sdf", overwrite=True)
    try:
        for name in par.smiles:
            output.write(pybel.readstring("smi", par.smiles[name]))
    finally:
        output.close()

    # List with the jobs that need to be done.
    jobs = []
    for name in par.smiles:
        # Location where the calculations will be done.
        test_dir = base_dir + name
        if not os.path.exists(test_dir):
            os.mkdir(test_dir)
        # Write the input file into the working directory.
        write_input_file(par, name, par.smiles[name], test_dir + '/input.inp')
        # Jobs are named from the unexpanded ``workdir``, as before.
        jobs.append(workdir + name + '/')

    run_threads(jobs, 'eric', max_running=3)
def main():
    """Collect the coordinates of ranked docked molecules into one file.

    The ranking table is parsed with ``processRanking``; every coordinate
    file it names is read as SDF, and each molecule whose
    ``(title, basename)`` pair is in the ranking is copied into it. All
    ranked molecules are then written to the output file with
    ``_<basename>`` appended to their title.
    """
    # NOTE(review): the description and the rankingFile help text read
    # "generated by" with nothing after it; a script name looks lost in the
    # source. The strings are kept as found.
    commandLineParser = argparse.ArgumentParser(
        description="Docking Results Ranking to Three Dimensions: takes "
                    "a table generated by and builds a file "
                    "with the coordinates of the docked molecules")
    commandLineParser.add_argument("rankingFile", help="file generated by")
    commandLineParser.add_argument("outputFile", help="output filename")
    commandLineParser.add_argument(
        "-r", "--lowest-ranking", dest="lowestRanking", default=1, type=int,
        help="Last ranking level to be processed. Default: 1")
    commandLineParser.add_argument(
        "-f", "--output-format", dest="outputFormat", default="sdf",
        help="Format of the file to be written. It must be a openbabel supported output format. Deafult: sdf")
    options = commandLineParser.parse_args()

    (ranking, basenames) = processRanking(options.rankingFile,
                                          options.lowestRanking)
    for (basename, coordFile) in basenames:
        try:
            mols = pybel.readfile("sdf", coordFile)
        except Exception:
            # No bare except (it also swallowed KeyboardInterrupt), and the
            # failure is now reported with a non-zero exit status.
            print("Can't open " + coordFile, file=sys.stderr)
            sys.exit(1)
        for mol in mols:
            if (mol.title, basename) in ranking:
                # Store an independent copy of the molecule.
                ranking[(mol.title, basename)] = pybel.Molecule(
                    pybel.ob.OBMol(mol.OBMol))

    out = pybel.Outputfile(filename=options.outputFile,
                           format=options.outputFormat, overwrite=True)
    try:
        for (molname, basename) in list(ranking.keys()):
            mol = ranking[(molname, basename)]
            mol.title += "_" + basename
            out.write(mol)
    finally:
        out.close()
def __init__(self, inputFile, reference=None, cutoff=10.0, residueList=None):
    """Initialization of the protein, defined by a list of residues

    Args:
        inputFile: path to a ``.mol2`` or ``.pdb`` file (extension matched
            case-insensitively). A PDB file is first converted to
            ``<name>_conv.mol2`` next to it and ``self.inputFile`` then
            points to the converted file.
        reference, cutoff: accepted but not used by the visible code.
        residueList: stored on the instance as ``self.residueList``.

    Raises:
        ValueError: for any other file extension.
    """
    self.residueList = residueList
    self.residues = {}
    self.inputFile = inputFile
    root, fileExtension = os.path.splitext(inputFile)
    extension = fileExtension.lower()
    if extension == '.pdb':
        # Build the converted name from the split path. The previous
        # case-sensitive ``replace('.pdb', ...)`` left e.g. 'X.PDB'
        # unchanged, so the conversion overwrote the input file itself.
        outfile = root + '_conv.mol2'
        for mol in pybel.readfile("pdb", inputFile):
            output = pybel.Outputfile("mol2", outfile, overwrite=True)
            try:
                output.write(mol)
            finally:
                output.close()
            self.inputFile = outfile
    elif extension != '.mol2':
        raise ValueError(
            '{} files are not supported for the protein.'.format(
                fileExtension[1:].upper()))
    # NOTE(review): as far as the flattened source shows, MOL2 input was
    # parsed twice (once in its branch and once here); it is now parsed
    # once -- confirm residuesFromMOL2File has no need for a second pass.
    self.residuesFromMOL2File()
    # if not self.residueList:
    #     self.residueList = self.detectCloseResidues(reference, cutoff)
    self.cleanResidues()
def makefiles(filename, folder, length, repunit=1):
    """Build oligomer SMILES from monomers and write Gaussian inputs for them.

    Monomers come from ``getmonomers(filename)``. With ``repunit == 1`` each
    monomer is repeated ``length`` times; with ``repunit == 2`` each monomer
    is paired with every later monomer and the pair is repeated
    ``length / repunit`` times (integer division). For every resulting
    SMILES the string is printed and appended to ``<folder>/<folder>.txt``,
    the molecule is passed through ``globalopt``, a ``<i>.gjf`` file is
    written into ``folder`` and the molecule, titled ``str(i)``, is added to
    ``<folder>/<folder>.sdf``.
    """
    monos = getmonomers(filename)
    smiles = []
    for index, smile in enumerate(monos):
        if repunit == 1:
            smiles.append(smile * length)
        elif repunit == 2:
            for partner in monos[index + 1:]:
                smiles.append(("%s%s" % (smile, partner)) * (length // repunit))

    info = open(os.path.join(folder, folder + ".txt"), "w")
    sdf = pybel.Outputfile("sdf", os.path.join(folder, folder + ".sdf"),
                           overwrite=True)
    for number, smile in enumerate(smiles):
        print("%s %s" % (number, str(smile)))
        info.write(str(smile) + "\n")
        mol = pybel.readstring("smi", smile)
        globalopt(mol)
        # Drop the first three lines of the generated Gaussian input and
        # rewrite its "0 3" line to "0 1".
        gau_text = mol.write("gau").replace("0 3\n", "0 1\n")
        geometry_lines = gau_text.split("\n")[3:]
        template = (header + "\n\n" + smile + "\n"
                    + "\n".join(geometry_lines) + header_b)
        gaussian = template % (number, number)
        with open(os.path.join(folder, "%d.gjf" % number), "w") as gjf:
            gjf.write(gaussian)
        mol.title = str(number)
        sdf.write(mol)
    info.close()
    sdf.close()
def compute_properties(args):
    """Compute properties for each molecule and write them out.

    Molecules are read from ``args.input`` (format ``args.iformat``); those
    with five or fewer heavy atoms are skipped. With ``args.oformat ==
    'sdf'`` the properties from ``cheminfolib.get_properties_ext`` are
    stored on the molecule under their ``cheminfolib.ColumnNames`` and the
    molecule is written; otherwise one tab-separated line of values is
    written per molecule, preceded by a header line when ``args.header`` is
    set.
    """
    sdf_output = args.oformat == 'sdf'
    if sdf_output:
        outfile = pybel.Outputfile(args.oformat, args.output, overwrite=True)
    else:
        outfile = open(args.output, 'w')
    try:
        if not sdf_output and args.header:
            # The header is derived from the first molecule. An empty input
            # now yields no header instead of raising StopIteration.
            first = next(pybel.readfile(args.iformat, args.input), None)
            if first is not None:
                metadata = cheminfolib.get_properties_ext(first)
                outfile.write('%s\n' % '\t'.join(
                    [cheminfolib.ColumnNames[key] for key in metadata]))

        for mol in pybel.readfile(args.iformat, args.input):
            if mol.OBMol.NumHvyAtoms() > 5:
                metadata = cheminfolib.get_properties_ext(mol)
                if sdf_output:
                    # NOTE(review): the callee of the per-key dict update
                    # was missing in the source; restored as storing each
                    # value in ``mol.data`` -- confirm. A plain loop
                    # replaces the list comprehension that was only used
                    # for its side effect.
                    for key in metadata:
                        mol.data[cheminfolib.ColumnNames[key]] = metadata[key]
                    outfile.write(mol)
                else:
                    outfile.write('%s\n' % (
                        '\t'.join([str(metadata[key]) for key in metadata])))
    finally:
        outfile.close()
def aggregate(self):
    """Remove hydrogens, add <MOLID> and gather all ligands in one SDF.

    Every ``ZINC*`` entry below the untarred directory is read as SDF;
    files that fail to load are reported on stderr and skipped. Each
    molecule gets its hydrogens removed and its title stored as ``MOLID``
    before it is written to ``<ligand_code>_1.sdf`` in
    ``self.subset_work_dir``; that path is also kept in
    ``self.aggregated_ofn``. A failure while writing is reported on stderr
    and ends the writing.
    """
    untarred_dir = self.requires().output().path
    zincs = glob(os.path.join(untarred_dir, "ZINC*"))
    mols = []
    for zinc in zincs:
        try:
            # The file is read completely before any of it is kept, so a
            # file that fails half-way contributes nothing.
            mols.extend(list(pybel.readfile('sdf', zinc)))
        except Exception:
            print("WARNING: ", "Fail to load zinc ligand %s" % zinc,
                  file=sys.stderr)

    ofn = os.path.join(self.subset_work_dir, self.ligand_code + '_1.sdf')
    self.aggregated_ofn = ofn
    ofs = pybel.Outputfile('sdf', ofn, overwrite=True)
    try:
        for mol in mols:
            mol.removeh()
            # NOTE(review): the receiver of this subscript assignment was
            # missing in the source; restored as ``mol.data`` -- confirm.
            mol.data['MOLID'] = mol.title
            ofs.write(mol)
    except Exception as detail:
        print("WARNING:", detail, file=sys.stderr)
    finally:
        ofs.close()
def main(readPath='/Users/brycekroencke/Documents/Fellowship/data/largeSdfFiles',
         storePath='/Users/brycekroencke/Documents/Fellowship/data/poses'):
    """Split every SDF file of a folder into one ``pose<N>.sdf`` per molecule.

    Args:
        readPath: folder to read the SDF files from. Defaults to the path
            that used to be hard-coded.
        storePath: folder to store the single-molecule files in. Defaults
            to the path that used to be hard-coded.

    Entries whose name starts with ``.`` are skipped. ``N`` is the lowest
    number, counting up from the previous one, for which no pose file
    exists yet. Paths are joined explicitly instead of switching the
    working directory back and forth, so relative paths work and the
    working directory is left alone.
    """
    readPath = os.path.abspath(readPath)
    storePath = os.path.abspath(storePath)

    # All visible file names inside of the read folder.
    nameList = [filename for filename in os.listdir(readPath)
                if not filename.startswith('.')]

    i = 0  # running number used for the output file names
    for filename in nameList:
        for mol in pybel.readfile('sdf', os.path.join(readPath, filename)):
            # Advance i to the next file name that is still free.
            target = os.path.join(storePath, 'pose%s.sdf' % i)
            while os.path.exists(target):
                i += 1
                target = os.path.join(storePath, 'pose%s.sdf' % i)
            out = pybel.Outputfile('sdf', target)
            try:
                out.write(mol)
            finally:
                out.close()
def convertData(startExt, targetExt, path):
    """Convert the first molecule of ``path`` into format ``targetExt``.

    The output is written next to the input, with the extension replaced
    by ``targetExt``. Nothing is done when that file already exists.

    Args:
        startExt: pybel format of the input file.
        targetExt: pybel format (and extension) of the output file.
        path: path of the input file.
    """
    outputfile = f"{os.path.splitext(path)[0]}.{targetExt}"
    if os.path.isfile(outputfile):
        # Already converted earlier; keep the existing file.
        return
    mol = next(pybel.readfile(startExt, path))
    out = pybel.Outputfile(targetExt, outputfile)
    try:
        out.write(mol)
    finally:
        # Release the file handle even if writing fails.
        out.close()
def addh(args):
    """Re-protonate molecules and write them out in the input format.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``. Molecules with five or fewer heavy
    atoms are dropped. For the others all hydrogens are removed and then
    added again through ``OBMol.AddHydrogens(args.polar, True, args.pH)``.
    """
    writer = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    for molecule in pybel.readfile(args.iformat, args.input):
        obmol = molecule.OBMol
        if not obmol.NumHvyAtoms() > 5:
            continue
        molecule.removeh()
        molecule.OBMol.AddHydrogens(args.polar, True, args.pH)
        writer.write(molecule)
    writer.close()
def remove_ions(args):
    """Strip salts from molecules and write the ones that stay large enough.

    Molecules are read from ``args.input`` and written to ``args.output``,
    both in format ``args.iformat``. A molecule is kept only if it has more
    than five heavy atoms both before and after ``OBMol.StripSalts(0)``.
    """
    writer = pybel.Outputfile(args.iformat, args.output, overwrite=True)
    for molecule in pybel.readfile(args.iformat, args.input):
        if not molecule.OBMol.NumHvyAtoms() > 5:
            continue
        molecule.OBMol.StripSalts(0)
        # Stripping may leave only a small fragment behind; drop those too.
        if not molecule.OBMol.NumHvyAtoms() > 5:
            continue
        writer.write(molecule)
    writer.close()
def get_pybel_mol(self):
    '''
    If the object has a rdkit molecule, use this to get a pybel molecule.

    ``self._rdkit_mol`` is written to the temporary file ``_temp.sdf``, the
    first molecule of that file is read back into ``self._pybel_mol`` and
    the temporary file is removed again.
    '''
    # NOTE(review): the name is a bare literal in the source; part of the
    # expression may have been lost (e.g. a per-object prefix) -- confirm.
    temp_sdf = '_temp.sdf'
    # overwrite=True so a stale temp file from an aborted run does not block.
    output = pybel.Outputfile('sdf', temp_sdf, overwrite=True)
    try:
        try:
            # NOTE(review): pybel writers are normally given pybel
            # molecules; confirm _rdkit_mol is acceptable here.
            output.write(self._rdkit_mol)
        finally:
            # The writer was never closed before, so the file could be read
            # back before its content had been flushed.
            output.close()
        self._pybel_mol = next(pybel.readfile('sdf', temp_sdf))
    finally:
        os.remove(temp_sdf)
def __init__(self, smiles, file_format='pdb', filename='main'):
    """Build a 3D structure from SMILES and save it to ``<filename>.<format>``.

    Stores ``smiles``, the format and the full file name on the instance,
    generates 3D coordinates, runs a 1500-step MMFF94 local optimisation
    and writes the molecule to disk.
    """
    self.smiles = smiles
    self.format = file_format
    self.filename = filename + '.' + file_format

    structure = pybel.readstring('smi', self.smiles)
    structure.make3D()
    structure.localopt(forcefield='mmff94', steps=1500)

    writer = pybel.Outputfile(self.format, self.filename)
    writer.write(structure)
    writer.close()
def convertData(namespace, startExt, targetExt, proteinnumber, path):
    """Convert the first molecule of one complex file into another format.

    ``Preprocessing.getAllMolPaths(path, namespace + '.' + startExt)``
    supplies the file paths and complex names; entry ``proteinnumber`` is
    converted and written to
    ``path + <complex> + '/' + <complex> + namespace + '.' + targetExt``.
    """
    pattern = namespace + '.' + startExt
    paths, complexes = Preprocessing.getAllMolPaths(path, pattern)
    reader = pybel.readfile(startExt, paths[proteinnumber])
    molec = next(reader)
    complex_name = complexes[proteinnumber]
    destination = (path + complex_name + '/' + complex_name + namespace
                   + '.' + targetExt)
    writer = pybel.Outputfile(targetExt, destination)
    writer.write(molec)
    writer.close()
def main():
    """Count the canonical ring fragments of all molecules in a file.

    ``sys.argv[1]`` is the input file; its format is taken from the file
    extension. Without an argument a usage message is printed and the
    program exits with status 1. For every molecule the ring systems are
    determined and turned into canonical fragments, which are counted. The
    fragments are written to ``fragments.sdf`` in order of decreasing
    count, with the count stored as ``COUNT``.
    """
    if len(sys.argv) < 2:
        print("No input file provided: filetosprocess.ext")
        print("The script will determine which file type to read from by the extension.")
        print("It is recommended you run your structures through,\nfor example, ChemAxon's Standardizer first.")
        sys.exit(1)

    input_path = sys.argv[1]
    # Use the text after the LAST dot: ``split('.')[1]`` picked the wrong
    # piece for paths such as './mols.sdf' or 'set.v2.sdf'.
    input_format = input_path.rsplit('.', 1)[-1]

    molnum = 0
    Fragments = dict()
    for mol in pybel.readfile(input_format, input_path):
        molnum += 1
        if not (molnum % 10):
            print("Molecules processed: %s" % molnum)
        mol.OBMol.DeleteHydrogens()
        smiles = mol.write("smi").split("\t")[0]
        canmol = pybel.readstring("smi", smiles)
        FusedRingsMatrix = GetFusedRingsMatrix(canmol)
        FusedRings = GetFusedRings(FusedRingsMatrix, len(canmol.sssr))
        RingSystems = GetAtomsInRingSystems(canmol, FusedRings, inclexo=True)
        # Deleting all non-ring atoms is done in GetCanonicalFragments().
        # Get all rings/ring systems and count them.
        for frag in GetCanonicalFragments(smiles, RingSystems):
            Fragments[frag] = Fragments.get(frag, 0) + 1

    # Write results to file, most frequent fragment first.
    print("Writing results to file.")
    out = pybel.Outputfile("sdf", "fragments.sdf", overwrite=True)
    try:
        for k, v in sorted(Fragments.items(), key=itemgetter(1), reverse=True):
            mol = pybel.readstring("smi", k)
            # NOTE(review): the receiver of this subscript assignment was
            # missing in the source; restored as ``mol.data`` -- confirm.
            mol.data["COUNT"] = v
            mol.OBMol.DeleteHydrogens()
            out.write(mol)
    finally:
        out.close()
def separate(self, molecule_type, outfile):
    """Write each disconnected fragment of ``self.mol`` as its own record.

    Args:
        molecule_type: pybel output format.
        outfile: output path (overwritten).

    Every fragment returned by ``OBMol.Separate()`` is annotated with
    ``self.parent_name`` and ``self.parent_can`` before it is written.
    """
    output = pybel.Outputfile(molecule_type, outfile, overwrite=True)
    try:
        for fragment in self.mol.OBMol.Separate():
            mol = pybel.Molecule(fragment)
            # NOTE(review): the callee receiving this dict was missing in
            # the source; restored as ``mol.data.update`` -- confirm.
            mol.data.update({
                "parent name": self.parent_name,
                "parent canonical smiles": self.parent_can,
            })
            output.write(mol)
    finally:
        # Close the writer even when a fragment fails to be written.
        output.close()
def d3_viewer(self, molecule, viewer='avogadro'):
    """
    Opens the geometry of the molecule in a 3d viewer. Currently only
    avogadro is supported, but you can test it with other programms as well.

    The molecule returned by ``self.one_mol_from_sdf(molecule)`` is written
    to ``.tmp.sdf``, ``viewer`` is run on that file and the file is removed
    once the viewer call returns.
    """
    mol = self.one_mol_from_sdf(molecule)
    output = pybel.Outputfile('sdf', ".tmp.sdf", overwrite=True)
    try:
        output.write(mol)
    finally:
        # Close (and thereby flush) before the viewer opens the file; the
        # writer used to be left open.
        output.close()
    try:
        call([viewer, '.tmp.sdf'])
    finally:
        # Remove the temporary file even if the viewer could not be started.
        os.remove(".tmp.sdf")