def _to_utf8(text):
    """Re-encode a windows-1252 byte string as UTF-8."""
    return text.decode("windows-1252").encode("utf-8", "ignore")


def _text_cell(row, columns, key):
    """Return column *key* of *row* as UTF-8 text, or "" when it is absent."""
    index = columns.get(key)
    if index is None or index >= len(row):
        return ""
    return _to_utf8(row[index])


def _float_or_none(value):
    """Return *value* converted to float, or None if it is not a number."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


# NOTE(review): this module defines add_mols more than once; the last
# definition in the file is the one in effect after import.
def add_mols(data):
    """Import molecules from an iterator of table rows.

    The first row of *data* is the header.  Column titles are matched
    case-insensitively against: name, altname, supplier, supplierid,
    storage, storageid, amount, unit, cas, smiles, supplier2, supplierid2,
    comment and molclass.  Other columns are ignored.  At least a "name"
    or a "smiles" column must be present.

    The recognised columns are printed and the user is asked to confirm.
    After a "yes", every remaining row is saved as a ``Molecule``.  Text
    cells are transcoded from windows-1252 to UTF-8.  When Open Babel can
    process the SMILES, the calculated properties (descriptors, formula,
    2D molfile, rotatable bonds, Fsp3, fingerprint) are saved too;
    otherwise only the values taken from the table are saved.

    Returns False when the user does not answer "yes", and None in every
    other case (including a missing or unusable header).
    """
    # Recognised header titles (lower case), in the order they are reported.
    known = ("name", "altname", "supplier", "supplierid", "storage",
             "storageid", "amount", "unit", "cas", "smiles", "supplier2",
             "supplierid2", "comment", "molclass")

    # An empty table has no header row; treat it like an unusable header.
    head = next(data, None)

    # Get the indexes of columns; a repeated title keeps its last position.
    columns = {}
    for index, title in enumerate(head or ()):
        key = title.lower()
        if key in known:
            columns[key] = index

    # has to have either smiles or name column
    if "smiles" not in columns and "name" not in columns:
        print("No valid columns were found in the table")
        return None

    print("Following columns were found and will be imported:")
    print("  ".join(head[columns[key]] for key in known if key in columns))

    # Check with user if everything looks OK
    userinput = raw_input("Is this ok? yes/no: ")
    if userinput != "yes":
        print("Exiting, no changes were made...")
        return False

    for line in data:
        if not line:
            # Blank row: nothing to import.
            continue

        # Transcode the text cells to UTF-8; missing cells become "".
        name = _text_cell(line, columns, "name")
        smiles = _text_cell(line, columns, "smiles")
        unit = _text_cell(line, columns, "unit")

        # The amount is only kept when it is a number and the table has a
        # unit column; otherwise the placeholder pair 0 / "X" is stored.
        amount = None
        if "unit" in columns and "amount" in columns:
            if columns["amount"] < len(line):
                amount = _float_or_none(line[columns["amount"]])
        if amount is None:
            amount = 0
            unit = "X"

        # Values stored whether or not the property calculation succeeds.
        common = dict(
            name=name,
            SMILES=smiles,
            altname=_text_cell(line, columns, "altname"),
            supplier=_text_cell(line, columns, "supplier"),
            supplierID=_text_cell(line, columns, "supplierid"),
            amount=amount,
            unit=unit,
            CAS=_text_cell(line, columns, "cas"),
            storage=_text_cell(line, columns, "storage"),
            storageID=_text_cell(line, columns, "storageid"),
            altsupplier=_text_cell(line, columns, "supplier2"),
            altsupplierID=_text_cell(line, columns, "supplierid2"),
            comment=_text_cell(line, columns, "comment"),
            molclass=_text_cell(line, columns, "molclass"),
        )

        try:
            mol = pybel.readstring("smi", smiles)
            descs = mol.calcdesc()

            # generate 2D coordinates, needs openbabel
            obConversion = openbabel.OBConversion()
            obConversion.SetInAndOutFormats("smi", "mdl")
            obmol = openbabel.OBMol()
            obConversion.ReadString(obmol, smiles)
            gen2d = openbabel.OBOp.FindType("gen2d")
            gen2d.Do(obmol)
            # The molfile is stored with its newlines written as a literal
            # backslash followed by "n".
            outMDL = obConversion.WriteString(obmol).replace("\n", r"\n")

            # Get number of rotatable bonds
            # NOTE(review): the bond between the two atom expressions is
            # written "\&!@", kept verbatim from the original; confirm
            # that "\" (rather than "-") is the intended bond symbol.
            smarts = pybel.Smarts(
                r"[!$([NH]!@C(=O))&!D1&!$(*#*)]\&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]")
            nrb = len(smarts.findall(mol))

            # Calculate Fsp3: [CX4] matches divided by all carbon matches
            nsp3c = float(len(pybel.Smarts("[CX4]").findall(mol)))
            nallc = float(len(pybel.Smarts("[#6]").findall(mol)))
            if nallc > 0:
                fsp3 = nsp3c / nallc
                print(fsp3)
            else:
                fsp3 = ""

            # Get fingerprint and molecular complexity (number of set bits)
            bitson = mol.calcfp().bits
            nbitson = len(bitson)

            print(name)
            m = Molecule(CMW=descs["MW"], CHN=mol.formula,
                         HBA=descs["HBA1"], HBD=descs["HBD"],
                         logP=descs["logP"], tpsa=descs["TPSA"],
                         molfile=outMDL, nrb=nrb, fingerprint=bitson,
                         complexity=nbitson, fsp3=fsp3, **common)
            m.save()
        except Exception:
            # Property calculation (or saving the full record) failed:
            # save the molecule with the table values only.
            m = Molecule(**common)
            m.save()

    return None
def _to_utf8(text):
    """Re-encode a windows-1252 byte string as UTF-8."""
    return text.decode("windows-1252").encode("utf-8", "ignore")


# NOTE(review): this module defines addsingle more than once; the last
# definition in the file is the one in effect after import.
def addsingle(name, altname, supplier, supplierID, storage, storageID, unit,
              amount, cas, smiles, comment, molclass, platebarcode,
              samplebarcode, randomstring):
    """Save a single molecule, with calculated properties when possible.

    All text arguments except *randomstring* are transcoded from
    windows-1252 to UTF-8; *randomstring* is passed to ``Molecule``
    unchanged.

    *amount* must be convertible to float.  If it is not, the molecule is
    stored with amount 0 and unit "X".

    When Open Babel can process *smiles*, the calculated properties
    (descriptors, formula, 2D molfile, rotatable bonds, Fsp3, fingerprint)
    are saved too.  ``detect_pains`` is run only when *molclass* contains
    "hts" or "compound" (case-insensitive); otherwise ``pains`` is set to
    'Not checked'.  If the calculation fails, only the supplied values
    are saved.
    """
    # Transcode the text fields to UTF-8
    name = _to_utf8(name)
    altname = _to_utf8(altname)
    supplier = _to_utf8(supplier)
    supplierID = _to_utf8(supplierID)
    storage = _to_utf8(storage)
    storageID = _to_utf8(storageID)
    unit = _to_utf8(unit)
    cas = _to_utf8(cas)
    smiles = _to_utf8(smiles)
    comment = _to_utf8(comment)
    molclass = _to_utf8(molclass)
    platebarcode = _to_utf8(platebarcode)
    samplebarcode = _to_utf8(samplebarcode)

    # Make sure amount is a number
    try:
        amount = float(amount)
    except (TypeError, ValueError):
        amount = 0
        unit = "X"

    # Values stored whether or not the property calculation succeeds.
    common = dict(
        name=name,
        SMILES=smiles,
        altname=altname,
        supplier=supplier,
        supplierID=supplierID,
        amount=amount,
        unit=unit,
        CAS=cas,
        storage=storage,
        storageID=storageID,
        comment=comment,
        molclass=molclass,
        platebarcode=platebarcode,
        samplebarcode=samplebarcode,
        randomstring=randomstring,
    )

    try:
        mol = pybel.readstring("smi", smiles)
        descs = mol.calcdesc()

        # generate 2D coordinates, needs openbabel
        obConversion = openbabel.OBConversion()
        obConversion.SetInAndOutFormats("smi", "mdl")
        obmol = openbabel.OBMol()
        obConversion.ReadString(obmol, smiles)
        gen2d = openbabel.OBOp.FindType("gen2d")
        gen2d.Do(obmol)
        # The molfile is stored with its newlines written as a literal
        # backslash followed by "n".
        outMDL = obConversion.WriteString(obmol).replace("\n", r"\n")

        # Get number of rotatable bonds
        # NOTE(review): the bond between the two atom expressions is
        # written "\&!@", kept verbatim from the original; confirm that
        # "\" (rather than "-") is the intended bond symbol.
        smarts = pybel.Smarts(
            r"[!$([NH]!@C(=O))&!D1&!$(*#*)]\&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]")
        nrb = len(smarts.findall(mol))

        # Calculate Fsp3: [CX4] matches divided by all carbon matches
        nsp3c = float(len(pybel.Smarts("[CX4]").findall(mol)))
        nallc = float(len(pybel.Smarts("[#6]").findall(mol)))
        if nallc > 0:
            fsp3 = nsp3c / nallc
        else:
            fsp3 = ""

        # Get fingerprint and molecular complexity (number of set bits)
        bitson = mol.calcfp().bits
        nbitson = len(bitson)

        lowered = molclass.lower()
        if 'hts' in lowered or 'compound' in lowered:
            pains = detect_pains(mol)
        else:
            pains = 'Not checked'

        m = Molecule(CMW=descs["MW"], CHN=mol.formula, HBA=descs["HBA1"],
                     HBD=descs["HBD"], logP=descs["logP"],
                     tpsa=descs["TPSA"], molfile=outMDL, nrb=nrb,
                     fingerprint=bitson, complexity=nbitson, fsp3=fsp3,
                     pains=pains, **common)
        m.save()
    except Exception:
        # OpenBabel failed (or the full record could not be saved):
        # save the molecule without calculated properties.
        m = Molecule(**common)
        m.save()
def _to_utf8(text):
    """Re-encode a windows-1252 byte string as UTF-8."""
    return text.decode("windows-1252").encode("utf-8", "ignore")


# NOTE(review): this module defines addsingle more than once; the last
# definition in the file is the one in effect after import.
def addsingle(name, altname, supplier, supplierID, storage, storageID, unit,
              amount, cas, smiles, comment, molclass, platebarcode,
              samplebarcode, randomstring):
    """Save a single molecule, with calculated properties when possible.

    All text arguments except *randomstring* are transcoded from
    windows-1252 to UTF-8; *randomstring* is passed to ``Molecule``
    unchanged.

    *amount* must be convertible to float.  If it is not, the molecule is
    stored with amount 0 and unit "X".

    When Open Babel can process *smiles*, the calculated properties
    (descriptors, formula, 2D molfile, rotatable bonds, Fsp3, fingerprint)
    are saved too.  ``detect_pains`` is run only when *molclass* contains
    "hts" or "compound" (case-insensitive); otherwise ``pains`` is set to
    'Not checked'.  If the calculation fails, only the supplied values
    are saved.
    """
    # Transcode the text fields to UTF-8
    name = _to_utf8(name)
    altname = _to_utf8(altname)
    supplier = _to_utf8(supplier)
    supplierID = _to_utf8(supplierID)
    storage = _to_utf8(storage)
    storageID = _to_utf8(storageID)
    unit = _to_utf8(unit)
    cas = _to_utf8(cas)
    smiles = _to_utf8(smiles)
    comment = _to_utf8(comment)
    molclass = _to_utf8(molclass)
    platebarcode = _to_utf8(platebarcode)
    samplebarcode = _to_utf8(samplebarcode)

    # Make sure amount is a number
    try:
        amount = float(amount)
    except (TypeError, ValueError):
        amount = 0
        unit = "X"

    # Values stored whether or not the property calculation succeeds.
    common = dict(
        name=name,
        SMILES=smiles,
        altname=altname,
        supplier=supplier,
        supplierID=supplierID,
        amount=amount,
        unit=unit,
        CAS=cas,
        storage=storage,
        storageID=storageID,
        comment=comment,
        molclass=molclass,
        platebarcode=platebarcode,
        samplebarcode=samplebarcode,
        randomstring=randomstring,
    )

    try:
        mol = pybel.readstring("smi", smiles)
        descs = mol.calcdesc()

        # generate 2D coordinates, needs openbabel
        obConversion = openbabel.OBConversion()
        obConversion.SetInAndOutFormats("smi", "mdl")
        obmol = openbabel.OBMol()
        obConversion.ReadString(obmol, smiles)
        gen2d = openbabel.OBOp.FindType("gen2d")
        gen2d.Do(obmol)
        # The molfile is stored with its newlines written as a literal
        # backslash followed by "n".
        outMDL = obConversion.WriteString(obmol).replace("\n", r"\n")

        # Get number of rotatable bonds
        # NOTE(review): the bond between the two atom expressions is
        # written "\&!@", kept verbatim from the original; confirm that
        # "\" (rather than "-") is the intended bond symbol.
        smarts = pybel.Smarts(
            r"[!$([NH]!@C(=O))&!D1&!$(*#*)]\&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]")
        nrb = len(smarts.findall(mol))

        # Calculate Fsp3: [CX4] matches divided by all carbon matches
        nsp3c = float(len(pybel.Smarts("[CX4]").findall(mol)))
        nallc = float(len(pybel.Smarts("[#6]").findall(mol)))
        if nallc > 0:
            fsp3 = nsp3c / nallc
        else:
            fsp3 = ""

        # Get fingerprint and molecular complexity (number of set bits)
        bitson = mol.calcfp().bits
        nbitson = len(bitson)

        lowered = molclass.lower()
        if 'hts' in lowered or 'compound' in lowered:
            pains = detect_pains(mol)
        else:
            pains = 'Not checked'

        m = Molecule(CMW=descs["MW"], CHN=mol.formula, HBA=descs["HBA1"],
                     HBD=descs["HBD"], logP=descs["logP"],
                     tpsa=descs["TPSA"], molfile=outMDL, nrb=nrb,
                     fingerprint=bitson, complexity=nbitson, fsp3=fsp3,
                     pains=pains, **common)
        m.save()
    except Exception:
        # OpenBabel failed (or the full record could not be saved):
        # save the molecule without calculated properties.
        m = Molecule(**common)
        m.save()
def _to_utf8(text):
    """Re-encode a windows-1252 byte string as UTF-8."""
    return text.decode("windows-1252").encode("utf-8", "ignore")


def _text_cell(row, columns, key):
    """Return column *key* of *row* as UTF-8 text, or "" when it is absent."""
    index = columns.get(key)
    if index is None or index >= len(row):
        return ""
    return _to_utf8(row[index])


def _float_or_none(value):
    """Return *value* converted to float, or None if it is not a number."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


# NOTE(review): this module defines add_mols more than once; the last
# definition in the file is the one in effect after import.
def add_mols(data):
    """Import molecules from an iterator of table rows.

    The first row of *data* is the header.  Column titles are matched
    case-insensitively against: name, altname, supplier, supplierid,
    storage, storageid, amount, unit, cas, smiles, supplier2, supplierid2,
    comment and molclass.  Other columns are ignored.  At least a "name"
    or a "smiles" column must be present.

    The recognised columns are printed and the user is asked to confirm.
    After a "yes", every remaining row is saved as a ``Molecule``.  Text
    cells are transcoded from windows-1252 to UTF-8.  When Open Babel can
    process the SMILES, the calculated properties (descriptors, formula,
    2D molfile, rotatable bonds, Fsp3, fingerprint) are saved too;
    otherwise only the values taken from the table are saved.

    Returns False when the user does not answer "yes", and None in every
    other case (including a missing or unusable header).
    """
    # Recognised header titles (lower case), in the order they are reported.
    known = ("name", "altname", "supplier", "supplierid", "storage",
             "storageid", "amount", "unit", "cas", "smiles", "supplier2",
             "supplierid2", "comment", "molclass")

    # An empty table has no header row; treat it like an unusable header.
    head = next(data, None)

    # Get the indexes of columns; a repeated title keeps its last position.
    columns = {}
    for index, title in enumerate(head or ()):
        key = title.lower()
        if key in known:
            columns[key] = index

    # has to have either smiles or name column
    if "smiles" not in columns and "name" not in columns:
        print("No valid columns were found in the table")
        return None

    print("Following columns were found and will be imported:")
    print("  ".join(head[columns[key]] for key in known if key in columns))

    # Check with user if everything looks OK
    userinput = raw_input("Is this ok? yes/no: ")
    if userinput != "yes":
        print("Exiting, no changes were made...")
        return False

    for line in data:
        if not line:
            # Blank row: nothing to import.
            continue

        # Transcode the text cells to UTF-8; missing cells become "".
        name = _text_cell(line, columns, "name")
        smiles = _text_cell(line, columns, "smiles")
        unit = _text_cell(line, columns, "unit")

        # The amount is only kept when it is a number and the table has a
        # unit column; otherwise the placeholder pair 0 / "X" is stored.
        amount = None
        if "unit" in columns and "amount" in columns:
            if columns["amount"] < len(line):
                amount = _float_or_none(line[columns["amount"]])
        if amount is None:
            amount = 0
            unit = "X"

        # Values stored whether or not the property calculation succeeds.
        common = dict(
            name=name,
            SMILES=smiles,
            altname=_text_cell(line, columns, "altname"),
            supplier=_text_cell(line, columns, "supplier"),
            supplierID=_text_cell(line, columns, "supplierid"),
            amount=amount,
            unit=unit,
            CAS=_text_cell(line, columns, "cas"),
            storage=_text_cell(line, columns, "storage"),
            storageID=_text_cell(line, columns, "storageid"),
            altsupplier=_text_cell(line, columns, "supplier2"),
            altsupplierID=_text_cell(line, columns, "supplierid2"),
            comment=_text_cell(line, columns, "comment"),
            molclass=_text_cell(line, columns, "molclass"),
        )

        try:
            mol = pybel.readstring("smi", smiles)
            descs = mol.calcdesc()

            # generate 2D coordinates, needs openbabel
            obConversion = openbabel.OBConversion()
            obConversion.SetInAndOutFormats("smi", "mdl")
            obmol = openbabel.OBMol()
            obConversion.ReadString(obmol, smiles)
            gen2d = openbabel.OBOp.FindType("gen2d")
            gen2d.Do(obmol)
            # The molfile is stored with its newlines written as a literal
            # backslash followed by "n".
            outMDL = obConversion.WriteString(obmol).replace("\n", r"\n")

            # Get number of rotatable bonds
            # NOTE(review): the bond between the two atom expressions is
            # written "\&!@", kept verbatim from the original; confirm
            # that "\" (rather than "-") is the intended bond symbol.
            smarts = pybel.Smarts(
                r"[!$([NH]!@C(=O))&!D1&!$(*#*)]\&!@[!$([NH]!@C(=O))&!D1&!$(*#*)]")
            nrb = len(smarts.findall(mol))

            # Calculate Fsp3: [CX4] matches divided by all carbon matches
            nsp3c = float(len(pybel.Smarts("[CX4]").findall(mol)))
            nallc = float(len(pybel.Smarts("[#6]").findall(mol)))
            if nallc > 0:
                fsp3 = nsp3c / nallc
                print(fsp3)
            else:
                fsp3 = ""

            # Get fingerprint and molecular complexity (number of set bits)
            bitson = mol.calcfp().bits
            nbitson = len(bitson)

            print(name)
            m = Molecule(CMW=descs["MW"], CHN=mol.formula,
                         HBA=descs["HBA1"], HBD=descs["HBD"],
                         logP=descs["logP"], tpsa=descs["TPSA"],
                         molfile=outMDL, nrb=nrb, fingerprint=bitson,
                         complexity=nbitson, fsp3=fsp3, **common)
            m.save()
        except Exception:
            # Property calculation (or saving the full record) failed:
            # save the molecule with the table values only.
            m = Molecule(**common)
            m.save()

    return None