def CalculateLabuteASA(mol):
    """
    #################################################################
    Labute's Approximate Surface Area (ASA from MOE).

    Usage:

        result=CalculateLabuteASA(mol)

        Input: mol is a molecule object

        Output: result is a dict with the single key 'LabuteASA',
        holding the value rounded to three decimal places
    #################################################################
    """
    # Hydrogens are included in the surface-area calculation (includeHs=1).
    asa = MOE.pyLabuteASA(mol, includeHs=1)
    return {'LabuteASA': round(asa, 3)}
def testTPSAShort(self):
    " Short TPSA test "
    inName = RDConfig.RDDataDir + '/NCI/first_200.tpsa.csv'
    # Read everything up front so the handle is closed before any assertion
    # can fail.
    with open(inName, 'r') as inF:
        lines = inF.readlines()
    for line in lines:
        # str.strip() returns a new string; the result must be kept.
        line = line.strip()
        # Skip blank lines and '#' comment lines.
        if not line or line[0] == '#':
            continue
        smi, ans = line.split(',')
        ans = float(ans)
        mol = Chem.MolFromSmiles(smi)
        # Fail with a readable message instead of an opaque error inside TPSA.
        assert mol, 'failed for smiles: %s' % smi
        calc = MolSurf.TPSA(mol)
        assert feq(calc, ans), 'bad TPSA for SMILES %s (%.2f != %.2f)' % (smi, calc, ans)
def PhyChem(smiles):
    """Compute 19 physicochemical descriptors per molecule and rescale them.

    Arguments:
        smiles (list): list of SMILES strings.

    Returns:
        props (ndarray): m X 19 matrix of descriptors after
            ``Scaler().fit_transform``; m is the number of samples.

    Any SMILES for which a descriptor calculation raises is printed and
    contributes a row of 19 zeros.
    """
    rows = []
    for smile in smiles:
        mol = Chem.MolFromSmiles(smile)
        try:
            # Column order is part of the output contract; do not reorder.
            row = [
                desc.MolWt(mol),
                Crippen.MolLogP(mol),
                Lipinski.NumHAcceptors(mol),
                Lipinski.NumHDonors(mol),
                Lipinski.NumRotatableBonds(mol),
                AllChem.CalcNumAmideBonds(mol),
                AllChem.CalcNumBridgeheadAtoms(mol),
                Lipinski.NumHeteroatoms(mol),
                Lipinski.HeavyAtomCount(mol),
                AllChem.CalcNumSpiroAtoms(mol),
                AllChem.CalcFractionCSP3(mol),
                Lipinski.RingCount(mol),
                AllChem.CalcNumAliphaticRings(mol),
                AllChem.CalcNumAromaticRings(mol),
                AllChem.CalcNumSaturatedRings(mol),
                AllChem.CalcNumHeterocycles(mol),
                MolSurf.TPSA(mol),
                desc.NumValenceElectrons(mol),
                Crippen.MolMR(mol),
            ]
        except Exception:
            # Best effort: report the offending SMILES and keep the row count
            # aligned with the input.
            print(smile)
            row = [0] * 19
        rows.append(row)
    matrix = np.array(rows)
    return Scaler().fit_transform(matrix)
def _testTPSALongNCI(self):
    " Long TPSA test "
    path = os.path.join(RDConfig.RDCodeDir, 'Chem', 'test_data', 'tpsa_regr.csv')
    with open(path, 'r') as handle:
        rows = handle.readlines()
    # Line numbers are 1-based and count comment lines too.
    for lineNo, row in enumerate(rows, 1):
        if row[0] == '#':
            continue
        smi, ans = row.split(',')
        expected = float(ans)
        mol = Chem.MolFromSmiles(smi)
        assert mol, "line %d, failed for smiles: %s" % (lineNo, smi)
        calc = MolSurf.TPSA(mol)
        assert feq(calc, expected), 'line %d: bad TPSA for SMILES %s (%.2f != %.2f)' % (
            lineNo, smi, calc, expected)
def CalculateTPSA(mol):
    """
    #################################################################
    Topological polar surface area based on fragments.

    Implementation based on the Daylight contrib program tpsa.

    Usage:

        result=CalculateTPSA(mol)

        Input: mol is a molecule object

        Output: result is a dict with the single key 'TPSA', holding
        the value rounded to three decimal places
    #################################################################
    """
    return {'TPSA': round(MOE.TPSA(mol), 3)}
def describe_atom(atom_object, use_formal_charge=False, use_Gasteiger=False):
    """Build a flat list of numeric features describing one atom.

    The list holds, in order: atomic number, hybridization code, total
    valence, the larger of the implicit/explicit hydrogen counts, number of
    outer-shell electrons, electronegativity (0 when the symbol is not in the
    ``electronegativity`` table), aromaticity flag as a float, then optionally
    the formal charge and the summed Gasteiger charges, and finally the
    atom's entry from ``MolSurf._LabuteHelper``.

    Args:
        atom_object: atom whose owning molecule is reachable through
            ``GetOwningMol()``.
        use_formal_charge: append ``GetFormalCharge()`` when true.
        use_Gasteiger: append ``_GasteigerHCharge + _GasteigerCharge`` when
            true; both properties must already be set on the atom.

    Returns:
        list of numbers as described above.

    Raises:
        Re-raises whatever the hybridization lookup or ``GetTotalValence()``
        raises, after printing the molecule's SMILES and the atom index.
    """
    mol = atom_object.GetOwningMol()
    contribs = MolSurf._LabuteHelper(mol)
    idx = atom_object.GetIdx()
    # Integer encoding of the hybridization name.
    code = {
        'SP': 1,
        'SP2': 2,
        'SP3': 3,
        'UNSPECIFIED': -1,
        'UNKNOWN': -1,
        'S': 0,
        'SP3D': 4,
        'SP3D2': 5,
    }
    symbol = atom_object.GetSymbol()
    result = [atom_object.GetAtomicNum()]
    try:
        result.append(code[atom_object.GetHybridization().name])
        result.append(atom_object.GetTotalValence())
    except Exception:
        # Identify the offending atom before propagating the error.
        print(Chem.MolToSmiles(mol, canonical=0), idx)
        raise
    result.append(
        max(atom_object.GetNumImplicitHs(), atom_object.GetNumExplicitHs()))
    result.append(p_table.GetNOuterElecs(symbol))
    result.append(electronegativity.get(symbol, 0))
    result.append(float(atom_object.GetIsAromatic()))
    if use_formal_charge:
        result.append(atom_object.GetFormalCharge())
    if use_Gasteiger:
        q_in_neu = (atom_object.GetDoubleProp('_GasteigerHCharge') +
                    atom_object.GetDoubleProp('_GasteigerCharge'))
        result.append(q_in_neu)
    # The helper's output is read at idx + 1; presumably slot 0 holds the
    # hydrogen contribution -- TODO confirm against the RDKit documentation.
    result.append(contribs[idx + 1])
    return result
def filter_druglikeness_5_rules(self, smiles):
    """Drop SMILES that violate the drug-likeness thresholds, in place.

    A SMILES is removed when any of these holds for its molecule:
    MW > 600, ALOGP > 6, ALOGP < 0, HBA > 11, HBD > 7, PSA > 180 or
    ROTB > 11 (strict rotatable-bond definition). SMILES that cannot be
    parsed are removed as well.

    Args:
        smiles: list of SMILES strings; it is modified in place.

    Returns:
        The same list object, now containing only the SMILES that passed.
        The number of removed entries is printed.
    """
    # Build the survivors separately: removing from a list while iterating
    # over it makes the iterator skip the element after each removal.
    kept = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Unparsable input cannot be evaluated; treat it as rejected.
            continue
        mol = Chem.RemoveHs(mol)
        MW = rdmd._CalcMolWt(mol)
        ALOGP = Crippen.MolLogP(mol)
        HBA = rdmd.CalcNumHBA(mol)
        HBD = rdmd.CalcNumHBD(mol)
        PSA = MolSurf.TPSA(mol)
        ROTB = rdmd.CalcNumRotatableBonds(
            mol, rdmd.NumRotatableBondsOptions.Strict)
        if (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11 or HBD > 7
                or PSA > 180 or ROTB > 11):
            continue
        kept.append(smi)
    count = len(smiles) - len(kept)
    # Slice assignment keeps the caller's list object, as before.
    smiles[:] = kept
    print("unavaliable rule_5_drug:%i" % count)
    return smiles
def testBug12a(self):
    """Check MolSurf.TPSA against fixed reference values for a set of SMILES."""
    from rdkit.Chem import MolSurf
    # (SMILES, expected TPSA) pairs.
    cases = [
        ('OC(=O)[CH](CC1=CC=CC=C1)C2=CC=CC=C2', 37.3),
        ('OC(=O)C(C1=CC=CC=C1)(C2=CC=CC=C2)C3=CC=CC=C3', 37.3),
        ('CCC(CC)(CC)[CH](OC(=O)C1=C(C=CC=C1)C(O)=O)C2=CC=CC=C2', 63.6),
        ('C[C](O)([CH](C(O)=O)C1=CC=CC=C1)C2=CC=CC=C2', 57.53),
        ('C[CH]([CH](C(O)=O)C1=CC=CC=C1)C2=CC=CC=C2', 37.3),
        ('OC(=O)CBr', 37.3),
        ('OC(=O)CCl', 37.3),
        ('OC(=O)C=CC(=O)C1=CC=CC=C1', 54.37),
        ('NC1=C(C=CC=C1)C(O)=O', 63.32),
        ('OC(=O)C1=CC=CC=C1', 37.3),
        ('CN(C)C(=N)NC1=NC(=C2C=C(Cl)C=CC2=N1)C.O[N+]([O-])=O', 128.27),
        ('CCN(CC)C(=N)NC1=NC(=C2C=C(Cl)C=CC2=N1)C.O[N+]([O-])=O', 128.27),
        ('ON(O)NC(=N)NN=C1C(=O)NC2=C1C=CC=C2', 133.07),
        ('NC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 129.99),
        ('CC(=O)NC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 133.07),
        ('COC1=CC=C(C=C1)C=NNC(=N)NN(O)O', 113.2),
        ('ON(O)NC(=N)NN=CC1=CC=CC=C1', 103.97),
        ('ON(O)NC(=N)NN=CC=CC1=CC=CC=C1', 103.97),
        ('ON(O)NC(=N)NN=CC1=C(Cl)C=C(Cl)C=C1', 103.97),
        ('CC(C)=CCCC(C)=CC=NNC(=N)NN(O)O', 103.97),
        ('CN(C)C1=CC=C(C=C1)C=NNC(=N)NN(O)O', 107.21),
        ('ON(O)NC(=N)NN=CC1=CC=CO1', 117.11),
        ('ON(O)NC(=N)NN=CC1=CC=C(O)C=C1', 124.2),
        ('CC(C)C1=CC=C(C=C1)C=NNC(=N)NN(O)O', 103.97),
        ('COC1=C(C=CC=C1)C=NNC(=N)NN(O)O', 113.2),
        ('ON(O)NC(=N)NN=CC1=C(C=CC=C1)[N+]([O-])=O', 147.11),
        ('ON(O)NC(=N)NN=CC1=CC=C(C=C1)[N+]([O-])=O', 147.11),
        ('ON(O)NC(=N)NN=CC1=C(O)C=CC(=C1)[N+]([O-])=O', 167.34),
        ('ON(O)NC(=N)NN=CC1=CC=NC=C1', 116.86),
        ('ON(O)NC(=N)NN=CC1=CC=CC=N1', 116.86),
        ('ON(O)NC(=N)NN=CC1=CC=CN=C1', 116.86),
    ]
    for smiles, expected in cases:
        computed = MolSurf.TPSA(Chem.MolFromSmiles(smiles))
        assert feq(computed, expected), 'bad TPSA (%f != %f) for smiles: %s' % (
            computed, expected, smiles)
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    Returns an 8-element list: [MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS].
    Raises TypeError when mol is None.
    """
    if mol is None:
        raise TypeError('You need to provide a mol argument.')
    mw = rdmd._CalcMolWt(mol)
    alogp = Crippen.MolLogP(mol)
    # HBA: total number of matches over all acceptor patterns.
    hba = sum(
        len(mol.GetSubstructMatches(pattern))
        for pattern in Acceptors
        if mol.HasSubstructMatch(pattern))
    hbd = Lipinski.NumHDonors(mol)
    psa = MolSurf.TPSA(mol)
    rotb = Lipinski.NumRotatableBonds(mol)
    # AROM: SSSR of a copy with the aliphatic rings deleted.
    # NOTE(review): what GetSSSR returns (count vs. ring collection) may
    # depend on the RDKit version -- confirm.
    arom = Chem.GetSSSR(Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings))
    # ALERTS: number of structural-alert patterns present (one per pattern).
    alerts = sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert))
    return [mw, alogp, hba, hbd, psa, rotb, arom, alerts]
def makeFeatures(fileName):
    """Write one feature record per molecule of FKBP12_binders.sdf to fileName.

    Each record consists of one value per line -- molecular volume, Labute
    ASA, atom count, bond count, heavy-atom count -- followed by a blank
    line, a "KI: <value>" line taken from the molecule's "Ki (nM)" property,
    and a blank separator line.

    The output handle is stored in the module global ``featuresFile`` and is
    closed before returning, also when an error occurs part-way through.
    """
    from rdkit import Chem
    from rdkit.Chem import AllChem
    from rdkit.Chem import MolSurf
    global featuresFile, numFeatures
    featuresFile = open(fileName, 'w')  # Molecule features output file
    try:
        drugDB = Chem.SDMolSupplier("FKBP12_binders.sdf")
        if debug:
            # Single-string parenthesised form prints identically under
            # Python 2 and is valid Python 3.
            print("\n\tNo features data file found. Writing new features data file.\n")

        # Select features of interest
        for mol in drugDB:
            record = "{}\n{}\n{}\n{}\n{}\n\nKI: {}\n\n".format(
                AllChem.ComputeMolVolume(mol),
                MolSurf.pyLabuteASA(mol),
                mol.GetNumAtoms(),
                mol.GetNumBonds(),
                mol.GetNumHeavyAtoms(),
                mol.GetProp("Ki (nM)"),
            )
            featuresFile.write(record)
    finally:
        # Do not leak the handle if a molecule is missing a property.
        featuresFile.close()
def test_pySMR_VSA_(self):
    """Compare MolSurf.SMR_VSA_ and MolSurf.pySMR_VSA_ element by element."""
    for data in TestCase.readNCI_200():
        molPy = Chem.MolFromSmiles(data.smiles)
        fromC = MolSurf.SMR_VSA_(data.mol)
        fromPy = MolSurf.pySMR_VSA_(molPy, force=False)
        for calcC, calcPy in zip(fromC, fromPy):
            self.assertAlmostEqual(calcC, calcPy)
def test_pyLabuteASA(self):
    """Compare MolSurf.LabuteASA with MolSurf.pyLabuteASA on each record."""
    for data in TestCase.readNCI_200():
        molPy = Chem.MolFromSmiles(data.smiles)
        calcC = MolSurf.LabuteASA(data.mol)
        calcPy = MolSurf.pyLabuteASA(molPy)
        self.assertAlmostEqual(calcC, calcPy)
def test_pyLabuteHelper(self):
    """Compare MolSurf._LabuteHelper and MolSurf._pyLabuteHelper element by element."""
    for data in TestCase.readNCI_200():
        molPy = Chem.MolFromSmiles(data.smiles)
        fromC = MolSurf._LabuteHelper(data.mol)
        fromPy = MolSurf._pyLabuteHelper(molPy)
        for calcC, calcPy in zip(fromC, fromPy):
            self.assertAlmostEqual(calcC, calcPy)
def CalculateTPSA(mol):
    """Return ``MS.TPSA(mol)`` rounded to three decimal places."""
    tpsa = MS.TPSA(mol)
    return round(tpsa, 3)
def _testLabuteASALong2(self):
    """ test calculation of Labute's ASA value (hydrogens included) """

    def labute_with_hs(x):
        return MolSurf.LabuteASA(x, includeHs=1)

    # Column 6 of the regression file is passed as the reference column.
    self.__testDesc('PP_descrs_regress.2.csv', 6, labute_with_hs)
def calculate(self):
    """Return ``MolSurf.LabuteASA`` evaluated on this instance's ``mol``."""
    molecule = self.mol
    return MolSurf.LabuteASA(molecule)
def makeFeatures(fileName):
    """Write feature records for selected FKBP12 binders to ``fileName``.

    Iterates the molecules of "FKBP12_binders.sdf" and, for molecule number
    n, parses "gaussian_files/drug_<n>.log" with gaussian.parseGaussianLog.
    A record is written only when the log reports converged == "True", the
    molecule's "BindingDB Target Chain Sequence" equals the hard-coded
    sequence below, and its "BindingDB Ligand Name" has not been written
    before. A record is one value per line (geometry/size features, the
    multipole and solvation values from the Gaussian log, then fragment
    counts), followed by a "KI: <value>" line and a blank separator line.

    The output handle is stored in the module global ``featuresFile`` and
    closed at the end. Two summary counts are printed.

    NOTE(review): ``convergedCount`` is taken to count every converged,
    sequence-matching molecule (duplicates included), as implied by the
    "overlap" summary printed at the end -- confirm.
    """
    from rdkit import Chem
    from rdkit.Chem import Fragments
    from rdkit.Chem import AllChem
    from rdkit.Chem import MolSurf
    global featuresFile, numFeatures
    featuresFile = open(fileName, 'w') # Molecule features output file
    # run gaussian jobs
    # gaussian.setNumMols()
    # gaussian.makeAllGinps()
    # gaussian.runGaussianOnAllGinps()
    # open database file
    drugDB = Chem.SDMolSupplier("FKBP12_binders.sdf")
    if debug: print "\n\tNo features data file found. Writing new features data file.\n"
    text = "" # Placeholder for feature data
    molCount = 0            # index of the current molecule in the SD file
    convergedCount = 0      # converged molecules with the expected sequence
    converged_and_different = 0  # ... of which had a not-yet-seen ligand name
    drug_name = []          # ligand names already written
    # load fragment descriptor
    Fragments._LoadPatterns(fileName='/usr/local/anaconda/pkgs/rdkit-2015.03.1-np19py27_0/share/RDKit/Data/FragmentDescriptors.csv')
    # Select features of interest
    for mol in drugDB:
        # Always true for the counter used here; the else branch below is
        # therefore never taken.
        if molCount > -1:
            # print mol.GetProp("BindingDB Target Chain Sequence")
            gaussian_log_file = "gaussian_files/drug_"+str(molCount)+".log"
            converged, dipole, quadrupole, octapole, hexadecapole, dg_solv = gaussian.parseGaussianLog(gaussian_log_file)
            # Flags are compared as the strings "True"/"False", not booleans.
            if converged == "True" and mol.GetProp("BindingDB Target Chain Sequence") == "MGVQVETISPGDGRTFPKRGQTCVVHYTGMLEDGKKFDSSRDRNKPFKFMLGKQEVIRGWEEGVAQMSVGQRAKLTISPDYAYGATGHPGIIPPHATLVFDVELLKLE":
                if convergedCount ==0:
                    diff = "True"
                else:
                    diff = "True"
                    # Linear scan of the names written so far.
                    for i in range(converged_and_different):
                        if mol.GetProp("BindingDB Ligand Name") == drug_name[i]:
                            diff = "False"
                            break
                if diff == "True":
                    drug_name.append(mol.GetProp("BindingDB Ligand Name"))
                    text += "{}\n".format(AllChem.ComputeMolVolume(mol))
                    text += "{}\n".format(MolSurf.pyLabuteASA(mol))
                    text += "{}\n".format(mol.GetNumAtoms())
                    text += "{}\n".format(mol.GetNumBonds())
                    text += "{}\n".format(mol.GetNumHeavyAtoms())
                    text += "{}\n".format(dipole)
                    text += "{}\n".format(quadrupole)
                    text += "{}\n".format(octapole)
                    text += "{}\n".format(hexadecapole)
                    text += "{}\n".format(dg_solv)
                    text += "{}\n".format(Fragments.fr_Al_OH(mol)) # aliphatic alcohols
                    text += "{}\n".format(Fragments.fr_Ar_OH(mol)) # aromatic alcohols
                    text += "{}\n".format(Fragments.fr_ketone(mol)) # number of ketones
                    text += "{}\n".format(Fragments.fr_ether(mol)) # number of ether oxygens
                    text += "{}\n".format(Fragments.fr_ester(mol)) # number of esters
                    text += "{}\n".format(Fragments.fr_aldehyde(mol)) # number of aldehydes
                    text += "{}\n".format(Fragments.fr_COO(mol)) # number of carboxylic acids
                    text += "{}\n".format(Fragments.fr_benzene(mol)) # number of benzenes
                    text += "{}\n".format(Fragments.fr_Ar_N(mol)) # number of aromatic nitrogens
                    text += "{}\n".format(Fragments.fr_NH0(mol)) # number of tertiary amines
                    text += "{}\n".format(Fragments.fr_NH1(mol)) # number of secondary amines
                    text += "{}\n".format(Fragments.fr_NH2(mol)) # number of primary amines
                    text += "{}\n".format(Fragments.fr_amide(mol)) # number of amides
                    text += "{}\n".format(Fragments.fr_SH(mol)) # number of thiol groups
                    text += "{}\n".format(Fragments.fr_nitro(mol)) # number of nitro groups
                    text += "{}\n".format(Fragments.fr_furan(mol)) # number of furan rings
                    text += "{}\n".format(Fragments.fr_imidazole(mol)) # number of imidazole rings
                    text += "{}\n".format(Fragments.fr_oxazole(mol)) # number of oxazole rings
                    text += "{}\n".format(Fragments.fr_morpholine(mol)) # number of morpholine rings
                    text += "{}\n".format(Fragments.fr_halogen(mol)) # number of halogens
                    text += "\nKI: {}\n".format(mol.GetProp("Ki (nM)"))
                    text += "\n" # Use a blank line to divide molecule data
                    # Flush this record and reset the buffer.
                    featuresFile.write(text)
                    text = ""
                    converged_and_different += 1
                convergedCount += 1
        else:
            break
        molCount += 1
    print "Number of molecules with converged gaussian log files and correct sequence:", convergedCount, "\n"
    print "Number of overlap drugs:", convergedCount - converged_and_different
    featuresFile.close()
def CalculateTPSA(mol: Chem.Mol) -> float:
    """Return the topological polar surface area, rounded to 3 decimals.

    Reference: Ertl P. et al., J.Med.Chem. (2000), 43,3714-3717.
    """
    area = MS.TPSA(mol)
    return round(area, 3)
def CalculatePEOEVSA(mol, bins=None):
    """Return the values of ``MOE.PEOE_VSA_`` as a dict.

    Keys are 'PEOEVSA0', 'PEOEVSA1', ... in the order the values are
    returned; each value is rounded to three decimal places. ``bins`` is
    passed through unchanged, and recalculation is forced (force=1).
    """
    values = MOE.PEOE_VSA_(mol, bins, force=1)
    return {
        'PEOEVSA' + str(position): round(value, 3)
        for position, value in enumerate(values)
    }
def calc_rdkit(mol):
    """Evaluate a fixed list of RDKit descriptors on ``mol``.

    Returns a pandas Series (default integer index) holding the descriptor
    values in the order listed below.
    """
    # Order defines the position of each value in the returned Series.
    descriptor_fns = (
        Crippen.MolLogP, Crippen.MolMR,
        Descriptors.FpDensityMorgan1, Descriptors.FpDensityMorgan2,
        Descriptors.FpDensityMorgan3, Descriptors.FractionCSP3,
        Descriptors.HeavyAtomMolWt, Descriptors.MaxAbsPartialCharge,
        Descriptors.MaxPartialCharge, Descriptors.MinAbsPartialCharge,
        Descriptors.MinPartialCharge, Descriptors.MolWt,
        Descriptors.NumRadicalElectrons, Descriptors.NumValenceElectrons,
        EState.EState.MaxAbsEStateIndex, EState.EState.MaxEStateIndex,
        EState.EState.MinAbsEStateIndex, EState.EState.MinEStateIndex,
        EState.EState_VSA.EState_VSA1, EState.EState_VSA.EState_VSA10,
        EState.EState_VSA.EState_VSA11, EState.EState_VSA.EState_VSA2,
        EState.EState_VSA.EState_VSA3, EState.EState_VSA.EState_VSA4,
        EState.EState_VSA.EState_VSA5, EState.EState_VSA.EState_VSA6,
        EState.EState_VSA.EState_VSA7, EState.EState_VSA.EState_VSA8,
        EState.EState_VSA.EState_VSA9,
        Fragments.fr_Al_COO, Fragments.fr_Al_OH, Fragments.fr_Al_OH_noTert,
        Fragments.fr_aldehyde, Fragments.fr_alkyl_carbamate,
        Fragments.fr_alkyl_halide, Fragments.fr_allylic_oxid,
        Fragments.fr_amide, Fragments.fr_amidine, Fragments.fr_aniline,
        Fragments.fr_Ar_COO, Fragments.fr_Ar_N, Fragments.fr_Ar_NH,
        Fragments.fr_Ar_OH, Fragments.fr_ArN, Fragments.fr_aryl_methyl,
        Fragments.fr_azide, Fragments.fr_azo, Fragments.fr_barbitur,
        Fragments.fr_benzene, Fragments.fr_benzodiazepine,
        Fragments.fr_bicyclic, Fragments.fr_C_O, Fragments.fr_C_O_noCOO,
        Fragments.fr_C_S, Fragments.fr_COO, Fragments.fr_COO2,
        Fragments.fr_diazo, Fragments.fr_dihydropyridine,
        Fragments.fr_epoxide, Fragments.fr_ester, Fragments.fr_ether,
        Fragments.fr_furan, Fragments.fr_guanido, Fragments.fr_halogen,
        Fragments.fr_hdrzine, Fragments.fr_hdrzone, Fragments.fr_HOCCN,
        Fragments.fr_imidazole, Fragments.fr_imide, Fragments.fr_Imine,
        Fragments.fr_isocyan, Fragments.fr_isothiocyan, Fragments.fr_ketone,
        Fragments.fr_ketone_Topliss, Fragments.fr_lactam,
        Fragments.fr_lactone, Fragments.fr_methoxy, Fragments.fr_morpholine,
        Fragments.fr_N_O, Fragments.fr_Ndealkylation1,
        Fragments.fr_Ndealkylation2, Fragments.fr_NH0, Fragments.fr_NH1,
        Fragments.fr_NH2, Fragments.fr_Nhpyrrole, Fragments.fr_nitrile,
        Fragments.fr_nitro, Fragments.fr_nitro_arom,
        Fragments.fr_nitro_arom_nonortho, Fragments.fr_nitroso,
        Fragments.fr_oxazole, Fragments.fr_oxime,
        Fragments.fr_para_hydroxylation, Fragments.fr_phenol,
        Fragments.fr_phenol_noOrthoHbond, Fragments.fr_phos_acid,
        Fragments.fr_phos_ester, Fragments.fr_piperdine,
        Fragments.fr_piperzine, Fragments.fr_priamide,
        Fragments.fr_prisulfonamd, Fragments.fr_pyridine, Fragments.fr_quatN,
        Fragments.fr_SH, Fragments.fr_sulfide, Fragments.fr_sulfonamd,
        Fragments.fr_sulfone, Fragments.fr_term_acetylene,
        Fragments.fr_tetrazole, Fragments.fr_thiazole, Fragments.fr_thiocyan,
        Fragments.fr_thiophene, Fragments.fr_unbrch_alkane, Fragments.fr_urea,
        GraphDescriptors.BalabanJ, GraphDescriptors.BertzCT,
        GraphDescriptors.Chi0, GraphDescriptors.Chi0n, GraphDescriptors.Chi0v,
        GraphDescriptors.Chi1, GraphDescriptors.Chi1n, GraphDescriptors.Chi1v,
        GraphDescriptors.Chi2n, GraphDescriptors.Chi2v,
        GraphDescriptors.Chi3n, GraphDescriptors.Chi3v,
        GraphDescriptors.Chi4n, GraphDescriptors.Chi4v,
        GraphDescriptors.HallKierAlpha, GraphDescriptors.Ipc,
        GraphDescriptors.Kappa1, GraphDescriptors.Kappa2,
        GraphDescriptors.Kappa3,
        Lipinski.HeavyAtomCount, Lipinski.NHOHCount, Lipinski.NOCount,
        Lipinski.NumAliphaticCarbocycles, Lipinski.NumAliphaticHeterocycles,
        Lipinski.NumAliphaticRings, Lipinski.NumAromaticCarbocycles,
        Lipinski.NumAromaticHeterocycles, Lipinski.NumAromaticRings,
        Lipinski.NumHAcceptors, Lipinski.NumHDonors, Lipinski.NumHeteroatoms,
        Lipinski.NumRotatableBonds, Lipinski.NumSaturatedCarbocycles,
        Lipinski.NumSaturatedHeterocycles, Lipinski.NumSaturatedRings,
        Lipinski.RingCount,
        MolSurf.LabuteASA,
        MolSurf.PEOE_VSA1, MolSurf.PEOE_VSA10, MolSurf.PEOE_VSA11,
        MolSurf.PEOE_VSA12, MolSurf.PEOE_VSA13, MolSurf.PEOE_VSA14,
        MolSurf.PEOE_VSA2, MolSurf.PEOE_VSA3, MolSurf.PEOE_VSA4,
        MolSurf.PEOE_VSA5, MolSurf.PEOE_VSA6, MolSurf.PEOE_VSA7,
        MolSurf.PEOE_VSA8, MolSurf.PEOE_VSA9,
        MolSurf.SlogP_VSA1, MolSurf.SlogP_VSA10, MolSurf.SlogP_VSA11,
        MolSurf.SlogP_VSA12, MolSurf.SlogP_VSA2, MolSurf.SlogP_VSA3,
        MolSurf.SlogP_VSA4, MolSurf.SlogP_VSA5, MolSurf.SlogP_VSA6,
        MolSurf.SlogP_VSA7, MolSurf.SlogP_VSA8, MolSurf.SlogP_VSA9,
        MolSurf.SMR_VSA1, MolSurf.SMR_VSA10, MolSurf.SMR_VSA2,
        MolSurf.SMR_VSA3, MolSurf.SMR_VSA4, MolSurf.SMR_VSA5,
        MolSurf.SMR_VSA6, MolSurf.SMR_VSA7, MolSurf.SMR_VSA8,
        MolSurf.SMR_VSA9,
        MolSurf.TPSA,
    )
    values = [fn(mol) for fn in descriptor_fns]
    return pd.Series(np.array(values))
def _new_feature_values(mol):
    """Return [fr_phos, aromatic_carbocycles, MolLogP, PEOE_VSA1, fingerprint] for mol."""
    fingerprint_bit_string = GetMorganFingerprintAsBitVect(mol, 2).ToBitString()
    fr_phos = Fragments.fr_phos_acid(mol) + Fragments.fr_phos_ester(mol)
    aromatic_cc = Lipinski.NumAromaticCarbocycles(mol)
    molLogP = Crippen.MolLogP(mol)
    peoe_vsa1 = MolSurf.PEOE_VSA1(mol)
    return [fr_phos, aromatic_cc, molLogP, peoe_vsa1, fingerprint_bit_string]


def _write_feature_file(in_path, out_path, trailing_header, trailing_columns):
    """Copy the first 8 columns of in_path to out_path and append new features.

    The first line of in_path is skipped as a header. Column 10 of every row
    is parsed as SMILES. ``trailing_columns`` are the input column indices
    copied verbatim after the computed features, and ``trailing_header``
    holds their names.
    """
    header = [
        'index', 'Maximum Degree', 'Minimum Degree', 'Molecular Weight',
        'Number of H-Bond Donors', 'Number of Rings',
        'Number of Rotatable Bonds', 'Polar Surface Area', 'fr_phos',
        'aromatic_carbocycles', 'MolLogP', 'PEOE_VSA1', 'Fingerprint'
    ] + trailing_header
    # Context managers close both files even if a row fails to process.
    with open(in_path, 'r') as infile, open(out_path, 'w') as f:
        infile.readline()  # skip the input header row
        writer = csv.writer(f)
        writer.writerow(header)
        for line in infile:
            fields = line.strip('\n\r ').split(",")
            mol = Chem.MolFromSmiles(fields[10].strip())
            writer.writerow(fields[:8] + _new_feature_values(mol) +
                            [fields[i] for i in trailing_columns])


def main():
    """Generate the augmented training and test feature CSV files.

    Reads "molecule_training.csv" and "molecule_TestFeatures.csv" and writes
    'train_molecule_new_features.csv' and 'test_molecule_new_features.csv'.
    Training rows keep their smiles and target columns (input columns 10 and
    11); test rows keep only smiles (input column 10).
    """
    _write_feature_file("molecule_training.csv",
                        'train_molecule_new_features.csv',
                        ['smiles', 'target'], (10, 11))
    _write_feature_file("molecule_TestFeatures.csv",
                        'test_molecule_new_features.csv',
                        ['smiles'], (10,))
def CalculateTPSA(mol):
    """Return {'TPSA': value}, where value is MOE.TPSA(mol) rounded to 3 decimals."""
    rounded = round(MOE.TPSA(mol), 3)
    return {'TPSA': rounded}
def CalculateSLOGPVSA(mol, bins=None):
    """Return the values of ``MOE.SlogP_VSA_`` as a dict.

    Keys are 'slogPVSA0', 'slogPVSA1', ... in the order the values are
    returned; each value is rounded to three decimal places. ``bins`` is
    passed through unchanged, and recalculation is forced (force=1).
    """
    contributions = MOE.SlogP_VSA_(mol, bins, force=1)
    return {
        'slogPVSA' + str(n): round(amount, 3)
        for n, amount in enumerate(contributions)
    }
def CalculateLabuteASA(mol: Chem.Mol) -> dict:
    """Calculate Labute's Approximate Surface Area (ASA from MOE).

    Returns a dict with the single key 'LabuteASA'; the value is computed
    with hydrogens included and rounded to three decimal places.
    """
    surface_area = MOE.pyLabuteASA(mol, includeHs=1)
    return {'LabuteASA': round(surface_area, 3)}
def CalculateLabuteASA(mol):
    """Return {'LabuteASA': value} from MOE.pyLabuteASA(mol, includeHs=1), rounded to 3 decimals."""
    return {'LabuteASA': round(MOE.pyLabuteASA(mol, includeHs=1), 3)}