def testMolWt(self):
    """Check the average and exact molecular weights computed for methane.

    The average weight is checked with and without the second positional
    flag set to True, on the parsed molecule and on a copy with explicit
    hydrogens added. The exact weight is checked on a freshly parsed
    molecule.
    """
    tolerance = .001

    methane = Chem.MolFromSmiles("C")
    self.assertTrue(feq(rdMD._CalcMolWt(methane), 16.043, tolerance))
    self.assertTrue(feq(rdMD._CalcMolWt(methane, True), 12.011, tolerance))

    # Adding explicit hydrogens is expected to leave both values unchanged.
    methane_with_hs = Chem.AddHs(methane)
    self.assertTrue(feq(rdMD._CalcMolWt(methane_with_hs), 16.043, tolerance))
    self.assertTrue(
        feq(rdMD._CalcMolWt(methane_with_hs, True), 12.011, tolerance))

    reparsed = Chem.MolFromSmiles("C")
    self.assertTrue(feq(rdMD.CalcExactMolWt(reparsed), 16.031, tolerance))
def testMolWt(self):
    """Check the average and exact molecular weights computed for methane.

    Uses ``assertTrue`` rather than the ``failUnless`` alias, which was
    deprecated in unittest and removed in Python 3.12.
    """
    mol = Chem.MolFromSmiles("C")
    amw = rdMD._CalcMolWt(mol)
    self.assertTrue(feq(amw, 16.043, .001))
    # Second positional argument set to True: expected value drops to 12.011.
    amw = rdMD._CalcMolWt(mol, True)
    self.assertTrue(feq(amw, 12.011, .001))

    # Same two checks on a copy with explicit hydrogens added.
    mol2 = Chem.AddHs(mol)
    amw = rdMD._CalcMolWt(mol2)
    self.assertTrue(feq(amw, 16.043, .001))
    amw = rdMD._CalcMolWt(mol2, True)
    self.assertTrue(feq(amw, 12.011, .001))

    # Exact molecular weight on a freshly parsed molecule.
    mol = Chem.MolFromSmiles("C")
    amw = rdMD.CalcExactMolWt(mol)
    self.assertTrue(feq(amw, 16.031, .001))
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Parameters:
        smi: SMILES string to parse.
        name: identifier placed first in the returned row and used in the
            diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr,
        mw, csp3, fmf, qed, hac, nrings_fused)`` with the floating-point
        values rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # Fraction of heavy atoms that belong to the scaffold; guard the
        # division for molecules without heavy atoms.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused)
    except Exception:
        # Best-effort: a failing molecule is reported and skipped.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    Raises ValueError when ``mol`` is None. Returns a ``QEDproperties``
    instance built from the molecule after ``Chem.RemoveHs``.
    """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')

    stripped = Chem.RemoveHs(mol)

    mol_wt = rdmd._CalcMolWt(stripped)
    alogp = Crippen.MolLogP(stripped)

    # Total number of matches over all acceptor patterns.
    n_acceptors = 0
    for pattern in Acceptors:
        if stripped.HasSubstructMatch(pattern):
            n_acceptors += len(stripped.GetSubstructMatches(pattern))

    n_donors = rdmd.CalcNumHBD(stripped)
    psa = MolSurf.TPSA(stripped)
    n_rotatable = rdmd.CalcNumRotatableBonds(
        stripped, rdmd.NumRotatableBondsOptions.Strict)

    # The replacement
    #   AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression below tends to count more rings
    #   N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    #   OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    #   CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1 uses 2, should be 0 ?
    without_aliphatic_rings = Chem.DeleteSubstructs(
        Chem.Mol(stripped), AliphaticRings)
    arom = Chem.GetSSSR(without_aliphatic_rings)

    # Number of alert patterns that match at least once.
    n_alerts = 0
    for alert in StructuralAlerts:
        if stripped.HasSubstructMatch(alert):
            n_alerts += 1

    return QEDproperties(
        MW=mol_wt,
        ALOGP=alogp,
        HBA=n_acceptors,
        HBD=n_donors,
        PSA=psa,
        ROTB=n_rotatable,
        AROM=arom,
        ALERTS=n_alerts,
    )
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    A None argument is rejected with ValueError; otherwise hydrogens are
    removed and the eight QED inputs are gathered into ``QEDproperties``.
    """
    if mol is None:
        raise ValueError('You need to provide a mol argument.')

    no_hs = Chem.RemoveHs(mol)

    # Dict literals evaluate their values top to bottom, so the descriptors
    # are computed in the order they are listed.
    #
    # About AROM: the replacement
    #   AROM=Lipinski.NumAromaticRings(mol),
    # is not identical. The expression used here tends to count more rings
    #   N1C2=CC=CC=C2SC3=C1C=CC4=C3C=CC=C4
    #   OC1=C(O)C=C2C(=C1)OC3=CC(=O)C(=CC3=C2C4=CC=CC=C4)O
    #   CC(C)C1=CC2=C(C)C=CC2=C(C)C=C1 uses 2, should be 0 ?
    fields = {
        'MW': rdmd._CalcMolWt(no_hs),
        'ALOGP': Crippen.MolLogP(no_hs),
        'HBA': sum(
            len(no_hs.GetSubstructMatches(acceptor))
            for acceptor in Acceptors
            if no_hs.HasSubstructMatch(acceptor)
        ),
        'HBD': rdmd.CalcNumHBD(no_hs),
        'PSA': MolSurf.TPSA(no_hs),
        'ROTB': rdmd.CalcNumRotatableBonds(
            no_hs, rdmd.NumRotatableBondsOptions.Strict),
        'AROM': Chem.GetSSSR(
            Chem.DeleteSubstructs(Chem.Mol(no_hs), AliphaticRings)),
        'ALERTS': sum(
            1 for pattern in StructuralAlerts
            if no_hs.HasSubstructMatch(pattern)
        ),
    }
    return QEDproperties(**fields)
def main():
    """Append Lipinski and polar-surface-area descriptors to the submissions.

    Reads ``submissions_final_result.csv``, computes descriptors for each
    row's ``smiles_string``, merges them back onto the input table on
    ``submission_id`` and ``smiles_string``, and writes
    ``lipinski_psa_result.csv``.
    """
    sub_df = pd.read_csv("submissions_final_result.csv")

    def describe(row):
        # One dict per submission; keys become the new CSV columns.
        mol = Chem.MolFromSmiles(row['smiles_string'])
        return {
            'submission_id': row['submission_id'],
            'smiles_string': row['smiles_string'],
            # Lipinski's rule
            'h_bond_donor': rd.CalcNumLipinskiHBD(mol),  # Lipinski Hbond donor
            'h_bond_acceptor': rd.CalcNumLipinskiHBA(mol),  # Lipinski Hbond acceptor
            'moluclar_mass': rd._CalcMolWt(mol),  # Molecular Weight
            'log_p': rd.CalcCrippenDescriptors(mol)[0],  # Partition coefficient
            # Topological polar surface area
            'topological_polar_surface_area': rd.CalcTPSA(mol),
        }

    cmp_ds = [describe(row) for _, row in sub_df.iterrows()]

    result = pd.merge(sub_df, pd.DataFrame(cmp_ds),
                      on=['submission_id', 'smiles_string'])
    result.to_csv("lipinski_psa_result.csv", index=False, encoding='utf-8')
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Parameters:
        smi: SMILES string to parse.
        name: identifier placed first in the returned row and used in the
            diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr,
        mw, csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with the floating-point
        values rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # The scaffold feeds both fmf and fcsp3_bm, so build it once.
        # NOTE(review): assumes GetScaffoldForMol does not modify ``m`` — confirm.
        scaffold = GetScaffoldForMol(m)
        # Guard the division for molecules without heavy atoms.
        fmf = 0 if hac == 0 else scaffold.GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # ``default=()`` yields a size of 0 for molecules without rings.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)
        return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
                round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
                round(fmf, 3), round(qed, 3), hac, nrings_fused,
                n_unique_hba_hbd_atoms, max_ring_size, n_chiral_centers,
                round(fcsp3_bm, 3))
    except Exception:
        # Best-effort: a failing molecule is reported and skipped.
        # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
        # SystemExit propagating.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
def compute_descs_rdkit(mol):
    """Return a 7-tuple of RDKit descriptors for ``mol``.

    The order is: molecular weight, H-bond acceptors, H-bond donors, TPSA,
    aromatic ring count, heteroatom count, rotatable bond count.
    """
    # We can always add more later on
    # noinspection PyProtectedMember
    calculators = (
        rdMolDescriptors._CalcMolWt,
        rdMolDescriptors.CalcNumHBA,
        rdMolDescriptors.CalcNumHBD,
        rdMolDescriptors.CalcTPSA,
        rdMolDescriptors.CalcNumAromaticRings,
        rdMolDescriptors.CalcNumHeteroatoms,
        rdMolDescriptors.CalcNumRotatableBonds,
    )
    return tuple(calculate(mol) for calculate in calculators)
def main(in_file, output):
    """Tabulate RDKit descriptors for the molecules in ``in_file``.

    Parameters:
        in_file: path handed (as a one-element list) to ``rdkit_open``.
        output: output path prefix; ``.csv`` and ``.xlsx`` are appended.

    One record is built per molecule, keyed by the first
    whitespace-separated token of its ``_Name`` property, and the selected
    columns are written to both a CSV and an Excel file.
    """
    Cmpds = {}
    InMols = rdkit_open([in_file])
    print('\n # Number of input molecule: {0}'.format(len(InMols)))

    for mol in InMols:
        name = mol.GetProp('_Name').split()[0]
        # A single Crippen call yields both logP and molar refractivity.
        logp, mol_ref = rd.CalcCrippenDescriptors(mol)
        Cmpds[name] = {
            'Name': name,
            'Formula': rd.CalcMolFormula(mol),
            'MW': rd._CalcMolWt(mol),                    # Molecular Weight
            'logP': logp,                                # Partition coefficient
            'HDon': rd.CalcNumLipinskiHBD(mol),          # Lipinski Hbond donor
            'HAcc': rd.CalcNumLipinskiHBA(mol),          # Lipinski Hbond acceptor
            'TPSA': rd.CalcTPSA(mol),                    # Topological polar surface area
            'Rotat': rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
            'MolRef': mol_ref,                           # Molar refractivity
            'AliRing': rd.CalcNumAliphaticRings(mol),    # Aliphatic ring number
            'AroRing': rd.CalcNumAromaticRings(mol),     # Aromatic ring number
            # 'Stereo': rd.CalcNumAtomStereoCenters(mol)                 # Stereo center number
            # 'UnspStereo': rd.CalcNumUnspecifiedAtomStereoCenters(mol)  # unspecified stereo
            'SMILES': Chem.MolToSmiles(mol, isomericSmiles=True,
                                       allHsExplicit=False),
        }

    ####################################
    df = pd.DataFrame.from_dict(Cmpds, orient='index')
    df.index.name = 'Name'

    # Columns of data to print out
    Columns = [
        'Formula',
        'MW', 'logP', 'HDon', 'HAcc', 'TPSA',
        'Rotat', 'MolRef', 'AliRing', 'AroRing',
        # 'Stereo', 'UnspStereo',
        'SMILES',
    ]
    reorder = df[Columns]

    # Output to CSV
    reorder.to_csv(output + '.csv', sep=',', na_rep='NA', encoding='utf-8',
                   float_format='%.5f', header=True)

    # Output to Excel
    reorder.to_excel(output + '.xlsx', header=True, na_rep='NA')
def calc(smi, name):
    """Compute one row of descriptors for a SMILES string.

    Parameters:
        smi: SMILES string to parse.
        name: identifier placed first in the returned row and used in the
            diagnostic written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr,
        mw, csp3, fmf)`` with the floating-point values rounded, or ``None``
        when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)
    # Fraction of heavy atoms that belong to the scaffold. A molecule with
    # no heavy atoms would otherwise divide by zero, so report 0 for it.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
    return (name, hba, hbd, hba + hbd, nrings, rtb, round(psa, 2),
            round(logp, 2), round(mr, 2), round(mw, 2), round(csp3, 3),
            round(fmf, 3))
def filter_druglikeness_5_rules(self, smiles):
    """Remove, in place, the SMILES that violate the property thresholds.

    A SMILES is removed when any of these holds for its hydrogen-stripped
    molecule: MW > 600, ALOGP > 6, ALOGP < 0, HBA > 11, HBD > 7,
    PSA > 180 or ROTB > 11.

    Parameters:
        smiles: list of SMILES strings; it is modified in place.

    Returns:
        The same ``smiles`` list object, now holding only the entries that
        passed. The number of removed entries is printed.

    The previous implementation called ``smiles.remove`` while iterating
    over ``smiles``, which makes the iterator skip the element following
    every removal; survivors are now collected first and written back with
    a slice assignment.
    """
    kept = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        mol = Chem.RemoveHs(mol)
        MW = rdmd._CalcMolWt(mol)
        ALOGP = Crippen.MolLogP(mol)
        HBA = rdmd.CalcNumHBA(mol)
        HBD = rdmd.CalcNumHBD(mol)
        PSA = MolSurf.TPSA(mol)
        ROTB = rdmd.CalcNumRotatableBonds(
            mol, rdmd.NumRotatableBondsOptions.Strict)
        violates = (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11
                    or HBD > 7 or PSA > 180 or ROTB > 11)
        if not violates:
            kept.append(smi)

    count = len(smiles) - len(kept)
    # Slice assignment keeps the caller's list object, as before.
    smiles[:] = kept
    print("unavaliable rule_5_drug:%i" % count)
    return smiles
def calculate_properties(self, smiles=None, mol=None, props=()):
    """Calculate the requested basic properties and store them in ``self.data``.

    Parameters:
        smiles: SMILES string, parsed only when ``mol`` is not given.
        mol: molecule to use directly; takes precedence over ``smiles``.
        props: collection of property keys to compute (``'py_formula'``,
            ``'py_em'``, ``'py_n_Cl_Br'``, ``'py_na'``, ``'py_mw'``,
            ``'py_fsp3'``, ``'py_rb'``, ``'py_tpsa'``, ``'py_clogp'``,
            ``'py_nar'``, ``'py_nhba'``, ``'py_nhbd'``). Unknown keys are
            ignored. ``None`` is treated like an empty collection.

    Returns:
        error (bool): ``True`` when nothing was requested or the SMILES
        could not be parsed, ``False`` after the properties were stored.
    """
    # The default is an immutable tuple rather than a shared mutable list.
    if props is None or len(props) == 0:
        return True

    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return True

    def count_cl_br():
        # Number of chlorine plus bromine atoms.
        return sum(1 for atom in mol.GetAtoms()
                   if atom.GetSymbol() in ('Cl', 'Br'))

    # (key, calculator) pairs, evaluated in this order.
    calculators = (
        ('py_formula', lambda: desc.CalcMolFormula(mol)),
        ('py_em', lambda: round(desc.CalcExactMolWt(mol), 5)),
        ('py_n_Cl_Br', count_cl_br),
        ('py_na', lambda: mol.GetNumAtoms()),
        ('py_mw', lambda: desc._CalcMolWt(mol)),
        ('py_fsp3', lambda: desc.CalcFractionCSP3(mol)),
        ('py_rb', lambda: desc.CalcNumRotatableBonds(mol)),
        ('py_tpsa', lambda: desc.CalcTPSA(mol)),
        ('py_clogp', lambda: desc.CalcCrippenDescriptors(mol)[0]),
        ('py_nar', lambda: desc.CalcNumAromaticRings(mol)),
        ('py_nhba', lambda: desc.CalcNumHBA(mol)),
        ('py_nhbd', lambda: desc.CalcNumHBD(mol)),
    )
    for key, calculate in calculators:
        if key in props:
            self.data[key] = calculate()
    return False
def properties(mol):
    """
    Calculates the properties that are required to calculate the QED descriptor.

    Raises TypeError when ``mol`` is None. Returns a list of eight values
    in the order MW, ALOGP, HBA, HBD, PSA, ROTB, AROM, ALERTS.
    """
    if mol is None:
        raise TypeError('You need to provide a mol argument.')

    mol_wt = rdmd._CalcMolWt(mol)  # MW
    alogp = Crippen.MolLogP(mol)  # ALOGP
    # HBA: total number of matches over all acceptor patterns
    n_acceptors = sum(
        len(mol.GetSubstructMatches(pattern))
        for pattern in Acceptors
        if mol.HasSubstructMatch(pattern)
    )
    n_donors = Lipinski.NumHDonors(mol)  # HBD
    psa = MolSurf.TPSA(mol)  # PSA
    n_rotatable = Lipinski.NumRotatableBonds(mol)  # ROTB
    # AROM: computed on a deep copy with the aliphatic-ring substructures deleted
    without_aliphatic_rings = Chem.DeleteSubstructs(deepcopy(mol), AliphaticRings)
    arom = Chem.GetSSSR(without_aliphatic_rings)
    # ALERTS: number of alert patterns that match at least once
    n_alerts = sum(1 for alert in StructuralAlerts if mol.HasSubstructMatch(alert))

    return [mol_wt, alogp, n_acceptors, n_donors, psa, n_rotatable, arom, n_alerts]
for name in tmp: if name[0] != '_' and name[-1] != '_' and name not in others: # filter out python reference implementations: if name[:2] == 'py' and name[2:] in tmp: continue if name == 'print_function': continue thing = getattr(mod, name) if _isCallable(thing): namespace[name] = thing _descList.append((name, thing)) descList = _descList MolWt = lambda *x, **y: _rdMolDescriptors._CalcMolWt(*x, **y) MolWt.version = _rdMolDescriptors._CalcMolWt_version MolWt.__doc__ = """The average molecular weight of the molecule >>> MolWt(Chem.MolFromSmiles('CC')) 30.07 >>> MolWt(Chem.MolFromSmiles('[NH4+].[Cl-]')) 53.49... """ HeavyAtomMolWt = lambda x: MolWt(x, True) HeavyAtomMolWt.__doc__ = """The average molecular weight of the molecule ignoring hydrogens >>> HeavyAtomMolWt(Chem.MolFromSmiles('CC')) 24.02...
for name in tmp: if name[0] != "_" and name[-1] != "_" and name not in others: # filter out python reference implementations: if name[:2] == "py" and name[2:] in tmp: continue thing = getattr(mod, name) if _isCallable(thing): namespace[name] = thing _descList.append((name, thing)) descList = _descList from rdkit.Chem import rdMolDescriptors as _rdMolDescriptors MolWt = lambda *x, **y: _rdMolDescriptors._CalcMolWt(*x, **y) MolWt.version = _rdMolDescriptors._CalcMolWt_version MolWt.__doc__ = """The average molecular weight of the molecule >>> MolWt(Chem.MolFromSmiles('CC')) 30.07 >>> MolWt(Chem.MolFromSmiles('[NH4+].[Cl-]')) 53.49... """ HeavyAtomMolWt = lambda x: MolWt(x, True) HeavyAtomMolWt.__doc__ = """The average molecular weight of the molecule ignoring hydrogens >>> HeavyAtomMolWt(Chem.MolFromSmiles('CC')) 24.02...
def calc_molecular_weight(sm):
    """Return the molecular weight of ``sm`` with dummy atoms discounted.

    Every ``"Q"`` in the input is replaced by ``DUMMY_ATOM`` before parsing;
    ``DUMMY_ATOM_WEIGHT`` is then subtracted once for each occurrence of
    ``DUMMY_ATOM`` in the substituted string.
    """
    substituted = sm.replace("Q", DUMMY_ATOM)
    n_dummies = substituted.count(DUMMY_ATOM)
    total_weight = rdMolDescriptors._CalcMolWt(Chem.MolFromSmiles(substituted))
    return total_weight - DUMMY_ATOM_WEIGHT * n_dummies
from rdkit import Chem from rdkit.Chem import rdMolDescriptors import networkx as nx import re import copy from .write_smiles import write_smiles #***** graph processing funcs *********** SHARE_PARAMS = ["mw", "mn", "n", "pdi", "d"] DUMMY_ATOM = "Y" DUMMY_ATOM_WEIGHT = rdMolDescriptors._CalcMolWt( Chem.MolFromSmiles("[" + DUMMY_ATOM + "]")) MAX_ATOMS = 10000 def draw_chem_graph(g): """ draw chemicals from networkX object Parameters ---------- g : networkX object """ pos = nx.spring_layout(g) node_labels = nx.get_node_attributes(g, 'polymer') nx.draw_networkx_labels(g, pos, labels=node_labels, font_size=10,
if pid != '' and pid not in pubids and row[41].split( ',')[0] in prot_id2idx and row[37] != '': pubids.append(pid) # import pdb # pdb.set_trace() # print "entered" proId = row[41].split(',')[0] if proId not in prot_id2idx: print proId, pid, row[37] pd = psc_array[prot_id2idx[proId]] try: if (row[9] == '' or row[1] == '' or row[1] is None): continue else: md = Chem.MolFromSmiles(row[1]) if float(row[9]) < 100 and _CalcMolWt( md) < 1000 and md is not None: fp1 = convToArr( AllChem.GetMorganFingerprintAsBitVect(md, 1)) fp2 = convToArr( AllChem.GetMorganFingerprintAsBitVect(md, 2)) fp3 = convToArr( AllChem.GetMorganFingerprintAsBitVect(md, 3)) out = pid, proId, fp1.tolist(), fp2.tolist( ), fp3.tolist(), pd.tolist(), 1 writer.writerow(out) # print "writing done" countp += 1 elif float(row[9]) > 10000 and _CalcMolWt( md) < 100 and md is not None: fp1 = convToArr( AllChem.GetMorganFingerprintAsBitVect(md, 1))
def calc_MW_from_SMILES_list(SMILES):
    """Return the molecular weight RDKit computes for ``SMILES``.

    NOTE(review): despite the function name, the argument is passed straight
    to ``Chem.MolFromSmiles``, so it is presumably a single SMILES string
    rather than a list — confirm against callers.
    """
    return rdMolDescriptors._CalcMolWt(Chem.MolFromSmiles(SMILES))