def calc(smi, name):
    """Compute a tuple of RDKit descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: Identifier returned as the first tuple element and used in
            the diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms,
        max_ring_size, n_chiral_centers, fcsp3_bm)`` with the floating-point
        values rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises (a message is written to stderr in
        both cases).
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()

        # The scaffold feeds both fmf and fcsp3_bm, so build it only once.
        scaffold = GetScaffoldForMol(m)
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        fmf = scaffold.GetNumHeavyAtoms() / hac if hac else 0

        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # Size of the largest ring; 0 when the molecule is acyclic.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)

        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3),
            hac, nrings_fused, n_unique_hba_hbd_atoms,
            max_ring_size, n_chiral_centers, round(fcsp3_bm, 3),
        )
    except Exception:
        # Best effort: one failing molecule must not abort a batch, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
def calc(smi, name):
    """Compute a tuple of RDKit descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: Identifier returned as the first tuple element and used in
            the diagnostics written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf, qed, hac, nrings_fused)`` with the floating-point values
        rounded, or ``None`` when the SMILES cannot be parsed or any
        descriptor calculation raises (a message is written to stderr in
        both cases).
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf: share of heavy atoms that belong to the scaffold; defined as 0
        # for a molecule without heavy atoms to avoid dividing by zero.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)

        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3),
            hac, nrings_fused,
        )
    except Exception:
        # Best effort: one failing molecule must not abort a batch, but
        # KeyboardInterrupt / SystemExit are allowed to propagate.
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
def get_prop_array(mol):
    """Return six molecular properties of ``mol`` as a NumPy array.

    The element order is: exact molecular weight, Crippen logP, number of
    rotatable bonds, H-bond donor count, H-bond acceptor count and formal
    charge.
    """
    properties = [
        CD.CalcExactMolWt(mol),
        Chem.Crippen.MolLogP(mol),
        D.NumRotatableBonds(mol),
        CD.CalcNumHBD(mol),
        CD.CalcNumHBA(mol),
        Chem.GetFormalCharge(mol),
    ]
    return np.array(properties)
def _calculateDescriptors(mol):
    """Build a one-row DataFrame of RDKit descriptors for ``mol``.

    Columns appear in a fixed order: the scalar descriptors listed below,
    followed by ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and
    ``MQN1..42``.

    Args:
        mol: RDKit molecule to describe.

    Returns:
        pandas.DataFrame with a single row (index ``0``) and one column per
        descriptor.
    """
    # Both Crippen values come from a single call.
    slogp, smr = rdMolDescriptors.CalcCrippenDescriptors(mol)

    values = {
        "SlogP": slogp,
        "SMR": smr,
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }

    # Vector-valued descriptors: (column prefix, calculator, number of
    # columns). zip() stops at the shorter input, so at most `count`
    # columns are produced per family.
    vector_families = (
        ("slogp_VSA", rdMolDescriptors.SlogP_VSA_, 12),
        ("smr_VSA", rdMolDescriptors.SMR_VSA_, 10),
        ("peoe_VSA", rdMolDescriptors.PEOE_VSA_, 14),
        ("MQN", rdMolDescriptors.MQNs_, 42),
    )
    for prefix, calculator, count in vector_families:
        for position, value in zip(range(1, count + 1), calculator(mol)):
            values[prefix + str(position)] = value

    # Build the frame in one go instead of inserting ~120 columns one by one.
    return pd.DataFrame(values, index=[0])
def n_hba(mol):
    """Count the hydrogen-bond acceptors of a molecule.

    Args:
        mol (skchem.Mol):
            The molecule for which to calculate the descriptor.

    Returns:
        int: the acceptor count reported by
        ``rdMolDescriptors.CalcNumHBA``.
    """
    acceptor_count = rdMolDescriptors.CalcNumHBA(mol)
    return acceptor_count
def __init__(self, configuration: StatsExtractionConfig):
    """Store the configuration and build the Spark UDFs used for statistics.

    Every UDF takes a single string column value. The callables close over
    local variables only (never ``self``), so the instance itself is not
    captured by the functions handed to Spark.

    Args:
        configuration: Statistics-extraction settings; its
            ``standardisation_config`` mapping (filter name -> parameters)
            is turned into ``FilterConfiguration`` objects for the
            standardisation UDF.
    """
    self._filters = FilterTypesEnum
    self._columns = DataframeColumnsEnum
    self._stats = StatsExtractionEnum
    self._purging = PurgingEnum
    self._configuration = configuration

    standardisation_config_dict = self._configuration.standardisation_config
    standardisation_config = [
        FilterConfiguration(name=name, parameters=params)
        for name, params in standardisation_config_dict.items()
    ]

    dec_separator = self._stats.DECORATION_SEPARATOR_TOKEN
    attachment_token = self._stats.ATTACHMENT_POINT_TOKEN

    def _carbon_fraction(smiles):
        # Parse once and reuse the molecule for both numerator and denominator.
        mol = Chem.MolFromSmiles(smiles)
        carbon_count = len(
            [atom for atom in mol.GetAtoms() if atom.GetAtomicNum() == 6])
        return carbon_count / mol.GetNumHeavyAtoms()

    self._mol_wts_udf = psf.udf(
        lambda x: ExactMolWt(Chem.MolFromSmiles(x)), pst.FloatType())
    self._num_rings_udf = psf.udf(
        lambda x: rdMolDescriptors.CalcNumRings(Chem.MolFromSmiles(x)),
        pst.IntegerType())
    self._num_atoms_udf = psf.udf(
        lambda x: Chem.MolFromSmiles(x).GetNumHeavyAtoms(),
        pst.IntegerType())
    self._num_aromatic_rings_udf = psf.udf(
        lambda x: rdMolDescriptors.CalcNumAromaticRings(
            Chem.MolFromSmiles(x)), pst.IntegerType())
    self._hbond_donors_udf = psf.udf(
        lambda x: rdMolDescriptors.CalcNumHBD(Chem.MolFromSmiles(x)),
        pst.IntegerType())
    self._hbond_acceptors_udf = psf.udf(
        lambda x: rdMolDescriptors.CalcNumHBA(Chem.MolFromSmiles(x)),
        pst.IntegerType())
    # NOTE(review): despite the attribute name, this ratio counts carbon
    # atoms (atomic number 6) over heavy atoms — confirm whether the
    # hetero-atom fraction was intended before changing it.
    self._hetero_atom_ratio_udf = psf.udf(_carbon_fraction, pst.FloatType())
    self._make_canonical_udf = psf.udf(
        lambda x: Chem.MolToSmiles(Chem.MolFromSmiles(x)), pst.StringType())
    self._standardise_smiles_udf = psf.udf(
        lambda x: RDKitStandardizer(standardisation_config, None).
        apply_filter(x), pst.StringType())

    pattern = self._stats.REGEX_TOKENS
    self.regex = re.compile(pattern)
    self._tokeniser_udf = psf.udf(self.regex.findall,
                                  pst.ArrayType(pst.StringType()))
    self._decoration_split_udf = psf.udf(lambda x: x.split(dec_separator),
                                         pst.ArrayType(pst.StringType()))
    # Counts single characters equal to the attachment token.
    self._count_decorations_udf = psf.udf(
        lambda s: list(s).count(attachment_token), pst.IntegerType())
def _init_smiles(self, smiles, use_etdg_confs=False):
    """
    Initialise a Molecule object from a SMILES string using RDKit

    Sets ``mol_obj`` (with explicit hydrogens), ``charge``, ``n_rot_bonds``,
    ``n_h_donors``, ``n_h_acceptors``, ``volume``, ``bonds`` and
    ``conformers``, then sets the atoms from the first conformer. If RDKit
    fails to build the molecule or to compute its volume, an error is logged
    and the method returns early, leaving the later attributes unset.

    :param smiles: (str) SMILES string
    :param use_etdg_confs: (bool) override the default conformer generation and use the ETDG algorithm
    :return: None
    """
    logger.info('Initialising a Molecule from a SMILES string')

    try:
        self.mol_obj = Chem.MolFromSmiles(smiles)
        self.mol_obj = Chem.AddHs(self.mol_obj)
        self.charge = Chem.GetFormalCharge(self.mol_obj)
        self.n_rot_bonds = rdMolDescriptors.CalcNumRotatableBonds(
            self.mol_obj)
        self.n_h_donors = rdMolDescriptors.CalcNumHBD(self.mol_obj)
        self.n_h_acceptors = rdMolDescriptors.CalcNumHBA(self.mol_obj)

    except Exception:
        # Still best effort, but KeyboardInterrupt / SystemExit are no longer
        # swallowed as they were by the bare ``except``.
        logger.error('RDKit failed to generate mol objects')
        return

    logger.info('Running conformation generation with RDKit... running')

    # ETKDGv2 is the default embedding method; ETDG only when requested.
    method = AllChem.ETKDGv2() if use_etdg_confs is False else AllChem.ETDG()
    method.pruneRmsThresh = 0.3
    method.numThreads = Config.n_cores

    conf_ids = list(
        AllChem.EmbedMultipleConfs(self.mol_obj,
                                   numConfs=self.n_confs,
                                   params=method))
    logger.info('                                          ... done')

    try:
        self.volume = AllChem.ComputeMolVolume(self.mol_obj)
    except ValueError:
        logger.error('RDKit failed to compute the molecular volume')
        return

    self.bonds = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                  for b in self.mol_obj.GetBonds()]
    self.conformers = extract_conformers_from_rdkit_mol_object(
        mol_obj=self.mol_obj, conf_ids=conf_ids)

    # Default to the first generated conformer in the absence of any other information
    self.set_atoms(atoms=self.conformers[0].atoms)

    return None
def calc(smi, name):
    """Compute a tuple of RDKit descriptors for one SMILES string.

    Args:
        smi: SMILES string to parse.
        name: Identifier returned as the first tuple element and used in
            the diagnostic written to stderr.

    Returns:
        A tuple ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw,
        csp3, fmf)`` with the floating-point values rounded, or ``None``
        (after a message on stderr) when the SMILES cannot be parsed.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)

    # fmf: share of heavy atoms that belong to the scaffold. A molecule
    # without heavy atoms gets 0 instead of raising ZeroDivisionError.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac

    return (
        name, hba, hbd, hba + hbd, nrings, rtb,
        round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
        round(csp3, 3), round(fmf, 3),
    )
def extractFeatureData(mol):
    """Return a flat list of VSA descriptors and H-bond counts for ``mol``.

    The list holds the SMR_VSA, SlogP_VSA and PEOE_VSA vectors (in that
    order, element by element) followed by the H-bond donor count and the
    H-bond acceptor count.
    """
    descriptor_groups = [
        rdMolDescriptors.SMR_VSA_(mol),
        rdMolDescriptors.SlogP_VSA_(mol),
        rdMolDescriptors.PEOE_VSA_(mol),
        rdMolDescriptors.CalcNumHBD(mol),
        rdMolDescriptors.CalcNumHBA(mol),
    ]

    flattened = []
    for group in descriptor_groups:
        if isinstance(group, int):
            # Scalar descriptor: keep it as a single entry.
            flattened.append(group)
        else:
            # Vector descriptor: splice its elements in.
            flattened.extend(group)
    return flattened
def extractFeatureData(mol):
    """Return a flat list of VSA descriptors and H-bond counts for ``mol``.

    The list holds the SMR_VSA, SlogP_VSA and PEOE_VSA vectors (in that
    order, element by element) followed by the H-bond donor count and the
    H-bond acceptor count. As a side effect the module-level
    ``index_of_1d_feature`` is set to ``-1``.
    """
    global index_of_1d_feature

    smr = rdMolDescriptors.SMR_VSA_(mol)
    slogp = rdMolDescriptors.SlogP_VSA_(mol)
    peoe = rdMolDescriptors.PEOE_VSA_(mol)
    donors = rdMolDescriptors.CalcNumHBD(mol)
    acceptors = rdMolDescriptors.CalcNumHBA(mol)

    # Must point at a scalar (1D) feature in the group list below; a negative
    # index counts backwards from the end of a list.
    index_of_1d_feature = -1

    flattened = []
    for group in (smr, slogp, peoe, donors, acceptors):
        if isinstance(group, int):
            flattened.append(group)
        else:
            flattened.extend(group)
    return flattened
def filter_druglikeness_5_rules(self, smiles):
    """Drop SMILES that violate the drug-likeness thresholds, in place.

    An entry is rejected when any of these holds: MW > 600, ALOGP > 6,
    ALOGP < 0, HBA > 11, HBD > 7, PSA > 180 or strict rotatable bonds > 11.
    Entries that RDKit cannot parse are rejected as well. The number of
    rejected entries is printed.

    Args:
        smiles: Mutable list of SMILES strings; it is modified in place.

    Returns:
        The same ``smiles`` list object, now holding only accepted entries
        in their original order.
    """
    kept = []
    for smi in smiles:
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            # Unparsable input cannot satisfy the rules; count it as rejected.
            continue
        mol = Chem.RemoveHs(mol)

        MW = rdmd._CalcMolWt(mol)
        ALOGP = Crippen.MolLogP(mol)
        HBA = rdmd.CalcNumHBA(mol)
        HBD = rdmd.CalcNumHBD(mol)
        PSA = MolSurf.TPSA(mol)
        ROTB = rdmd.CalcNumRotatableBonds(
            mol, rdmd.NumRotatableBondsOptions.Strict)

        violates = (MW > 600 or ALOGP > 6 or ALOGP < 0 or HBA > 11
                    or HBD > 7 or PSA > 180 or ROTB > 11)
        if not violates:
            kept.append(smi)

    count = len(smiles) - len(kept)
    # Rewrite the caller's list only after iteration has finished; removing
    # items during the loop would skip the entry after each rejected one.
    smiles[:] = kept
    print("unavaliable rule_5_drug:%i" % count)
    return smiles
def calculate_properties(self, smiles=None, mol=None, props=None):
    """Calculate the requested basic properties and store them in ``self.data``.

    Args:
        smiles: SMILES string, parsed only when ``mol`` is not given.
        mol: RDKit molecule; takes precedence over ``smiles``.
        props: Collection of property keys to compute. Recognised keys are
            'py_formula', 'py_em', 'py_n_Cl_Br', 'py_na', 'py_mw',
            'py_fsp3', 'py_rb', 'py_tpsa', 'py_clogp', 'py_nar', 'py_nhba'
            and 'py_nhbd'; other keys are ignored.

    Returns:
        bool: ``True`` on error (no properties requested, no molecule
        supplied, or the SMILES could not be parsed), ``False`` once the
        requested properties have been written to ``self.data``.
    """
    if not props:
        return True

    if mol is None:
        if smiles is None:
            # Neither a molecule nor a SMILES string: nothing to work on.
            return True
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            return True

    def _count_cl_br(molecule):
        symbols = [atom.GetSymbol() for atom in molecule.GetAtoms()]
        return symbols.count('Cl') + symbols.count('Br')

    # (key, calculator) pairs, evaluated in this order so that the keys are
    # inserted into self.data in a stable sequence.
    calculators = (
        ('py_formula', desc.CalcMolFormula),
        ('py_em', lambda molecule: round(desc.CalcExactMolWt(molecule), 5)),
        ('py_n_Cl_Br', _count_cl_br),
        ('py_na', lambda molecule: molecule.GetNumAtoms()),
        ('py_mw', desc._CalcMolWt),
        ('py_fsp3', desc.CalcFractionCSP3),
        ('py_rb', desc.CalcNumRotatableBonds),
        ('py_tpsa', desc.CalcTPSA),
        ('py_clogp', lambda molecule: desc.CalcCrippenDescriptors(molecule)[0]),
        ('py_nar', desc.CalcNumAromaticRings),
        ('py_nhba', desc.CalcNumHBA),
        ('py_nhbd', desc.CalcNumHBD),
    )
    for key, calculate in calculators:
        if key in props:
            self.data[key] = calculate(mol)

    return False
def calculate_scalar_descriptors(molecule, symbols):
    """Return a NumPy array of scalar descriptors for ``molecule``.

    The array holds, in order: asphericity, the two Crippen descriptors,
    exact molecular weight, eccentricity, fraction of sp3 carbons, Labute
    ASA, NPR1, NPR2, Hall-Kier alpha, the number of H, C, N, O and F entries
    in ``symbols``, eight ring-related counts, and the amide-bond, H-bond
    acceptor and H-bond donor counts.

    NOTE(review): the shape descriptors (asphericity, eccentricity,
    NPR1/NPR2) presumably need a 3D conformer on ``molecule`` — confirm
    against the callers.

    Args:
        molecule: RDKit molecule to describe.
        symbols: Sequence of element symbols, one per atom.
    """
    # Shape and physico-chemical descriptors.
    descriptor_values = [rdMD.CalcAsphericity(molecule)]
    descriptor_values.extend(rdMD.CalcCrippenDescriptors(molecule))
    descriptor_values.extend([
        rdMD.CalcExactMolWt(molecule),
        rdMD.CalcEccentricity(molecule),
        rdMD.CalcFractionCSP3(molecule),
        rdMD.CalcLabuteASA(molecule),
        rdMD.CalcNPR1(molecule),
        rdMD.CalcNPR2(molecule),
        rdMD.CalcHallKierAlpha(molecule),
    ])

    # Elemental distribution.
    symbols = np.array(symbols)
    for element in ('H', 'C', 'N', 'O', 'F'):
        descriptor_values.append(np.sum(symbols == element))

    # Ring features, then other counts.
    count_functions = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,       # atom shared between rings with one bond
        rdMD.CalcNumBridgeheadAtoms,  # atom shared between rings with at least two bonds
        rdMD.CalcNumAmideBonds,
        rdMD.CalcNumHBA,              # number of hydrogen acceptors
        rdMD.CalcNumHBD,              # number of hydrogen donors
    )
    for count_function in count_functions:
        descriptor_values.append(count_function(molecule))

    return np.array(descriptor_values)
def generateCompoundPropertiesTask(structure, debug=False):
    """Compute RDKit properties for ``structure`` and save them.

    The properties are written onto the molecule's existing
    ``compoundProperty`` record, or onto a new ``CompoundProperties`` when
    there is none. All descriptors are calculated on the full molecule
    parsed from ``structure.molfile``; ``mw_freebase`` is calculated on the
    salt-stripped molecule and only when that still contains atoms.

    Args:
        structure: Object exposing ``molecule`` and ``molfile``.
        debug: When true, attach to a pydevd debug server on
            localhost:6901 and print an ``IntegrityError`` raised by
            ``save()`` instead of propagating it.

    Raises:
        IntegrityError: If saving fails and ``debug`` is false.
    """
    if debug:
        pydevd.settrace('localhost', port=6901, stdoutToServer=True, stderrToServer=True)

    molecule = structure.molecule
    if not molecule.compoundProperty:
        prop = CompoundProperties(molecule=molecule)
    else:
        prop = molecule.compoundProperty

    saltRemover = SaltRemover()
    mol = Chem.MolFromMolBlock(str(structure.molfile))
    base = saltRemover.StripMol(mol)

    prop.hbd = Descriptors.CalcNumHBD(mol)
    prop.hba = Descriptors.CalcNumHBA(mol)
    prop.rtb = Descriptors.CalcNumRotatableBonds(mol)
    prop.alogp = Crippen.MolLogP(mol)
    prop.psa = Descriptors.CalcTPSA(mol)
    prop.full_mwt = NewDescriptors.MolWt(mol)

    # Stripping salts can leave an empty molecule; skip the free-base weight
    # in that case.
    if base.GetNumAtoms():
        prop.mw_freebase = NewDescriptors.MolWt(base)

    prop.full_molformula = Descriptors.CalcMolFormula(mol)

    try:
        prop.save()
    except IntegrityError as e:
        if debug:
            # print(e) is valid on Python 2 and 3 and does not depend on the
            # removed ``.message`` attribute.
            print(e)
        else:
            # Bare raise keeps the original traceback.
            raise
def get_global_features(self, mol):
    """Assemble a 1-D NumPy array of molecule-level features for ``mol``.

    The vector holds, in order: atom count, bond count, exact molecular
    weight, heavy-atom molecular weight, valence-electron count, the number
    of Acceptor, Donor, Hydrophobe, LumpedHydrophobe, PosIonizable and
    NegIonizable features reported by the ``BaseFeatures.fdef`` feature
    factory, and finally the ``rdMolDescriptors`` values listed in
    ``descriptor_functions`` below.

    Values are returned as computed: NaN entries are not replaced here.
    """
    # Chemical features from RDKit's stock feature definitions.
    fdef_path = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    feature_factory = ChemicalFeatures.BuildFeatureFactory(fdef_path)
    mol_features = feature_factory.GetFeaturesForMol(mol)

    # Basic size and weight features.
    basic_features = [
        mol.GetNumAtoms(),
        mol.GetNumBonds(),
        Descriptors.ExactMolWt(mol),
        Descriptors.HeavyAtomMolWt(mol),
        Descriptors.NumValenceElectrons(mol),
    ]
    # MaxAbsPartialCharge, MaxPartialCharge, MinAbsPartialCharge and
    # MinPartialCharge are left out on purpose: they produced infinity for
    # refcode_csd = YOLJUF
    # (CCOP(=O)(Cc1ccc(cc1)NC(=S)NP(OC(C)C)(OC(C)C)[S])OCC).

    # Tally the features per family; families not listed below are ignored.
    family_tally = {}
    for feature in mol_features:
        family = feature.GetFamily()
        family_tally[family] = family_tally.get(family, 0) + 1
    counted_families = ('Acceptor', 'Donor', 'Hydrophobe', 'LumpedHydrophobe',
                        'PosIonizable', 'NegIonizable')
    family_counts = [family_tally.get(family, 0) for family in counted_families]

    # Descriptors taken from rdMolDescriptors, evaluated in this order.
    # NOTE(review): Chi3v is absent although Chi3n is present — confirm
    # whether that omission is intentional.
    descriptor_functions = (
        rdm.CalcNumRotatableBonds, rdm.CalcChi0n, rdm.CalcChi0v,
        rdm.CalcChi1n, rdm.CalcChi1v, rdm.CalcChi2n, rdm.CalcChi2v,
        rdm.CalcChi3n, rdm.CalcChi4n, rdm.CalcChi4v,
        rdm.CalcFractionCSP3, rdm.CalcHallKierAlpha, rdm.CalcKappa1,
        rdm.CalcKappa2, rdm.CalcLabuteASA,
        rdm.CalcNumAliphaticCarbocycles, rdm.CalcNumAliphaticHeterocycles,
        rdm.CalcNumAliphaticRings, rdm.CalcNumAmideBonds,
        rdm.CalcNumAromaticCarbocycles, rdm.CalcNumAromaticHeterocycles,
        rdm.CalcNumAromaticRings, rdm.CalcNumBridgeheadAtoms, rdm.CalcNumHBA,
        rdm.CalcNumHBD, rdm.CalcNumHeteroatoms, rdm.CalcNumHeterocycles,
        rdm.CalcNumLipinskiHBA, rdm.CalcNumLipinskiHBD, rdm.CalcNumRings,
        rdm.CalcNumSaturatedCarbocycles, rdm.CalcNumSaturatedHeterocycles,
        rdm.CalcNumSaturatedRings, rdm.CalcNumSpiroAtoms, rdm.CalcTPSA,
    )
    descriptor_values = [function(mol) for function in descriptor_functions]

    feature_vector = np.array(
        basic_features + family_counts + descriptor_values).T

    # Some descriptors can come out as NaN. They are passed through unchanged;
    # zeroing them could hide important features and lead to outliers in the
    # training or validation set.
    return feature_vector
# rotatable. RotatableBondSmarts = Chem.MolFromSmarts('[!$(*#*)&!D1]-&!@[!$(*#*)&!D1]') NHOHSmarts = Chem.MolFromSmarts('[#8H1,#7H1,#7H2,#7H3]') NOCountSmarts = Chem.MolFromSmarts('[#7,#8]') # this little trick saves duplicated code def _NumMatches(mol, smarts): return len(mol.GetSubstructMatches(smarts, uniquify=1)) NumHDonors = lambda x: rdMolDescriptors.CalcNumHBD(x) NumHDonors.__doc__ = "Number of Hydrogen Bond Donors" NumHDonors.version = "1.0.0" _HDonors = lambda x, y=HDonorSmarts: x.GetSubstructMatches(y, uniquify=1) NumHAcceptors = lambda x: rdMolDescriptors.CalcNumHBA(x) NumHAcceptors.__doc__ = "Number of Hydrogen Bond Acceptors" NumHAcceptors.version = "2.0.0" _HAcceptors = lambda x, y=HAcceptorSmarts: x.GetSubstructMatches(y, uniquify=1) NumHeteroatoms = lambda x: rdMolDescriptors.CalcNumHeteroatoms(x) NumHeteroatoms.__doc__ = "Number of Heteroatoms" NumHeteroatoms.version = "1.0.0" _Heteroatoms = lambda x, y=HeteroatomSmarts: x.GetSubstructMatches(y, uniquify=1) NumRotatableBonds = lambda x: rdMolDescriptors.CalcNumRotatableBonds(x) NumRotatableBonds.__doc__ = "Number of Rotatable Bonds" NumRotatableBonds.version = "1.0.0" _RotatableBonds = lambda x, y=RotatableBondSmarts: x.GetSubstructMatches( y, uniquify=1) NOCount = lambda x: rdMolDescriptors.CalcNumLipinskiHBA(x) NOCount.__doc__ = "Number of Nitrogens and Oxygens"
def get_fingerprint(SMILES=None, E_BIND=None):
    """
    PRE: Takes in a MOLECULE as a SMILES string, or None
    POST: If SMILES is None, returns the sorted list of feature names.
          Otherwise returns the list of feature values, ordered like the
          sorted feature names. Raises ValueError when no 3D structure can
          be built for the volume calculation.
    E_BIND is accepted for interface compatibility and is not used.
    """

    def get_atom_types(mol):
        """
        PRE: Takes in the mol
        POST: Returns a dictionary with the atom types and numbers
        """
        atom_types = {}
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            atom_types[symbol] = atom_types.get(symbol, 0) + 1
        return atom_types

    def AreRingFused(mol):
        """
        PRE  : Takes in a mol rdkit
        POST : Returns the max number of fused rings. That is the maximum
               number of rings any atom belongs to (0 without rings)
        """
        ring_membership = {}
        for ring in Chem.GetSymmSSSR(mol):
            for atom_idx in ring:
                ring_membership[atom_idx] = ring_membership.get(atom_idx, 0) + 1
        # An empty dict is tested directly: comparing dict.values() with []
        # is never true on Python 3 and max() of nothing raises ValueError.
        if not ring_membership:
            return 0
        return max(ring_membership.values())

    def isRingAromatic(mol, ring):
        """
        PRE: Takes in a mol and a ring given as a tuple of atom id
        POST: Returns TRUE is all the atoms inside the ring are aromatic and
              FALSE otherwise
        """
        return all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring)

    def getVolume(mol):
        """
        PRE: Takes in a mol with HYDROGENS ADDED
        POST: Returns its volume computed from an embedded, MMFF-optimised
              conformer, together with the number of aromatic rings, the
              number of non-aromatic rings and the size of the largest ring
              of each kind
        """
        ra = 0
        largest_ra = 0
        rna = 0
        largest_rna = 0
        for ring in Chem.GetSymmSSSR(mol):
            ring_atoms = tuple(ring)
            if isRingAromatic(mol, ring_atoms):
                ra += 1
                largest_ra = max(largest_ra, len(ring_atoms))
            else:
                rna += 1
                largest_rna = max(largest_rna, len(ring_atoms))

        try:
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            volume = AllChem.ComputeMolVolume(mol)
        except Exception:
            raise ValueError("Can't build the molecule")

        return volume, ra, rna, largest_ra, largest_rna

    features = [
        'atomNbr',
        'Volume',
        'NAtom',
        'OAtom',
        'SAtom',
        'PAtom',
        'ClAtom',
        'BrAtom',
        'FAtom',
        'IAtom',
        'AromaticRingNumber',
        'LargestAromaticRingAtomNbr',
        'NonAromaticRingNumber',
        'LargestNonAromaticRingAtomNbr',
        'MaxNbrFusedRings',
        'SurfaceArea',
        'Charge',
        'NitroNbr',
        'AlcoholNbr',
        'KetoneNbr',
        'NitrileNbr',
        'ThiolNbr',
        'Phenol_likeNbr',
        'EsterNbr',
        'SulfideNbr',
        'CarboxilicAcidNbr',
        'EtherNbr',
        'AmideNbr',
        'AnilineNbr',
        'PrimaryAmineNbr',
        'SecondaryAmineNbr',
        'RotableBondNum',
        'HBondDonor',
        'HBondAcceptor',
        'MolLogP',
        'MolMR'
    ]
    for i in range(6):
        features.append('Chi{}v'.format(i + 1))
        features.append('Chi{}n'.format(i + 1))
        if i < 3:
            features.append('Kappa{}'.format(i + 1))

    feature_dic = dict.fromkeys(features)
    if SMILES is None:
        # No molecule: report the names, in the order values are returned.
        return sorted(feature_dic.keys())

    mol = Chem.MolFromSmiles(SMILES)
    mol = Chem.AddHs(mol)

    feature_dic['RotableBondNum'] = descriptors.CalcNumRotatableBonds(mol)

    for i in range(6):
        feature_dic['Chi{}v'.format(i + 1)] = descriptors.CalcChiNv(mol, i + 1)
        feature_dic['Chi{}n'.format(i + 1)] = descriptors.CalcChiNn(mol, i + 1)

    feature_dic['Kappa1'] = descriptors.CalcKappa1(mol)
    feature_dic['Kappa2'] = descriptors.CalcKappa2(mol)
    feature_dic['Kappa3'] = descriptors.CalcKappa3(mol)

    feature_dic['HBondAcceptor'] = descriptors.CalcNumHBA(mol)
    feature_dic['HBondDonor'] = descriptors.CalcNumHBD(mol)

    CrippenDescriptors = descriptors.CalcCrippenDescriptors(mol)
    feature_dic['MolLogP'] = CrippenDescriptors[0]
    feature_dic['MolMR'] = CrippenDescriptors[1]

    atom_types = get_atom_types(mol)
    for feat, symbol in zip(['NAtom', 'OAtom', 'SAtom', 'PAtom', 'ClAtom', 'BrAtom', 'FAtom', 'IAtom'],
                            ['N', 'O', 'S', 'P', 'Cl', 'Br', 'F', 'I']):
        feature_dic[feat] = atom_types.get(symbol, 0)

    feature_dic['atomNbr'] = mol.GetNumHeavyAtoms()

    (feature_dic['Volume'],
     feature_dic['AromaticRingNumber'],
     feature_dic['NonAromaticRingNumber'],
     feature_dic['LargestAromaticRingAtomNbr'],
     feature_dic['LargestNonAromaticRingAtomNbr']) = getVolume(mol)

    feature_dic['MaxNbrFusedRings'] = AreRingFused(mol)

    # The 'SurfaceArea' entry holds the value returned by CalcTPSA.
    feature_dic['SurfaceArea'] = descriptors.CalcTPSA(mol)

    feature_dic['Charge'] = Chem.GetFormalCharge(mol)

    # SMARTS pattern -> feature name; each feature is the number of
    # substructure matches of its pattern.
    funct_dic = {
        '[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]': 'NitroNbr',
        '[#6][OX2H]': 'AlcoholNbr',
        '[NX1]#[CX2]': 'NitrileNbr',
        '[#6][CX3](=O)[#6]': 'KetoneNbr',
        '[#16X2H]': 'ThiolNbr',
        "[OX2H][cX3][c]": 'Phenol_likeNbr',
        '[#6][CX3](=O)[OX2H0][#6]': 'EsterNbr',
        '[#16X2H0]': 'SulfideNbr',
        '[CX3](=O)[OX2H1]': 'CarboxilicAcidNbr',
        '[OD2]([#6])[#6]': 'EtherNbr',
        '[#7X3][#6X3](=[OX1])[#6]': 'AmideNbr',
        '[NX3][cc]': 'AnilineNbr',
        '[NX3H2;!$(NC=O)]': 'PrimaryAmineNbr',
        '[NX3H1;!$(NC=O)]': 'SecondaryAmineNbr'}

    for funct in funct_dic:
        patt = Chem.MolFromSmarts(funct)
        feature_dic[funct_dic[funct]] = len(mol.GetSubstructMatches(patt))

    return [feature_dic[key] for key in sorted(feature_dic.keys())]
'HBD', 'jIndex' ] for name in prop_names: d[f'{name}'] = [] for i, s in enumerate(smiles): if (i % 10000 == 0): print(i) m = Chem.MolFromSmiles(s) if (m == None or 'i' in s or '.' in s): DUD = DUD.drop(i) print(s, i) else: d['QED'].append(QED.default(m)) d['logP'].append(Crippen.MolLogP(m)) d['molWt'].append(Descriptors.MolWt(m)) d['maxCharge'].append(Descriptors.MaxPartialCharge(m)) d['minCharge'].append(Descriptors.MinPartialCharge(m)) d['valence'].append(Descriptors.NumValenceElectrons(m)) d['TPSA'].append(rdMolDescriptors.CalcTPSA(m)) d['HBA'].append(rdMolDescriptors.CalcNumHBA(m)) d['HBD'].append(rdMolDescriptors.CalcNumHBD(m)) d['jIndex'].append(GraphDescriptors.BalabanJ(m)) df = pd.DataFrame.from_dict(d) df_merge = pd.merge(df, DUD, on=df.index) #df_merge.to_csv('/home/mcb/jboitr/data/DUD_full.csv') df_merge.to_csv('C:/Users/jacqu/Documents/data/DUD_full.csv')
def loadSDF(sdfPath): # Create images #generateImages(sdfPath) # Create a molecule supplier suppl = Chem.SDMolSupplier(sdfPath) # Filter empty entries sdf = [x for x in suppl if x is not None] # For each molecule in supplier for mol in sdf: data = {} try: data['fCharge'] = mol.GetProp('Charge') except: data['fCharge'] = Chem.GetFormalCharge(mol) try: data['name'] = mol.GetProp('DATABASE_ID') except: data['name'] = 'unkown' try: data['molMass'] = mol.GetProp('Total Molweight') except: data['molMass'] = Descriptors.ExactMolWt(mol) try: data['cLogP'] = mol.GetProp('cLogP') except: data['cLogP'] = Crippen.MolLogP(mol) # não sei se ta certo try: data['cLogS'] = mol.GetProp('cLogS') except: data['cLogS'] = 0.0 try: data['tpsa'] = mol.GetProp('Polar Surface Area') except: data['tpsa'] = rdMolDescriptors.CalcTPSA(mol) try: data['totalSurfaceArea'] = mol.GetProp('Total Surface Area') except: data['totalSurfaceArea'] = rdMolDescriptors.CalcTPSA(mol) try: data['hbondAcceptors'] = mol.GetProp('H-Acceptors') except: data['hbondAcceptors'] = rdMolDescriptors.CalcNumHBA(mol) try: data['hbondDonnors'] = mol.GetProp('H-Donors') except: data['hbondDonnors'] = rdMolDescriptors.CalcNumHBD(mol) try: data['rotable'] = mol.GetProp('Rotatable Bonds') except: data['rotable'] = rdMolDescriptors.CalcNumRotatableBonds(mol) try: data['mutagenic'] = mol.GetProp('Mutagenic') except: data['mutagenic'] = 'Unknown' try: data['tumorigenic'] = mol.GetProp('Tumorigenic') except: data['tumorigenic'] = 'Unknown' try: data['irritant'] = mol.GetProp('Irritant') except: data['irritant'] = 'Unkown' try: data['smiles'] = mol.GetProp('SMILES') except: data['smiles'] = Chem.MolToSmiles(mol) try: data['InChI'] = mol.GetProp('INCHI_IDENTIFIER') except: data['InChI'] = inchi.MolToInchi(mol) try: data['inchiKey'] = mol.GetProp('INCHI_KEY') except: data['inchiKey'] = inchi.MolToInchiKey(mol) try: data['nonHAtoms'] = mol.GetProp('Non-H Atoms') except: data['nonHAtoms'] = -1 # Não sei calcular try: data['numAtoms'] = 
mol.GetProp('numAtoms') except: data['numAtoms'] = mol.GetNumAtoms() try: data['stereoCenters'] = mol.GetProp('Stereo Centers') except: data['stereoCenters'] = mol.GetNumAtoms() try: data['provider'] = mol.GetProp('DATABASE_NAME') except: print("Nenhum fornecedor encontrado, o campo é obrigatório!") continue tmp = AllChem.Compute2DCoords(mol) # Compute its coordinates Draw.MolToFile(mol, os.path.join(settings.FILES_DIR, f'molImages/' + data["inchiKey"] + '.png'), size=(300,300), kekulize=True, wedgeBonds=True, fitImage=True) # Save it Draw.MolToFile(mol, os.path.join(settings.FILES_DIR, f'molThumbs/' + data["inchiKey"] + '.png'), size=(150,150), kekulize=True, wedgeBonds=True, fitImage=True) feedDatabase(data) if Compounds.objects.filter(inChIKey=data['inchiKey']).exists(): if not Compounds.objects.filter(provider=['provider']).exists(): feedDatabase(data) print("feed1") # append no sdf da base de dados a = 1 else: print("continue123") continue else: a = 1 feedDatabase(data) print("feed2") '''except:
def _indexed_ids(container, count):
    """Format ``container[0] .. container[count - 1]`` as ``a,b,c`` (``-1`` if none)."""
    if count == 0:
        return "-1"
    return ",".join(str(container[i]) for i in range(count))


def _descriptor_fields(mol):
    """Return the eight count/charge columns shared by all three dump files."""
    return [
        str(mol.GetNumAtoms()),
        str(mol.GetNumBonds()),
        str(mol.GetNumHeavyAtoms()),
        str(rdmolops.GetFormalCharge(mol)),
        str(rdMolDescriptors.CalcNumHBD(mol)),
        str(rdMolDescriptors.CalcNumHBA(mol)),
        str(rdMolDescriptors.CalcNumRotatableBonds(mol)),
        str(rdMolDescriptors.CalcNumRings(mol)),
    ]


def _write_dump_header(log, database, banner, columns):
    """Write the common preamble, the three ``banner`` lines and the column legend."""
    log.log("### datadump of database %s" % database)
    log.log("### timestamp %s" % time.asctime(time.localtime(time.time())))
    log.log("### written by run_fragresp.py datadump routine.")
    log.log("###")
    for line in banner:
        log.log(line)
    log.log("###")
    log.log(columns)


def _log_row(log, fields):
    """Emit one data row: fields separated by a space, with a trailing space."""
    log.log(" ".join(fields) + " ")


def datadump(database, dumpdir):
    """Dump a pickled fragment database to text tables and PNG depictions.

    Creates ``dumpdir`` with a ``png`` sub-directory and writes ``frag.dat``,
    ``mol.dat`` and ``surr.dat`` (one row per fragment, molecule and
    non-terminal connection surrogate) plus one 500x500 PNG per row.

    Args:
        database: Path of the pickled database object.
        dumpdir: Directory to create; it must not exist yet.

    Raises:
        Warning: If ``dumpdir`` already exists.
    """
    # NOTE(review): unpickling executes arbitrary code; only load database
    # files that come from a trusted source.
    with open(database, "rb") as handle:
        db = pickle.load(handle)

    if os.path.exists(dumpdir):
        # The directory name used to be missing from the message: the "%s"
        # placeholder was never filled in.
        raise Warning(
            "Caution, %s already exists. Already existing data may be overwritten."
            % dumpdir
        )
    os.mkdir(dumpdir)
    os.mkdir(dumpdir + "/png")

    frag2mol = db.get_frag2mol()
    frag2lcapconn = db.get_frag2lcapconn()
    frag2rcapconn = db.get_frag2rcapconn()
    mol2frag = db.get_mol2frag()

    # ----- fragments -----
    frag_log = logger(dumpdir + "/frag.dat")
    try:
        _write_dump_header(
            frag_log,
            database,
            ("### ----------------- ###",
             "### FRAGMENT DATA LOG ###",
             "### ----------------- ###"),
            "# id smiles mol_id lcap_id rcap_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
        )
        for frag_i in range(db.get_frag_count()):
            frag = db.get_frag(frag_i)
            Chem.SanitizeMol(frag)

            row = [
                str(frag_i),
                str(Chem.MolToSmiles(frag, isomericSmiles=True)),
                _indexed_ids(frag2mol[frag_i], len(frag2mol[frag_i])),
                _indexed_ids(frag2lcapconn[frag_i], len(frag2lcapconn[frag_i])),
                _indexed_ids(frag2rcapconn[frag_i], len(frag2rcapconn[frag_i])),
            ]
            row.extend(_descriptor_fields(frag))
            _log_row(frag_log, row)

            png_path = dumpdir + "/png/" + "frag_%d.png" % frag_i
            # Drawing a fragment is best effort: report the failure and go on.
            try:
                AllChem.Compute2DCoords(frag)
                Draw.MolToFile(frag, png_path, size=(500, 500))
            except Exception:
                print("Could not save frag %d to disk." % frag_i)
    finally:
        frag_log.close()

    # ----- molecules -----
    mol_log = logger(dumpdir + "/mol.dat")
    try:
        _write_dump_header(
            mol_log,
            database,
            ("### ----------------- ###",
             "### MOLECULE DATA LOG ###",
             "### ----------------- ###"),
            "# id name smiles frag_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
        )
        for mol_i in range(db.get_mol_count()):
            mol = db.get_mol(mol_i)
            Chem.SanitizeMol(mol)
            name = db.get_name(mol_i)
            decomp = db.get_decompose(mol_i)

            row = [
                str(mol_i),
                name,
                str(Chem.MolToSmiles(mol, isomericSmiles=True)),
                # The number of ids comes from the decomposition object, the
                # ids themselves from the mol -> fragment map.
                _indexed_ids(mol2frag[mol_i], decomp.get_frag_count()),
            ]
            row.extend(_descriptor_fields(mol))
            _log_row(mol_log, row)

            png_path = dumpdir + "/png/" + "mol_%d.png" % mol_i
            AllChem.Compute2DCoords(mol)
            Chem.Kekulize(mol)
            Draw.MolToFile(mol, png_path, size=(500, 500))
    finally:
        mol_log.close()

    # ----- connection surrogates -----
    surr_log = logger(dumpdir + "/surr.dat")
    try:
        _write_dump_header(
            surr_log,
            database,
            ("### ----------------- ###",
             "### SURROGATE DATA LOG ###",
             "### ------------------ ###"),
            "# id name smiles mol_id Natoms Nbonds Nnonhatoms Chg Nhbd Nhba Nrotbonds Nrings",
        )
        # Fetched once instead of once per connection.
        conn2mol_map = db.get_conn2mol()
        for conn_i, conn in enumerate(db.get_conn_list()):
            if conn.get_terminal():
                continue
            name = conn.get_name()
            conn_cap = conn.get_surrogate_cap()
            Chem.SanitizeMol(conn_cap)

            conn2mol = conn2mol_map[conn_i]
            row = [
                str(conn_i),
                name,
                str(Chem.MolToSmiles(conn_cap, isomericSmiles=True)),
                _indexed_ids(conn2mol, len(conn2mol)),
            ]
            row.extend(_descriptor_fields(conn_cap))
            _log_row(surr_log, row)

            png_path = dumpdir + "/png/" + "surr_%s.png" % (conn_i)
            AllChem.Compute2DCoords(conn_cap)
            Chem.Kekulize(conn_cap)
            Draw.MolToFile(conn_cap, png_path, size=(500, 500))
    finally:
        surr_log.close()
def calculate(self):
    """Return the H-bond acceptor count that RDKit computes for ``self.mol``."""
    molecule = self.mol
    return rdMolDescriptors.CalcNumHBA(molecule)
def get_molecular_features(dataframe, mol_list):
    """Fill ``dataframe`` with RDKit descriptors for every molecule in ``mol_list``.

    Row ``i`` of the frame receives the values computed for the ``i``-th
    molecule: atom/bond counts, weights, Morgan fingerprint densities, the
    number of pharmacophore features per family and a set of
    ``rdMolDescriptors`` values. Columns are created on first assignment.

    Args:
        dataframe: Frame to update in place; rows are addressed by the labels
            ``0 .. len(mol_list) - 1``.
        mol_list: Sequence of RDKit molecules.

    Returns:
        The same ``dataframe`` object.
    """
    df = dataframe
    if len(mol_list) == 0:
        return df

    # The feature factory does not depend on the molecule, so build it once
    # rather than re-reading the definition file for every molecule.
    fdefName = os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')
    factory = ChemicalFeatures.BuildFeatureFactory(fdefName)

    # Pharmacophore families that get a count column (same name as the family).
    feature_families = (
        "Acceptor",
        "Donor",
        "Hydrophobe",
        "LumpedHydrophobe",
        "PosIonizable",
        "NegIonizable",
    )
    # rdMolDescriptors functions whose result is stored in a column named
    # after the function. CalcKappa3 is intentionally not part of the list.
    descriptor_names = (
        "CalcChi0n", "CalcChi0v",
        "CalcChi1n", "CalcChi1v",
        "CalcChi2n", "CalcChi2v",
        "CalcChi3n", "CalcChi3v",
        "CalcChi4n", "CalcChi4v",
        "CalcFractionCSP3",
        "CalcHallKierAlpha",
        "CalcKappa1",
        "CalcKappa2",
        "CalcLabuteASA",
        "CalcNumAliphaticCarbocycles",
        "CalcNumAliphaticHeterocycles",
        "CalcNumAliphaticRings",
        "CalcNumAmideBonds",
        "CalcNumAromaticCarbocycles",
        "CalcNumAromaticHeterocycles",
        "CalcNumAromaticRings",
        "CalcNumBridgeheadAtoms",
        "CalcNumHBA",
        "CalcNumHBD",
        "CalcNumHeteroatoms",
        "CalcNumHeterocycles",
        "CalcNumLipinskiHBA",
        "CalcNumLipinskiHBD",
        "CalcNumRings",
        "CalcNumSaturatedCarbocycles",
        "CalcNumSaturatedHeterocycles",
        "CalcNumSaturatedRings",
        "CalcNumSpiroAtoms",
        "CalcTPSA",
    )

    for i, mol in enumerate(mol_list):
        print("Getting molecular features for molecule: ", i)

        df.at[i, "NbrAtoms"] = mol.GetNumAtoms()
        df.at[i, "NbrBonds"] = mol.GetNumBonds()
        df.at[i, "mw"] = Descriptors.ExactMolWt(mol)
        df.at[i, 'HeavyAtomMolWt'] = Chem.Descriptors.HeavyAtomMolWt(mol)
        df.at[i, 'NumValenceElectrons'] = Chem.Descriptors.NumValenceElectrons(mol)
        # The four partial-charge descriptors (Max/Min[Abs]PartialCharge) are
        # left out on purpose: they yielded infinity for refcode_csd YOLJUF.
        df.at[i, 'FpDensityMorgan1'] = Chem.Descriptors.FpDensityMorgan1(mol)
        df.at[i, 'FpDensityMorgan2'] = Chem.Descriptors.FpDensityMorgan2(mol)
        df.at[i, 'FpDensityMorgan3'] = Chem.Descriptors.FpDensityMorgan3(mol)

        # Count the features per family; families outside the list are ignored.
        families = [feat.GetFamily() for feat in factory.GetFeaturesForMol(mol)]
        for family in feature_families:
            df.at[i, family] = families.count(family)

        df.at[i, "NumRotatableBonds"] = rdMolDescriptors.CalcNumRotatableBonds(mol)
        for descriptor in descriptor_names:
            df.at[i, descriptor] = getattr(rdMolDescriptors, descriptor)(mol)

    return df
def compute_HBA(self, mol_input):
    """Return the H-bond acceptor count that RDKit computes for ``mol_input``."""
    acceptor_count = rdMolDescriptors.CalcNumHBA(mol_input)
    return acceptor_count
num_Hs = atom.GetTotalNumHs() H_count += num_Hs feature_list.append(count) feature_list[16] = H_count #Calculates the total mass of the aromatic atoms in the molecule mass_aromatic_atoms = 0 for atom in mol_obj.GetAtoms(): if atom.GetIsAromatic(): mass_aromatic_atoms += atom.GetMass() num_Hs = atom.GetTotalNumHs() mass_aromatic_atoms += H_mass * num_Hs feature_list.append(mass_aromatic_atoms) #Counting number of H-bond donors and acceptors HBAs = Descriptor.CalcNumHBA(mol_obj) HBDs = Descriptor.CalcNumHBD(mol_obj) feature_list.append(HBAs + HBDs) return tuple(feature_list) #Overwrites the feature_array key of the second-level dictionary of the molecule for mol in suppl: if mol is None: continue #Defines the chemical name of the molecule chemical_name = mol.GetProp('ChemName').rstrip() #Try's to update the feature_array key of the second-level dictionary of the molecule try: