def _calculateDescriptors(mol):
    """Build a one-row DataFrame of RDKit descriptors for ``mol``.

    The row (index ``0``) first holds the scalar descriptors listed in
    ``scalar_values`` below, followed by the vector descriptor families
    ``slogp_VSA1..12``, ``smr_VSA1..10``, ``peoe_VSA1..14`` and ``MQN1..42``.

    :param mol: molecule object accepted by the RDKit descriptor functions
    :return: ``pandas.DataFrame`` with a single row and one column per descriptor
    """
    # One call yields both Crippen values (index 0 -> SlogP, index 1 -> SMR);
    # evaluate it once instead of once per column.
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)

    # Insertion order of this dict is the column order of the result.
    scalar_values = {
        "SlogP": crippen[0],
        "SMR": crippen[1],
        "LabuteASA": rdMolDescriptors.CalcLabuteASA(mol),
        "TPSA": Descriptors.TPSA(mol),
        "AMW": Descriptors.MolWt(mol),
        "ExactMW": rdMolDescriptors.CalcExactMolWt(mol),
        "NumLipinskiHBA": rdMolDescriptors.CalcNumLipinskiHBA(mol),
        "NumLipinskiHBD": rdMolDescriptors.CalcNumLipinskiHBD(mol),
        "NumRotatableBonds": rdMolDescriptors.CalcNumRotatableBonds(mol),
        "NumHBD": rdMolDescriptors.CalcNumHBD(mol),
        "NumHBA": rdMolDescriptors.CalcNumHBA(mol),
        "NumAmideBonds": rdMolDescriptors.CalcNumAmideBonds(mol),
        "NumHeteroAtoms": rdMolDescriptors.CalcNumHeteroatoms(mol),
        "NumHeavyAtoms": Chem.rdchem.Mol.GetNumHeavyAtoms(mol),
        "NumAtoms": Chem.rdchem.Mol.GetNumAtoms(mol),
        "NumRings": rdMolDescriptors.CalcNumRings(mol),
        "NumAromaticRings": rdMolDescriptors.CalcNumAromaticRings(mol),
        "NumSaturatedRings": rdMolDescriptors.CalcNumSaturatedRings(mol),
        "NumAliphaticRings": rdMolDescriptors.CalcNumAliphaticRings(mol),
        "NumAromaticHeterocycles":
            rdMolDescriptors.CalcNumAromaticHeterocycles(mol),
        "NumSaturatedHeterocycles":
            rdMolDescriptors.CalcNumSaturatedHeterocycles(mol),
        "NumAliphaticHeterocycles":
            rdMolDescriptors.CalcNumAliphaticHeterocycles(mol),
        "NumAromaticCarbocycles":
            rdMolDescriptors.CalcNumAromaticCarbocycles(mol),
        "NumSaturatedCarbocycles":
            rdMolDescriptors.CalcNumSaturatedCarbocycles(mol),
        "NumAliphaticCarbocycles":
            rdMolDescriptors.CalcNumAliphaticCarbocycles(mol),
        "FractionCSP3": rdMolDescriptors.CalcFractionCSP3(mol),
        "Chi0v": rdMolDescriptors.CalcChi0v(mol),
        "Chi1v": rdMolDescriptors.CalcChi1v(mol),
        "Chi2v": rdMolDescriptors.CalcChi2v(mol),
        "Chi3v": rdMolDescriptors.CalcChi3v(mol),
        "Chi4v": rdMolDescriptors.CalcChi4v(mol),
        "Chi1n": rdMolDescriptors.CalcChi1n(mol),
        "Chi2n": rdMolDescriptors.CalcChi2n(mol),
        "Chi3n": rdMolDescriptors.CalcChi3n(mol),
        "Chi4n": rdMolDescriptors.CalcChi4n(mol),
        "HallKierAlpha": rdMolDescriptors.CalcHallKierAlpha(mol),
        "kappa1": rdMolDescriptors.CalcKappa1(mol),
        "kappa2": rdMolDescriptors.CalcKappa2(mol),
        "kappa3": rdMolDescriptors.CalcKappa3(mol),
    }
    df = pd.DataFrame(index=[0]).assign(**scalar_values)

    # (column prefix, number of components, RDKit function returning them)
    vector_families = (
        ("slogp_VSA", 12, rdMolDescriptors.SlogP_VSA_),
        ("smr_VSA", 10, rdMolDescriptors.SMR_VSA_),
        ("peoe_VSA", 14, rdMolDescriptors.PEOE_VSA_),
        ("MQN", 42, rdMolDescriptors.MQNs_),
    )
    for prefix, size, compute in vector_families:
        column_names = [prefix + str(i) for i in range(1, size + 1)]
        # zip() stops at the shorter sequence, exactly as before.
        df = df.assign(**dict(zip(column_names, compute(mol))))
    return df
def main(in_file, output):
    """Tabulate basic RDKit properties of the molecules in ``in_file``.

    Molecules are loaded through ``rdkit_open``; one record per molecule is
    collected, keyed by the first whitespace-separated token of its ``_Name``
    property, and the selected columns are written to ``<output>.csv`` and
    ``<output>.xlsx``.

    :param in_file: input molecule file handed to ``rdkit_open``
    :param output: output path prefix (extensions are appended here)
    """
    Cmpds = {}
    InMols = rdkit_open([in_file])
    print('\n # Number of input molecule: {0}'.format(len(InMols)))

    for mol in InMols:
        name = mol.GetProp('_Name').split()[0]
        # logP and molar refractivity come from the same call: compute once.
        crippen = rd.CalcCrippenDescriptors(mol)
        # NOTE: records are keyed by name, so a repeated name replaces the
        # earlier record.
        Cmpds[name] = {
            'Name': name,
            'Formula': rd.CalcMolFormula(mol),
            'MW': rd._CalcMolWt(mol),                    # Molecular Weight
            'logP': crippen[0],                          # Partition coefficient
            'HDon': rd.CalcNumLipinskiHBD(mol),          # Lipinski Hbond donor
            'HAcc': rd.CalcNumLipinskiHBA(mol),          # Lipinski Hbond acceptor
            'TPSA': rd.CalcTPSA(mol),                    # Topological polar surface area
            'Rotat': rd.CalcNumRotatableBonds(mol, strict=True),  # Rotatable bond
            'MolRef': crippen[1],                        # Molar refractivity
            'AliRing': rd.CalcNumAliphaticRings(mol),    # Aliphatic ring number
            'AroRing': rd.CalcNumAromaticRings(mol),     # Aromatic ring number
            # The earlier plain MolToSmiles(mol) store was always overwritten
            # by this one, so only this call is kept.
            'SMILES': Chem.MolToSmiles(mol, isomericSmiles=True,
                                       allHsExplicit=False),
        }

    df = pd.DataFrame.from_dict(Cmpds, orient='index')
    df.index.name = 'Name'

    # Columns of data to print out, in output order
    Columns = [
        'Formula', 'MW', 'logP', 'HDon', 'HAcc', 'TPSA',
        'Rotat', 'MolRef', 'AliRing', 'AroRing',
        'SMILES',
    ]
    reorder = df[Columns]

    # Output to CSV
    reorder.to_csv(
        output+'.csv', sep=',', na_rep='NA', encoding='utf-8',
        float_format='%.5f', header=True
    )

    # Output to Excel
    reorder.to_excel(output+'.xlsx', header=True, na_rep='NA')
def main():
    """Append Lipinski / polar-surface-area descriptors to the submissions table.

    Reads ``submissions_final_result.csv``, computes descriptors from each
    row's ``smiles_string``, merges them back onto the input on
    ``submission_id`` and ``smiles_string``, and writes
    ``lipinski_psa_result.csv``.
    """
    submissions = pd.read_csv("submissions_final_result.csv")

    def describe(row):
        """Return the descriptor record for one submission row."""
        mol = Chem.MolFromSmiles(row['smiles_string'])
        return {
            'submission_id': row['submission_id'],
            'smiles_string': row['smiles_string'],
            # Lipinski's rule
            'h_bond_donor': rd.CalcNumLipinskiHBD(mol),
            'h_bond_acceptor': rd.CalcNumLipinskiHBA(mol),
            # Key spelling is part of the output column name; kept as is.
            'moluclar_mass': rd._CalcMolWt(mol),
            'log_p': rd.CalcCrippenDescriptors(mol)[0],
            # Topological polar surface area
            'topological_polar_surface_area': rd.CalcTPSA(mol),
        }

    records = [describe(row) for _, row in submissions.iterrows()]

    merged = pd.merge(
        submissions,
        pd.DataFrame(records),
        on=['submission_id', 'smiles_string'],
    )
    merged.to_csv("lipinski_psa_result.csv", index=False, encoding='utf-8')
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    :param smi: SMILES string to parse
    :param name: identifier echoed as the first element of the result and
        used in diagnostics
    :return: ``None`` if the SMILES cannot be parsed or any descriptor raises
        (a message is written to stderr); otherwise the tuple
        ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw, csp3,
        fmf, qed, hac, nrings_fused, n_unique_hba_hbd_atoms, max_ring_size,
        n_chiral_centers, fcsp3_bm)`` with floats rounded to 2 or 3 decimals.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so KeyboardInterrupt / SystemExit still
    # propagate instead of being reported as a descriptor failure.
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()

        # The scaffold feeds both fmf and fcsp3_bm; build it once.
        scaffold = GetScaffoldForMol(m)
        # fmf = scaffold heavy atoms / molecule heavy atoms; 0 when there are
        # no heavy atoms, to avoid dividing by zero.
        fmf = 0 if hac == 0 else scaffold.GetNumHeavyAtoms() / hac

        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        n_unique_hba_hbd_atoms = count_hbd_hba_atoms(m)
        # Size of the largest ring; 0 when the molecule has no rings.
        max_ring_size = len(max(m.GetRingInfo().AtomRings(), key=len, default=()))
        n_chiral_centers = len(FindMolChiralCenters(m, includeUnassigned=True))
        fcsp3_bm = rdMolDescriptors.CalcFractionCSP3(scaffold)

        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3),
            hac, nrings_fused, n_unique_hba_hbd_atoms,
            max_ring_size, n_chiral_centers, round(fcsp3_bm, 3),
        )
    except Exception:
        sys.stderr.write(f'molecule {name} was omitted due to an error in calculation of some descriptors\n')
        return None
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    :param smi: SMILES string to parse
    :param name: identifier echoed as the first element of the result and
        used in diagnostics
    :return: ``None`` if the SMILES cannot be parsed or any descriptor raises
        (a message is written to stderr); otherwise the tuple
        ``(name, hba, hbd, hba + hbd, nrings, rtb, psa, logp, mr, mw, csp3,
        fmf, qed, hac, nrings_fused)`` with floats rounded to 2 or 3 decimals.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    # Exception (not a bare except) so KeyboardInterrupt / SystemExit still
    # propagate instead of being reported as a descriptor failure.
    try:
        hba = rdMolDescriptors.CalcNumHBA(m)
        hbd = rdMolDescriptors.CalcNumHBD(m)
        nrings = rdMolDescriptors.CalcNumRings(m)
        rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
        psa = rdMolDescriptors.CalcTPSA(m)
        logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
        mw = rdMolDescriptors._CalcMolWt(m)
        csp3 = rdMolDescriptors.CalcFractionCSP3(m)
        hac = m.GetNumHeavyAtoms()
        # fmf = scaffold heavy atoms / molecule heavy atoms; 0 when there are
        # no heavy atoms, to avoid dividing by zero.
        if hac == 0:
            fmf = 0
        else:
            fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac
        qed = QED.qed(m)
        nrings_fused = fused_ring_count(m)
        return (
            name, hba, hbd, hba + hbd, nrings, rtb,
            round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
            round(csp3, 3), round(fmf, 3), round(qed, 3),
            hac, nrings_fused,
        )
    except Exception:
        sys.stderr.write(
            f'molecule {name} was omitted due to an error in calculation of some descriptors\n'
        )
        return None
def calculate_crippen_logp(self):
    '''
    Calculate the Crippen LogP of ``self.mol``.

    :return: first element of ``CalcCrippenDescriptors(self.mol)``
    '''
    return rdMolDescriptors.CalcCrippenDescriptors(self.mol)[0]
def calculate_properties(self, mol):
    """Calculate basic properties for ``mol``.

    returns : list of int or float, in this order: atom count, Crippen logP,
    TPSA, rotatable-bond count, fraction of sp3 carbons
    """
    return [
        mol.GetNumAtoms(),
        desc.CalcCrippenDescriptors(mol)[0],
        desc.CalcTPSA(mol),
        desc.CalcNumRotatableBonds(mol),
        desc.CalcFractionCSP3(mol),
    ]
def __init__(self):
    """Set up temp storage, the built-in property table and optional API properties.

    ``self._properties`` holds ``(key, description, format, callable)``
    tuples. If the file named by ``API_SETTINGS`` exists, it is loaded as
    JSON into ``self.api``, validated, and every endpoint property is
    appended as a ``fetch_property`` partial. When the config sets
    ``overwrite``, the built-in properties are dropped first. If the file is
    absent, ``self.api`` is never assigned.

    Raises:
        Exception: when the API config is missing an endpoint name, a
            required key, or has the wrong container type.
    """
    self.temp_dir = tempfile.TemporaryDirectory()
    # delete=False: the file is not removed on close; it lives inside
    # temp_dir.
    self.temp_sdf = tempfile.NamedTemporaryFile(delete=False,
                                                suffix='.sdf',
                                                dir=self.temp_dir.name)
    self.api_cache = {}
    self.esol = ESOLCalculator()
    self._properties = [
        ('MW', 'Molecular Weight', '%.3f', Desc.MolWt),
        ('logP', 'Lipophilicity (logP)', '%.3f',
         lambda mol: mDesc.CalcCrippenDescriptors(mol)[0]),
        ('TPSA', 'Total Polar Surface Area', '%.3f', mDesc.CalcTPSA),
        ('ESOL', 'Estimated Solubility', '%.3f', self.esol.calc_esol),
        ('HBA', '# H-Bond Acceptors', '%d', mDesc.CalcNumHBA),
        ('HBD', '# H-Bond Donors', '%d', mDesc.CalcNumHBD),
        ('RB', '# Rotatable Bonds', '%d', mDesc.CalcNumRotatableBonds),
        ('AR', '# Aromatic Rings', '%d', mDesc.CalcNumAromaticRings)
    ]

    if not os.path.exists(API_SETTINGS):
        return

    with open(API_SETTINGS, 'r') as f:
        self.api = json.load(f)

    if self.api.get('overwrite'):
        self._properties = []

    # validate config
    required_endpoint_keys = ['url', 'method', 'data']
    required_property_keys = ['description', 'format', 'path']
    try:
        for endpoint in self.api.get('endpoints'):
            if not endpoint.get('name'):
                raise Exception('Invalid config: missing endpoint name')
            # Plain indexing is the check: a missing key raises KeyError and
            # a non-mapping raises TypeError, both handled below.
            for k in required_endpoint_keys:
                endpoint[k]
            for prop, info in endpoint.get('properties').items():
                for k in required_property_keys:
                    info[k]
    except KeyError as key:
        # Chain the cause so the original traceback is kept.
        raise Exception(
            f'Invalid config: missing {key} on {endpoint["name"]}') from key
    except TypeError as err:
        raise Exception(
            'Invalid config: array where object should be') from err

    # register properties
    for endpoint in self.api.get('endpoints'):
        for prop, info in endpoint['properties'].items():
            fn = partial(self.fetch_property, endpoint, prop)
            self._properties.append(
                (prop, info['description'], info['format'], fn))
def get_lipinksi_test(mol, rule_test):
    """Check a molecular-weight window plus a single threshold on three features.

    The molecule's property cache is refreshed (non-strict) first.

    :param mol: molecule to test
    :param rule_test: upper bound applied alike to Crippen logP, the Lipinski
        H-bond donor count and the Lipinski H-bond acceptor count
    :return: True when the exact molecular weight lies strictly between 300
        and 500 and all three features are ``<= rule_test``; False otherwise
    """
    mol.UpdatePropertyCache(strict=False)
    exact_mw = rdMolDescriptors.CalcExactMolWt(mol)

    # CalcCrippenDescriptors returns (logP, MR); only logP is tested.
    crippen_logp = rdMolDescriptors.CalcCrippenDescriptors(mol)[0]
    n_donors = rdMolDescriptors.CalcNumLipinskiHBD(mol)
    n_acceptors = rdMolDescriptors.CalcNumLipinskiHBA(mol)

    within_threshold = all(
        feature <= rule_test
        for feature in (crippen_logp, n_donors, n_acceptors)
    )

    if not (300 < exact_mw < 500):
        return False
    return within_threshold
def calc(smi, name):
    """Compute a tuple of descriptors for one SMILES string.

    :param smi: SMILES string to parse
    :param name: identifier echoed as the first element of the result and
        used in diagnostics
    :return: ``None`` if the SMILES cannot be parsed (a message is written to
        stderr); otherwise the tuple ``(name, hba, hbd, hba + hbd, nrings,
        rtb, psa, logp, mr, mw, csp3, fmf)`` with floats rounded to 2 or 3
        decimals. Errors raised by the descriptor functions propagate.
    """
    m = Chem.MolFromSmiles(smi)
    if m is None:
        # Newline added so consecutive diagnostics do not run together.
        sys.stderr.write('smiles %s cannot be parsed (%s)\n' % (smi, name))
        return None

    hba = rdMolDescriptors.CalcNumHBA(m)
    hbd = rdMolDescriptors.CalcNumHBD(m)
    nrings = rdMolDescriptors.CalcNumRings(m)
    rtb = rdMolDescriptors.CalcNumRotatableBonds(m)
    psa = rdMolDescriptors.CalcTPSA(m)
    logp, mr = rdMolDescriptors.CalcCrippenDescriptors(m)
    mw = rdMolDescriptors._CalcMolWt(m)
    csp3 = rdMolDescriptors.CalcFractionCSP3(m)

    # fmf = scaffold heavy atoms / molecule heavy atoms. A molecule with no
    # heavy atoms (e.g. an empty SMILES) previously raised ZeroDivisionError;
    # report 0 instead.
    hac = m.GetNumHeavyAtoms()
    if hac == 0:
        fmf = 0
    else:
        fmf = GetScaffoldForMol(m).GetNumHeavyAtoms() / hac

    return (
        name, hba, hbd, hba + hbd, nrings, rtb,
        round(psa, 2), round(logp, 2), round(mr, 2), round(mw, 2),
        round(csp3, 3), round(fmf, 3),
    )
def __init__(self):
    """Create temp storage, the ESOL calculator and the property table.

    ``self._properties`` is a list of
    ``(key, description, printf-style format, callable)`` tuples.
    """
    self.temp_dir = tempfile.TemporaryDirectory()
    # delete=False: the file is not removed on close; it lives in temp_dir.
    self.temp_sdf = tempfile.NamedTemporaryFile(
        delete=False,
        suffix='.sdf',
        dir=self.temp_dir.name,
    )
    self.esol = ESOLCalculator()

    def crippen_logp(mol):
        # CalcCrippenDescriptors returns (logP, MR); keep the logP.
        return mDesc.CalcCrippenDescriptors(mol)[0]

    table = []
    table.append(('MW', 'Molecular Weight', '%.3f', Desc.MolWt))
    table.append(('logP', 'Lipophilicity (logP)', '%.3f', crippen_logp))
    table.append(('TPSA', 'Total Polar Surface Area', '%.3f', mDesc.CalcTPSA))
    table.append(('ESOL', 'Estimated Solubility', '%.3f', self.esol.calc_esol))
    table.append(('HBA', '# H-Bond Acceptors', '%d', mDesc.CalcNumHBA))
    table.append(('HBD', '# H-Bond Donors', '%d', mDesc.CalcNumHBD))
    table.append(('RB', '# Rotatable Bonds', '%d', mDesc.CalcNumRotatableBonds))
    table.append(('AR', '# Aromatic Rings', '%d', mDesc.CalcNumAromaticRings))
    self._properties = table
def calculate_properties(self, smiles=None, mol=None, props=[]):
    """Calculate the requested ``py_*`` properties and store them in ``self.data``.

    ``mol`` is used when given; otherwise it is parsed from ``smiles``.

    returns : error (bool) -- True when ``props`` is empty or no molecule is
    available, False once the requested properties have been stored
    """
    if len(props) == 0:
        return True
    if mol is None:
        mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return True

    def halogen_count():
        # Number of atoms whose symbol is Cl or Br.
        symbols = [atom.GetSymbol() for atom in mol.GetAtoms()]
        return symbols.count('Cl') + symbols.count('Br')

    # (property key, zero-argument calculator), in the order they are stored.
    calculators = (
        ('py_formula', lambda: desc.CalcMolFormula(mol)),
        ('py_em', lambda: round(desc.CalcExactMolWt(mol), 5)),
        ('py_n_Cl_Br', halogen_count),
        ('py_na', lambda: mol.GetNumAtoms()),
        ('py_mw', lambda: desc._CalcMolWt(mol)),
        ('py_fsp3', lambda: desc.CalcFractionCSP3(mol)),
        ('py_rb', lambda: desc.CalcNumRotatableBonds(mol)),
        ('py_tpsa', lambda: desc.CalcTPSA(mol)),
        ('py_clogp', lambda: desc.CalcCrippenDescriptors(mol)[0]),
        ('py_nar', lambda: desc.CalcNumAromaticRings(mol)),
        ('py_nhba', lambda: desc.CalcNumHBA(mol)),
        ('py_nhbd', lambda: desc.CalcNumHBD(mol)),
    )
    for key, compute in calculators:
        if key in props:
            self.data[key] = compute()
    return False
def calculate_scalar_descriptors(molecule, symbols):
    """Assemble a flat numpy array of scalar descriptors for ``molecule``.

    Order of the result: asphericity, the Crippen descriptors, exact
    molecular weight, eccentricity, fraction CSP3, Labute ASA, NPR1, NPR2,
    Hall-Kier alpha; counts of 'H', 'C', 'N', 'O', 'F' in ``symbols``; the
    ring counts; then amide-bond, HBA and HBD counts.

    :param molecule: molecule passed to the ``rdMD`` descriptor functions
    :param symbols: sequence of element symbols, compared with ``==``
    :return: ``numpy.ndarray`` of the features
    """
    shape_and_bulk = [
        rdMD.CalcAsphericity(molecule),
        *rdMD.CalcCrippenDescriptors(molecule),
        rdMD.CalcExactMolWt(molecule),
        rdMD.CalcEccentricity(molecule),
        rdMD.CalcFractionCSP3(molecule),
        rdMD.CalcLabuteASA(molecule),
        rdMD.CalcNPR1(molecule),
        rdMD.CalcNPR2(molecule),
        rdMD.CalcHallKierAlpha(molecule),
    ]

    # elemental distribution
    symbol_array = np.array(symbols)
    element_counts = [
        np.sum(symbol_array == element)
        for element in ('H', 'C', 'N', 'O', 'F')
    ]

    # ring features
    ring_calculators = (
        rdMD.CalcNumAliphaticCarbocycles,
        rdMD.CalcNumAliphaticHeterocycles,
        rdMD.CalcNumAromaticCarbocycles,
        rdMD.CalcNumAromaticHeterocycles,
        rdMD.CalcNumSaturatedCarbocycles,
        rdMD.CalcNumSaturatedHeterocycles,
        rdMD.CalcNumSpiroAtoms,
        rdMD.CalcNumBridgeheadAtoms,
    )
    ring_features = [calculate(molecule) for calculate in ring_calculators]

    # other counts
    other_counts = [
        rdMD.CalcNumAmideBonds(molecule),
        rdMD.CalcNumHBA(molecule),  # number of hydrogen acceptors
        rdMD.CalcNumHBD(molecule),  # number of hydrogen donors
    ]

    return np.array(
        shape_and_bulk + element_counts + ring_features + other_counts
    )
def get_fingerprint(SMILES=None, E_BIND=None):
    """
    Compute a fixed set of named molecular features for a SMILES string.

    PRE: Takes in a MOLECULE as a SMILES, or None
    POST: With SMILES=None, returns the alphabetically sorted list of
          feature names. Otherwise returns the list of feature values in
          that same sorted-by-name order. E_BIND is accepted but not used.
    RAISES: ValueError when the 3D embedding, MMFF optimisation or volume
            computation fails.
    """

    def get_atom_types(mol):
        """
        PRE: Takes in the mol
        POST: Returns a dictionary mapping element symbol -> atom count
        """
        atom_types = {}
        for atom in mol.GetAtoms():
            symbol = atom.GetSymbol()
            atom_types[symbol] = atom_types.get(symbol, 0) + 1
        return atom_types

    def isRingAromatic(mol, ring):
        """
        PRE: Takes in a mol and a ring given as a tuple of atom id
        POST: Returns True if all the atoms inside the ring are aromatic
        """
        return all(mol.GetAtomWithIdx(idx).GetIsAromatic() for idx in ring)

    def AreRingFused(mol):
        """
        PRE : Takes in a mol rdkit
        POST : Returns the maximum number of rings any atom belongs to,
               or 0 when the molecule has no rings
        """
        ring_membership = {}
        for ring in Chem.GetSymmSSSR(mol):
            for atom in ring:
                ring_membership[atom] = ring_membership.get(atom, 0) + 1
        # A dict view never compares equal to [], so the old emptiness test
        # was always False and max() raised on ring-free molecules.
        return max(ring_membership.values(), default=0)

    def getVolume(mol, atom_types):
        """
        PRE: Takes in a mol with HYDROGENS ADDED (atom_types is unused)
        POST: Returns (volume, aromatic ring count, non-aromatic ring count,
              largest aromatic ring size, largest non-aromatic ring size).
              The volume comes from AllChem.ComputeMolVolume after embedding
              and MMFF optimisation, which modify mol in place.
        """
        ra = rna = 0
        largest_ra = largest_rna = 0
        for ring in Chem.GetSymmSSSR(mol):
            ring_size = len(ring)
            if isRingAromatic(mol, tuple(ring)):
                ra += 1
                largest_ra = max(largest_ra, ring_size)
            else:
                rna += 1
                largest_rna = max(largest_rna, ring_size)
        try:
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            volume = AllChem.ComputeMolVolume(mol)
        except Exception as err:
            # Exception rather than a bare except, so Ctrl-C is not turned
            # into a ValueError; the cause is chained for debugging.
            raise ValueError("Can't build the molecule") from err
        return volume, ra, rna, largest_ra, largest_rna

    features = [
        'atomNbr', 'Volume',
        'NAtom', 'OAtom', 'SAtom', 'PAtom',
        'ClAtom', 'BrAtom', 'FAtom', 'IAtom',
        'AromaticRingNumber', 'LargestAromaticRingAtomNbr',
        'NonAromaticRingNumber', 'LargestNonAromaticRingAtomNbr',
        'MaxNbrFusedRings', 'SurfaceArea', 'Charge',
        'NitroNbr', 'AlcoholNbr', 'KetoneNbr', 'NitrileNbr', 'ThiolNbr',
        'Phenol_likeNbr', 'EsterNbr', 'SulfideNbr', 'CarboxilicAcidNbr',
        'EtherNbr', 'AmideNbr', 'AnilineNbr',
        'PrimaryAmineNbr', 'SecondaryAmineNbr',
        'RotableBondNum', 'HBondDonor', 'HBondAcceptor',
        'MolLogP', 'MolMR',
    ]
    for order in range(1, 7):
        features.append('Chi{}v'.format(order))
        features.append('Chi{}n'.format(order))
    for order in range(1, 4):
        features.append('Kappa{}'.format(order))
    feature_dic = dict.fromkeys(features)

    # No molecule given: only report the feature names.
    if SMILES is None:
        return sorted(feature_dic)

    mol = Chem.MolFromSmiles(SMILES)
    mol = Chem.AddHs(mol)

    feature_dic['RotableBondNum'] = descriptors.CalcNumRotatableBonds(mol)
    for order in range(1, 7):
        feature_dic['Chi{}v'.format(order)] = descriptors.CalcChiNv(mol, order)
        feature_dic['Chi{}n'.format(order)] = descriptors.CalcChiNn(mol, order)
    feature_dic['Kappa1'] = descriptors.CalcKappa1(mol)
    feature_dic['Kappa2'] = descriptors.CalcKappa2(mol)
    feature_dic['Kappa3'] = descriptors.CalcKappa3(mol)
    feature_dic['HBondAcceptor'] = descriptors.CalcNumHBA(mol)
    feature_dic['HBondDonor'] = descriptors.CalcNumHBD(mol)

    crippen = descriptors.CalcCrippenDescriptors(mol)
    feature_dic['MolLogP'] = crippen[0]
    feature_dic['MolMR'] = crippen[1]

    atom_types = get_atom_types(mol)
    for symbol in ('N', 'O', 'S', 'P', 'Cl', 'Br', 'F', 'I'):
        feature_dic[symbol + 'Atom'] = atom_types.get(symbol, 0)

    feature_dic['atomNbr'] = mol.GetNumHeavyAtoms()

    (feature_dic['Volume'],
     feature_dic['AromaticRingNumber'],
     feature_dic['NonAromaticRingNumber'],
     feature_dic['LargestAromaticRingAtomNbr'],
     feature_dic['LargestNonAromaticRingAtomNbr']) = getVolume(mol, atom_types)

    feature_dic['MaxNbrFusedRings'] = AreRingFused(mol)
    # 'SurfaceArea' holds the topological polar surface area (CalcTPSA).
    feature_dic['SurfaceArea'] = descriptors.CalcTPSA(mol)
    feature_dic['Charge'] = Chem.GetFormalCharge(mol)

    # SMARTS pattern -> feature receiving the number of substructure matches
    funct_dic = {
        '[$([NX3](=O)=O),$([NX3+](=O)[O-])][!#8]': 'NitroNbr',
        '[#6][OX2H]': 'AlcoholNbr',
        '[NX1]#[CX2]': 'NitrileNbr',
        '[#6][CX3](=O)[#6]': 'KetoneNbr',
        '[#16X2H]': 'ThiolNbr',
        "[OX2H][cX3][c]": 'Phenol_likeNbr',
        '[#6][CX3](=O)[OX2H0][#6]': 'EsterNbr',
        '[#16X2H0]': 'SulfideNbr',
        '[CX3](=O)[OX2H1]': 'CarboxilicAcidNbr',
        '[OD2]([#6])[#6]': 'EtherNbr',
        '[#7X3][#6X3](=[OX1])[#6]': 'AmideNbr',
        '[NX3][cc]': 'AnilineNbr',
        '[NX3H2;!$(NC=O)]': 'PrimaryAmineNbr',
        '[NX3H1;!$(NC=O)]': 'SecondaryAmineNbr',
    }
    for smarts, feature_name in funct_dic.items():
        patt = Chem.MolFromSmarts(smarts)
        feature_dic[feature_name] = len(mol.GetSubstructMatches(patt))

    return [feature_dic[key] for key in sorted(feature_dic)]
mol = inMol if patts is None: global _smartsPatterns, _patternOrder if _smartsPatterns == {}: _patternOrder, _smartsPatterns = _ReadPatts(defaultPatternFileName) patts = _smartsPatterns order = _patternOrder atomContribs = _pyGetAtomContribs(mol, patts, order, verbose=verbose) return numpy.sum(atomContribs, 0)[1] _pyMolMR.version = "1.1.0" MolLogP = lambda *x, **y: rdMolDescriptors.CalcCrippenDescriptors(*x, **y)[0] MolLogP.version = rdMolDescriptors._CalcCrippenDescriptors_version MolLogP.__doc__ = """ Wildman-Crippen LogP value Uses an atom-based scheme based on the values in the paper: S. A. Wildman and G. M. Crippen JCICS 39 868-873 (1999) **Arguments** - inMol: a molecule - addHs: (optional) toggles adding of Hs to the molecule for the calculation. If true, hydrogens will be added to the molecule and used in the calculation. """
def logp(molecule):
    """
    Calculate the Crippen logP of a SELFIES string.

    The string is decoded with ``sf.decoder``, parsed with
    ``MolFromSmiles``, and the first Crippen descriptor is returned.
    """
    smiles = sf.decoder(molecule)
    mol = MolFromSmiles(smiles)
    crippen = rdMolDescriptors.CalcCrippenDescriptors(mol)
    return crippen[0]
def compute_logP(self, mol_input):
    """Return the logP element of the Crippen (logP, MR) pair for ``mol_input``."""
    crippen_pair = rdMolDescriptors.CalcCrippenDescriptors(mol_input)
    log_p, _molar_refractivity = crippen_pair
    return log_p