def _validate(self, vals, tol=1e-2, show=False):
    """Check that each SMILES is typed exactly as expected.

    ``vals`` is an iterable of ``(smiles, expected)`` pairs; ``expected`` is
    compared element by element against the first type of every entry
    returned by ``AtomTypes.TypeAtoms``.  Every entry must carry at most one
    type.  ``tol`` is accepted but not used in this helper.  When ``show``
    is true the raw typing result is printed.
    """
    for smiles, expected in vals:
        typed = AtomTypes.TypeAtoms(Chem.MolFromSmiles(smiles))
        if show:  # pragma: nocover
            print(typed)

        self.assertEqual(len(expected), len(typed),
                         'bad type len for smiles: %s' % smiles)

        most_matches = max(len(entry) for entry in typed)
        self.assertEqual(most_matches, 1,
                         'atom matched multiple types for smiles: %s' % smiles)

        # Built eagerly (not lazily inside zip) so an entry with no type
        # fails before any element-wise comparison is made.
        observed = [entry[0] for entry in typed]
        for want, got in zip(expected, observed):
            self.assertEqual(want, got, 'bad type for SMILES: %s' % smiles)
def _validate(self, vals, tol=1e-2, show=0):
    """Assert that each SMILES in ``vals`` is typed exactly as expected.

    ``vals`` is an iterable of ``(smiles, expected)`` pairs.  For every pair
    the molecule is parsed, typed with ``AtomTypes.TypeAtoms`` and checked
    for:

      - the same number of entries as ``expected``,
      - no entry carrying more than one type,
      - element-wise equality between ``expected`` and the first type of
        each entry.

    ``tol`` is accepted but not used in this helper.  When ``show`` is
    true the raw typing result is printed.

    Raises AssertionError on the first mismatch.
    """
    for smi, ans in vals:
        mol = Chem.MolFromSmiles(smi)
        types = AtomTypes.TypeAtoms(mol)
        if show:
            # Call form works on both Python 2 and Python 3; the bare
            # ``print types`` statement is a SyntaxError on Python 3.
            print(types)
        assert len(ans) == len(types), 'bad type len for smiles: %s' % (smi)
        lens = [len(x) for x in types]
        assert max(lens) == 1, 'atom matched multiple types for smiles: %s' % (smi)
        types = [x[0] for x in types]
        for a, b in zip(ans, types):
            assert a == b, 'bad type for SMILES: %s' % (smi)
def _exampleCode():
    """ Print E-state atom types, fingerprints and indices for sample SMILES """
    from rdkit import Chem

    for smi in ('CC', 'CCC', 'c1[nH]cnc1CC(N)C(O)=O', 'NCCc1ccc(O)c(O)c1'):
        mol = Chem.MolFromSmiles(smi)
        print(smi, Chem.MolToSmiles(mol))

        # One line per atom: 1-based position, element symbol, matched types.
        atom_types = AtomTypes.TypeAtoms(mol)
        for idx in range(mol.GetNumAtoms()):
            symbol = mol.GetAtomWithIdx(idx).GetSymbol()
            print('%d %4s: %s' % (idx + 1, symbol, str(atom_types[idx])))

        indices = EStateIndices(mol)
        counts, sums = FingerprintMol(mol)

        # Only patterns with a non-zero count are reported.
        for pos, entry in enumerate(AtomTypes.esPatterns):
            if not counts[pos]:
                continue
            label, _ = entry
            print('%6s, % 2d, % 5.4f' % (label, counts[pos], sums[pos]))

        for number, value in enumerate(indices, 1):
            print('% 2d, % 5.4f' % (number, value))
        print('--------')
def finger_print(chunk):
    """
    Create a table with the e-state fingerprint of every molecule in chunk

    Input:
        chunk; pandas DataFrame with "name" and "smiles" columns,
               one molecule per row

    Returns a DataFrame with the columns 'name' and 'smiles' followed by one
    column per entry of AtomTypes.esPatterns, filled with the matching value
    of the sums returned by Fingerprinter.FingerprintMol.  Rows that cannot
    be parsed or fingerprinted are reported on stdout (row index and SMILES)
    and left out of the result.
    """
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()
    # Loop variable deliberately not called ``name`` so it cannot be confused
    # with the per-row structure name below.
    name_list = [patt_name for patt_name, _ in AtomTypes.esPatterns]
    columns = ['name', 'smiles'] + name_list

    # Rows are collected in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    for row_index, row in chunk.iterrows():
        name = row["name"]
        smiles = row["smiles"]
        mol = Chem.MolFromSmiles(smiles)
        if mol is None:
            # RDKit documents None as the result for SMILES it cannot parse.
            print(row_index, smiles)
            continue
        try:
            _, sums = Fingerprinter.FingerprintMol(mol)
        except AttributeError:
            # Keep the original best-effort behaviour: report and skip.
            # (The old handler printed undefined names and raised NameError.)
            print(row_index, smiles)
            continue
        data = {'name': name, 'smiles': smiles}
        data.update(zip(name_list, sums))
        rows.append(data)
    return pd.DataFrame(rows, columns=columns)
def finger_print(mol, name, e_opt):
    """
    Return the e-state fingerprint of one molecule as a flat dictionary

    Input:
        mol; rdkit mol object
        name; structure name
        e_opt; energy gap (target)

    The result maps 'name' and 'E_opt' to the given arguments, and every
    pattern name in AtomTypes.esPatterns to the matching entry of the sums
    returned by Fingerprinter.FingerprintMol.
    """
    # The results of these two calls are not used; the calls are kept so any
    # error they raise or state they set up is unchanged.
    AtomTypes.TypeAtoms(mol)
    EStateIndices(mol)

    _, pattern_sums = Fingerprinter.FingerprintMol(mol)
    if AtomTypes.esPatterns is None:
        AtomTypes.BuildPatts()

    record = {'name': name, 'E_opt': e_opt}
    labels = (label for label, _ in AtomTypes.esPatterns)
    record.update(zip(labels, pattern_sums))
    return record
nPatts = len(AtomTypes.esPatterns) counts = numpy.zeros(nPatts,numpy.int) sums = numpy.zeros(nPatts,numpy.float) for i,(name,pattern) in enumerate(AtomTypes.esPatterns): matches = mol.GetSubstructMatches(pattern,uniquify=1) counts[i] = len(matches) for match in matches: sums[i] += esIndices[match[0]] return counts,sums if __name__ == '__main__': from rdkit import Chem smis = ['CC','CCC','c1[nH]cnc1CC(N)C(O)=O','NCCc1ccc(O)c(O)c1'] for smi in smis: m = Chem.MolFromSmiles(smi) print smi,Chem.MolToSmiles(m) types = AtomTypes.TypeAtoms(m) for i in range(m.GetNumAtoms()): print '%d %4s: %s'%(i+1,m.GetAtomWithIdx(i).GetSymbol(),str(types[i])) es = EStateIndices(m) counts,sums = FingerprintMol(m) for i in range(len(AtomTypes.esPatterns)): if counts[i]: name,patt = AtomTypes.esPatterns[i] print '%6s, % 2d, % 5.4f'%(name,counts[i],sums[i]) for i in range(len(es)): print '% 2d, % 5.4f'%(i+1,es[i]) print '--------'