def extract_atompair_fragments(molecule: object) -> list:
    """Expand a molecule's atom-pair fingerprint into a list of fragment records.

    The atom-pair fingerprint of ``molecule`` is computed with
    ``Pairs.GetAtomPairFingerprint`` and every non-zero element is decoded
    with ``Pairs.ExplainPairScore``. For each element, one record is emitted
    per occurrence (i.e. the element's count in the fingerprint).

    Args:
        molecule: Molecule object accepted by ``Pairs.GetAtomPairFingerprint``
            (presumably an RDKit ``Mol`` -- confirm against callers).

    Returns:
        A list of dicts, each with the keys:
            ``"smiles"``: the two atom symbols concatenated,
            ``"index"``: the integer key packed from both atoms and the
                distance (see below),
            ``"type"``: always the string ``"AP"``,
            ``"size"``: the decoded distance plus one.
        The list is empty when the fingerprint has no non-zero elements.
    """
    output = []
    pair_counts = Pairs.GetAtomPairFingerprint(molecule).GetNonzeroElements()

    for pair, count in pair_counts.items():
        # Decode the score once per pair; the result is indexed as
        # ((symbol, n_neighbours, n_pi), distance, (symbol, n_neighbours, n_pi)).
        explanation = Pairs.ExplainPairScore(pair)
        first, second = explanation[0], explanation[2]

        # The symbols are parsed as SMARTS atoms only to obtain atomic numbers.
        atom1 = rdkit.Chem.AtomFromSmarts(first[0])
        atom2 = rdkit.Chem.AtomFromSmarts(second[0])
        smiles = first[0] + second[0]

        # Per-atom value: 64 * atomic number + 16 * pi count + neighbour count.
        atom1_property_value = 64 * atom1.GetAtomicNum() + 16 * first[2] + first[1]
        atom2_property_value = 64 * atom2.GetAtomicNum() + 16 * second[2] + second[1]

        dist = explanation[1] + 1

        # Order-independent packing: smaller value in the low slot, larger
        # value in the next slot, distance above both (slots are 1024 wide).
        # NOTE(review): 64 * 16 == 1024, so for atomic numbers >= 16 a
        # property value no longer fits its slot and spills into the next
        # field. Left unchanged here to keep emitted indices stable.
        low = min(atom1_property_value, atom2_property_value)
        high = max(atom1_property_value, atom2_property_value)
        atom_pair_key = low + 1024 * (high + 1024 * dist)

        # One independent dict per occurrence so callers may mutate records
        # without affecting their duplicates.
        output.extend(
            {
                "smiles": smiles,
                "index": atom_pair_key,
                "type": "AP",
                "size": dist,
            }
            for _ in range(count)
        )

    return output
def atom_pairs():
    """Demonstrate atom-pair fingerprints on three hard-coded SMILES strings.

    Prints, in order:
      1. the non-zero elements of the last molecule's atom-pair fingerprint,
      2. the explanation of pair score 558115,
      3. the Dice similarity of the first two count-based fingerprints,
      4. the Dice similarity of the first two bit-vector fingerprints.

    Returns nothing; all results go to standard output.
    """
    smiles_inputs = ('C1CCC1OCC', 'CC(C)OCC', 'CCOCC')
    molecules = [Chem.MolFromSmiles(smiles) for smiles in smiles_inputs]

    # Count-based atom-pair fingerprints, one per molecule.
    count_fps = [Pairs.GetAtomPairFingerprint(mol) for mol in molecules]

    # Bits and their counts for the last fingerprint, as a dictionary.
    nonzero_bits = count_fps[-1].GetNonzeroElements()
    print(nonzero_bits)

    # Human-readable explanation of a single pair score.
    explanation = Pairs.ExplainPairScore(558115)
    print(explanation)

    # Dice similarity is the usual metric between atom-pair fingerprints.
    count_similarity = DataStructs.DiceSimilarity(count_fps[0], count_fps[1])
    print(count_similarity)

    # Same comparison on the bit-vector form of the fingerprint
    # (described in the original note as the descriptor without counts).
    bit_fps = [Pairs.GetAtomPairFingerprintAsBitVect(mol) for mol in molecules]
    bit_similarity = DataStructs.DiceSimilarity(bit_fps[0], bit_fps[1])
    print(bit_similarity)
ms, molsPerRow=3, subImgSize=(200, 200), legends=['' for x in ms] ) img.save( '/drug_development/studyRdkit/st_rdcit/img/mol21.jpg' ) pairFps = [Pairs.GetAtomPairFingerprint(x) for x in ms] print(pairFps) # 由于包含在原子对指纹中的位空间很大,因此他们以稀疏的方式存储为字典形式 d = pairFps[-1].GetNonzeroElements() print(d) # {541732: 1, 558113: 2, 558115: 2, 558146: 1, 1606690: 2, 1606721: 2} print(d[541732]) # 1 # 位描述也可以像如下所示展示 de = Pairs.ExplainPairScore(558115) print(de) # (('C', 1, 0), 3, ('C', 2, 0)) # The above means: C with 1 neighbor and 0 pi electrons which is 3 bonds from a C with 2 neighbors and 0 pi electrons # 碳带有一个邻位孤电子和0个π电子,这是因为碳与两个邻位原子和氧原子形成3个化学键。 # # 2.4 拓扑扭曲topological torsions tts = [Torsions.GetTopologicalTorsionFingerprintAsIntVect(x) for x in ms] d_ds = DataStructs.DiceSimilarity(tts[0], tts[1]) print(d_ds) # 0.16666666666666666 # # 2.5 摩根指纹(圆圈指纹)AllChem.GetMorganFingerprint(mol,2) # 通过将Morgan算法应用于一组用户提供的原子不变式,可以构建这一系列的指纹。生成Morgan指纹时,还必须提供指纹的半径 m1 = Chem.MolFromSmiles('Cc1ccccc1') m2 = Chem.MolFromSmiles('Cc1ncccc1') fp1 = AllChem.GetMorganFingerprint(m1, 2) fp2 = AllChem.GetMorganFingerprint(m2, 2)