def extract_shingles(self, individual):
    """
    Collect the fragment SMILES ("shingles") found around each atom of an individual.

    For every atom index and every radius from 1 up to and including
    ``self.radius``, the bond environment of that radius around the atom is
    written out as a fragment SMILES. When ``self.rooted`` is true the
    fragment is rooted at the centre atom, otherwise -1 is passed as the
    root index.

    :param individual: object providing ``to_aromatic_smiles()`` and a
        ``mol_graph`` attribute exposing ``GetNumAtoms()``
    :return: set of the fragment SMILES that were generated
    """
    shingles = set()
    radius_stop = self.radius + 1

    # Re-parse the aromatic SMILES so the fragments are taken from an aromatic molecule
    mol = MolFromSmiles(individual.to_aromatic_smiles())

    # NOTE(review): the atom count comes from individual.mol_graph while the
    # indices are applied to the re-parsed molecule; this assumes both share
    # the same atom numbering - confirm.
    for center in range(individual.mol_graph.GetNumAtoms()):
        for radius in range(1, radius_stop):
            env_bonds = AllChem.FindAtomEnvironmentOfRadiusN(mol, radius, center)

            # No environment at this radius: larger radii are not tried for this atom
            if not env_bonds:
                break

            # Gather the atoms touched by the environment bonds
            # (the reportedly faster method)
            env_atoms = set()
            for bond_idx in env_bonds:
                env_bond = mol.GetBondWithIdx(bond_idx)
                env_atoms.update((env_bond.GetBeginAtomIdx(), env_bond.GetEndAtomIdx()))

            # Root the fragment at the centre atom only when requested
            root_atom = center if self.rooted else -1

            shingles.add(
                Chem.rdmolfiles.MolFragmentToSmiles(
                    mol, list(env_atoms), env_bonds, 0, 0,
                    False, False, root_atom, True, False, False,
                )
            )

    return shingles
def extract_shingles(smiles, level, as_list=False):
    """
    Extract the rooted shingles of a SMILES up to the given level.

    The SMILES is parsed into a molecule; then, for every atom and every
    radius from 1 up to and including ``level``, the bond environment of
    that radius around the atom is written out as a fragment SMILES rooted
    at that atom.

    see https://jcheminf.biomedcentral.com/articles/10.1186/s13321-018-0321-8

    :param smiles: SMILES of the molecule to process
    :param level: largest environment radius to consider
    :param as_list: if true, the shingles are accumulated in a list (so
        repeated shingles are kept); otherwise they are accumulated in a set
    :return: list or set (depending on ``as_list``) of fragment SMILES
    """
    shingles = list() if as_list else set()
    # Bind the matching insertion method once instead of branching on every shingle
    store = shingles.append if as_list else shingles.add

    radius_stop = level + 1

    # Build the molecule from the SMILES
    mol = MolFromSmiles(smiles)

    for center in range(mol.GetNumAtoms()):
        for radius in range(1, radius_stop):
            env_bonds = AllChem.FindAtomEnvironmentOfRadiusN(mol, radius, center)

            # No environment at this radius: larger radii are not tried for this atom
            if not env_bonds:
                break

            # Gather the atoms touched by the environment bonds
            # (the reportedly faster method)
            env_atoms = set()
            for bond_idx in env_bonds:
                env_bond = mol.GetBondWithIdx(bond_idx)
                env_atoms.update((env_bond.GetBeginAtomIdx(), env_bond.GetEndAtomIdx()))

            # Fragment SMILES rooted at the centre atom
            store(
                Chem.rdmolfiles.MolFragmentToSmiles(
                    mol, list(env_atoms), env_bonds, 0, 0,
                    False, False, center, True, False, False,
                )
            )

    return shingles