def _get_distance_matrix(self,
                         combo: Chem.Mol,
                         A: Union[Chem.Mol, np.ndarray],
                         B: Union[Chem.Mol, np.ndarray]) -> np.ndarray:
    """
    Called by ``_find_closest`` and ``_determine_mergers_novel_ringcore_pair`` in collapse ring (for expansion).

    Computes the 3D distance matrix of ``combo`` and passes it, together with
    the atom indices of A and then those of B, to ``self._nan_submatrix``
    (to nan-fill the "self" values).

    :param combo: molecule whose 3D distance matrix is computed.
    :param A: a molecule, whose atoms are taken to be indices ``0 .. A.GetNumAtoms()-1``,
        or an array-like of atom indices.
    :param B: a molecule, whose atoms are taken to directly follow those of molecule ``A``,
        or an array-like of atom indices.
    :return: the distance matrix after both ``_nan_submatrix`` calls.
    :raises TypeError: if ``B`` is a molecule but ``A`` is not; the offset of B's atoms
        cannot be derived in that case.
    """
    # TODO move to base once made.
    # input type
    if isinstance(A, Chem.Mol):
        A_idxs = np.arange(A.GetNumAtoms())
    else:
        A_idxs = np.array(A)
    if isinstance(B, Chem.Mol):
        if not isinstance(A, Chem.Mol):
            # B's indices are offset by the size of molecule A, which is unknown
            # when A is given as bare indices. This used to fail with an opaque
            # AttributeError on None.
            raise TypeError('B can be given as a Chem.Mol only if A is a Chem.Mol too: '
                            'pass the indices of B explicitly.')
        B_idxs = np.arange(B.GetNumAtoms()) + A.GetNumAtoms()
    else:
        B_idxs = np.array(B)
    # make matrix
    distance_matrix = Chem.Get3DDistanceMatrix(combo)
    # nan fill the self values
    self._nan_submatrix(distance_matrix, A_idxs)
    self._nan_submatrix(distance_matrix, B_idxs)
    return distance_matrix
def mol_to_mutliconformer_file(rdkit_mol: Chem.Mol, file_name: str) -> None:
    """
    Write the rdkit molecule to a multi conformer file.

    One block is written per conformer, in conformer order, using the writer
    selected from the file suffix.

    Args:
        rdkit_mol:
            A complete Chem.Mol instance of a molecule.
        file_name:
            Name of the file to be created.

    Raises:
        FileTypeError: If the suffix is not one of .pdb, .mol, .sdf or .xyz.
    """
    file_path = Path(file_name)
    suffix = file_path.suffix

    # get the file block writer
    if suffix == ".pdb":
        writer = Chem.MolToPDBBlock
    elif suffix in (".mol", ".sdf"):
        writer = Chem.MolToMolBlock
    elif suffix == ".xyz":
        writer = Chem.MolToXYZBlock
    else:
        raise FileTypeError(
            f"The file type {file_path.suffix} is not supported please chose from xyz, pdb, mol or sdf."
        )

    # NOTE(review): blocks are concatenated as produced by the writer; confirm
    # that downstream readers accept this layout (e.g. for .sdf records).
    with open(file_name, "w") as out:
        # Use the conformers' own ids: they are not guaranteed to be 0..n-1
        # (e.g. after conformers have been removed).
        for conformer in rdkit_mol.GetConformers():
            out.write(writer(rdkit_mol, confId=conformer.GetId()))
def conformer_energy(molecule: Chem.Mol, conformer_id: int = 0, forcefield: str = "UFF") -> float:
    """ Get the energy of a conformer in a molecule using the universal forcefield (UFF)
        or the merck molecular forcefield (MMFF).

        Parameters
        ----------
        molecule : rdkit.Chem.Mol
            The molecule which energy will be calculated.

        conformer_id : int, optional
            The id of the conformer whose energy is calculated (default=0).

        forcefield : {"UFF", "MMFF"}, optional.
            The forcefield that will be used to calculate the energy
            (default="UFF").

        Returns
        -------
        float
            The energy of the molecule.

        Raises
        ------
        NoConformersError
            If the molecule has no conformers.

        ValueError
            If `forcefield` is neither "UFF" nor "MMFF".
    """
    if molecule.GetNumConformers() == 0:
        raise NoConformersError("Molecule must have at least one conformer")

    if forcefield == "UFF":
        ff = AllChem.UFFGetMoleculeForceField(molecule, confId=conformer_id)
    elif forcefield == "MMFF":
        props = AllChem.MMFFGetMoleculeProperties(molecule)
        ff = AllChem.MMFFGetMoleculeForceField(molecule, props, confId=conformer_id)
    else:
        # Previously an unknown name fell through and failed with an
        # UnboundLocalError on `ff`.
        raise ValueError(
            f"Unsupported forcefield {forcefield!r}; expected 'UFF' or 'MMFF'.")

    return ff.CalcEnergy()
def generate_conformers(rdkit_mol: Chem.Mol, conformer_no: int) -> List[np.ndarray]:
    """
    Embed conformers into the molecule and collect the coordinates of all of them.

    Embedding is done with ``clearConfs=False``, so conformers already present on
    the molecule (the input conformer) are kept and are part of the result.

    Args:
        rdkit_mol:
            A complete Chem.Mol instance of a molecule.
        conformer_no:
            The number of conformers requested from the embedding.

    return:
        A list with one position array per conformer of the molecule.
    """
    embedding_options = dict(
        numConfs=conformer_no,
        randomSeed=1,
        clearConfs=False,
        useBasicKnowledge=True,
        pruneRmsThresh=1,
        enforceChirality=True,
    )
    AllChem.EmbedMultipleConfs(rdkit_mol, **embedding_options)

    coordinates = []
    for conformer in rdkit_mol.GetConformers():
        coordinates.append(conformer.GetPositions())
    return coordinates
def guess_origins(self, mol: Chem.Mol = None, hits: Optional[List[Chem.Mol]] = None):
    """
    Given a positioned mol guess its origins...

    For every hit, the positional mapping hit -> mol is obtained with
    ``get_positional_mapping`` and each mapped atom of ``mol`` gets
    ``'{hit name}.{hit atom index}'`` appended to the JSON list stored in its
    ``_Origin`` property. Atoms whose ``_Novel`` property is true are skipped.
    The atoms of ``mol`` are modified in place.

    :param mol: the positioned molecule. Defaults to ``self.positioned_mol``.
    :param hits: the hits to map onto ``mol``. Defaults to ``self.hits``.
    :return: None
    """
    if hits is None:
        hits = self.hits
    if mol is None:
        # the signature advertises None as the default: resolve it instead of
        # failing on ``None.GetAtomWithIdx``.
        mol = self.positioned_mol
    for hit in hits:
        hname = hit.GetProp('_Name')
        for hi, mi in self.get_positional_mapping(hit, mol).items():
            atom = mol.GetAtomWithIdx(mi)
            if atom.HasProp('_Novel') and atom.GetBoolProp('_Novel'):
                continue  # flagged to avoid.
            if atom.HasProp('_Origin') and atom.GetProp('_Origin') != 'none':
                origin = json.loads(atom.GetProp('_Origin'))
            else:
                origin = []
            origin.append(f'{hname}.{hi}')
            atom.SetProp('_Origin', json.dumps(origin))
def max_from_mol(self, mol: Chem.Mol = None):
    """
    List the ``_Max`` double property of every atom, in atom order.

    :param mol: molecule to read. Defaults to ``self.positioned_mol``.
    :return: one value per atom; 0 for atoms without a ``_Max`` property.
    """
    target = self.positioned_mol if mol is None else mol
    values = []
    for atom in target.GetAtoms():
        if atom.HasProp('_Max'):
            values.append(atom.GetDoubleProp('_Max'))
        else:
            values.append(0)
    return values
def _pre_fragment_pairs(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, A2B_mapping: Optional = None) \
        -> Dict[int, List[Dict]]:
    """
    Returns

        {4: [{'idx': 5,
              'type': rdkit.Chem.rdchem.BondType.SINGLE,
              'idx_F': 5,
              'idx_S': 1}], ...}

    which is slight more than ``{5: [{'idx': 4, 'type': rdkit.Chem.rdchem.BondType.SINGLE}], ...``
    from categories:

    * ``idx_F``: fragmentanda index
    * ``idx_S``: scaffold index

    Required for ``self.merge``; the key is the index of the anchoring atom.

    Calls ``get_positional_mapping`` and ``_categorise``. As a side effect, scaffold atoms
    whose mapped fragmentanda atom has a different element get that symbol stored
    in the ``_AltSymbol`` property.

    :param scaffold: mol to be added to.
    :param fragmentanda: mol to be fragmented
    :param A2B_mapping: see ``get_positional_mapping``
    :return: the ``pairs`` entry of the categories, with ``idx_F`` and ``idx_S`` added.
    """
    # get A2B mapping
    if A2B_mapping is None:
        A2B_mapping = self.get_positional_mapping(scaffold, fragmentanda)
    if len(A2B_mapping) == 0:
        raise ConnectionError('No overlap!')

    def scaffold_index_of(fragmentanda_index):
        # reverse lookup: first scaffold index (key) mapped to this fragmentanda index (value).
        mapped = list(A2B_mapping.values())
        return list(A2B_mapping.keys())[mapped.index(fragmentanda_index)]

    # store alternative atom symbols.
    for scaffold_index, fragmentanda_index in A2B_mapping.items():
        scaffold_atom = scaffold.GetAtomWithIdx(scaffold_index)
        scaffold_symbol = scaffold_atom.GetSymbol()
        fragmentanda_symbol = fragmentanda.GetAtomWithIdx(fragmentanda_index).GetSymbol()
        if scaffold_symbol != fragmentanda_symbol:
            scaffold_atom.SetProp('_AltSymbol', fragmentanda_symbol)
    # prepare: atoms of fragmentanda that are absent from the mapping.
    all_indices = set(range(fragmentanda.GetNumAtoms()))
    uniques = all_indices - set(A2B_mapping.values())
    pairs = self._categorise(fragmentanda, uniques)['pairs']
    for details in pairs.values():  # pairs: Dict[List[Dict]]
        for detail in details:
            detail['idx_F'] = detail['idx']  # less ambiguous: fragmentanda index
            detail['idx_S'] = scaffold_index_of(detail['idx'])  # scaffold index
    return pairs
def convert_to_graph(mol: Chem.Mol,
                     scaffold_ids: t.Tuple[int],
                     anchors: t.Dict[int, int],
                     hba_ids: t.Tuple[int],
                     hbd_ids: t.Tuple[int]) -> nx.Graph:
    """
    Build an undirected `nx.Graph` from a `Chem.Mol`: one node per atom index,
    one edge per bond, plus node attributes.

    Args:
        mol (Chem.Mol):
            The molecule object to be converted
        scaffold_ids (t.Tuple[int]):
            The atoms that correspond to scaffolds (flagged ``is_scaffold``)
        anchors (t.Dict[int, int]):
            The mapping from atom in the molecule to atom in scaffold where it is
            attached to (stored as ``anchor``)
        hba_ids (t.Tuple[int]):
            The atoms corresponding to hydrogen acceptors (flagged ``is_hba``)
        hbd_ids (t.Tuple[int]):
            The atoms corresponding to hydrogen donors (flagged ``is_hbd``)

    Returns:
        nx.Graph:
            The graph converted; every node also carries its element ``symbol``.
    """
    graph = nx.Graph()
    atom_count = mol.GetNumAtoms()

    # Nodes are atom indices, edges are bonds
    graph.add_nodes_from(range(atom_count))
    graph.add_edges_from(
        (bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()) for bond in mol.GetBonds()
    )

    # Element symbol on every node
    for atom_idx in range(atom_count):
        graph.nodes[atom_idx]['symbol'] = mol.GetAtomWithIdx(atom_idx).GetSymbol()

    # Anchor atom for attached atoms
    for atom_idx, scaffold_atom in anchors.items():
        graph.nodes[atom_idx]['anchor'] = scaffold_atom

    # Boolean flags, only set on the atoms listed
    flagged = (
        ('is_hba', hba_ids),
        ('is_hbd', hbd_ids),
        ('is_scaffold', scaffold_ids),
    )
    for flag, atom_ids in flagged:
        for atom_idx in atom_ids:
            graph.nodes[atom_idx][flag] = True

    return graph
def merge(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, anchor_index: int,
          attachment_details: List[Dict]) -> Chem.Mol:
    """
    Attach the part of ``fragmentanda`` containing the anchor atom to ``scaffold``.

    For each attachment detail, ``fragmentanda`` is cut at the bond between the anchor
    atom and the attachment atom, the fragment holding the anchor atom is combined
    with the scaffold, and a bond of the given type is added between the anchor atom
    and the scaffold attachment atom. The combined molecule becomes the scaffold for
    the next detail.

    :param scaffold: mol to be added to.
    :param fragmentanda: mol to be fragmented.
    :param anchor_index: fragmentanda index of the atom kept in the added fragment.
    :param attachment_details: dicts with keys ``idx_F`` (fragmentanda index of the atom
        across the cut bond), ``idx_S`` (scaffold index to bond to) and ``type`` (bond type).
    :return: the merged molecule (``scaffold`` itself if there are no details).
    """
    for detail in attachment_details:
        attachment_index = detail['idx_F']  # fragmentanda attachment_index
        scaffold_attachment_index = detail['idx_S']
        bond_type = detail['type']
        cut_bond = fragmentanda.GetBondBetweenAtoms(anchor_index, attachment_index)
        f = Chem.FragmentOnBonds(fragmentanda, [cut_bond.GetIdx()], addDummies=False)
        frag_split = []  # filled by GetMolFrags with the original atom indices of each fragment
        fragmols = Chem.GetMolFrags(f, asMols=True, fragsMolAtomMapping=frag_split, sanitizeFrags=False)
        if self._debug_draw:
            print(frag_split)
        # Get the fragment of interest.
        for mol_N, indices in enumerate(frag_split):
            if anchor_index in indices:
                break
        else:
            raise Exception(f'Anchor atom {anchor_index} is in none of the fragments {frag_split}')
        frag = fragmols[mol_N]
        frag_anchor_index = indices.index(anchor_index)
        if self._debug_draw:
            self.draw_nicely(frag)
        combo = Chem.RWMol(rdmolops.CombineMols(scaffold, frag))
        # the fragment atoms follow the scaffold atoms in the combined molecule.
        scaffold_anchor_index = frag_anchor_index + scaffold.GetNumAtoms()
        if self._debug_draw:
            print(scaffold_anchor_index, scaffold_attachment_index, anchor_index, scaffold.GetNumAtoms())
            self.draw_nicely(combo)
        combo.AddBond(scaffold_anchor_index, scaffold_attachment_index, bond_type)
        Chem.SanitizeMol(combo,
                         sanitizeOps=Chem.rdmolops.SanitizeFlags.SANITIZE_ADJUSTHS +
                                     Chem.rdmolops.SanitizeFlags.SANITIZE_SETAROMATICITY,
                         catchErrors=True)
        if self._debug_draw:
            self.draw_nicely(combo)
        scaffold = combo
    return scaffold
def _prevent_weird_rings(self, mol: Chem.Mol):
    """
    Inspect every pair of rings and act on those that share atoms in an unexpected way.

    Rings come from ``self._get_ring_info``. For each pair, the number of shared atoms decides:

    * 0 (separate) or 1 (spiro): logged only.
    * 2 with a bond between the shared atoms (fused): if the smaller ring has 4 or 3 members,
      ``self._place_between`` is called to expand it; other sizes are left.
    * 2 without a bond between them: raises if both rings are 6-membered, otherwise prints a notice.
    * fewer than ``self.atoms_in_bridge_cutoff``: a bridge that is left as is.
    * otherwise: ``self._prevent_bridge_ring`` is called and the whole check is restarted
      recursively on its result.

    :param mol: molecule to check; wrapped in a ``Chem.RWMol`` if it is not one already.
    :return: ``mol.GetMol()`` of the (possibly modified) molecule.
    """
    if not isinstance(mol, Chem.RWMol):
        mol = Chem.RWMol(mol)
    ringatoms = self._get_ring_info(mol)  # GetRingInfo().AtomRings()
    # NOTE(review): ``ringatoms`` is computed once, while ``_place_between`` is called with
    # ``mol`` inside the loop — confirm the ring info stays valid for later pairs.
    for ring_A, ring_B in itertools.combinations(ringatoms, r=2):
        shared = set(ring_A).intersection(set(ring_B))
        if len(shared) == 0:
            log.debug('This molecule has some separate rings')
            pass  # separate rings
        elif len(shared) == 1:
            log.debug('This molecule has a spiro bicycle')
            pass  # spiro ring.
        elif len(shared) == 2:
            log.debug('This molecule has a fused ring')
            if mol.GetBondBetweenAtoms(*shared) is not None:
                pass  # indole/naphtalene
                small, big = sorted([ring_A, ring_B], key=lambda ring: len(ring))
                if len(small) == 4:
                    log.warning('This molecule has a benzo-azetine–kind-of-thing: expanding to indole')
                    # Chem.MolFromSmiles('C12CCCCC1CC2')
                    # benzo-azetine is likely an error: add an extra atom
                    # the two atoms of the small ring that are not in the big ring
                    a, b = set(small).difference(big)
                    self._place_between(mol, a, b)
                elif len(small) == 3:
                    log.warning('This molecule has a benzo-cyclopropane–kind-of-thing: expanding to indole')
                    # Chem.MolFromSmiles('C12CCCCC1C2')
                    # benzo-cyclopropane is actually impossible at this stage.
                    # the single atom of the small ring that is not in the big ring
                    a = list(set(small).difference(big))[0]
                    for b in shared:
                        self._place_between(mol, a, b)
                else:
                    pass  # indole and naphthalene
            elif (len(ring_A), len(ring_B)) == (6, 6):
                raise Exception('This is utterly impossible')
            else:
                print(f'mysterious ring system {len(ring_A)} + {len(ring_B)}')
                pass  # ????
        elif len(shared) < self.atoms_in_bridge_cutoff:
            # adamantane/norbornane/tropinone kind of thing
            log.warning('This molecule has a bridge: leaving')
            pass  # ideally check if planar...
        else:
            log.warning('This molecule has a bridge that will be removed')
            mol = self._prevent_bridge_ring(mol, ring_A)
            # start from scratch.
            return self._prevent_weird_rings(mol)
    return mol.GetMol()
def get_pharmacophoric_point(ligand: Chem.Mol, feat_name: str, atom_indices: Sequence, conformer_index: int,
                             radius: float, directionality: bool) -> PharmacophoricPoint:
    """ Build a pharmacophoric point from the atoms of a feature: its center and,
        if requested, its direction vector.

        Parameters
        ----------
        ligand : rdkit.Chem.Mol
            A ligand

        feat_name : str
            Passed on as the feature type of the pharmacophoric point.

        atom_indices : Sequence
            Indices of the atoms of the feature. With more than one atom the center is
            their centroid, with a single atom it is the position of that atom.

        conformer_index : int
            The conformer whose coordinates will be used to obtain the pharmacophoric
            points.

        radius : float
            Length of the radius in angstroms of the pharmacophoric point.

        directionality : bool
            Whether to compute the direction vector of that point.

        Returns
        -------
        openpharmacophore.PharmacophoricPoint
            A pharmacophoric point.
    """
    extractor = PharmacophoricPointExtractor
    direction = None

    if len(atom_indices) > 1:
        # Aromatic, hydrophobic, positive or negative feature: use the centroid
        coords = extractor._feature_centroid(ligand, atom_indices, conformer_index)
        if directionality:
            direction = extractor._aromatic_direction_vector(ligand, atom_indices, conformer_index)
    else:
        # Donor or acceptor feature: use the position of its only atom
        atom_index = atom_indices[0]
        position = ligand.GetConformer(conformer_index).GetAtomPosition(atom_index)
        coords = np.array([position.x, position.y, position.z], dtype=float)
        if directionality:
            direction = extractor._donor_acceptor_direction_vector(
                ligand, atom_index, coords, conformer_index)

    return PharmacophoricPoint(
        feat_type=feat_name,
        center=puw.quantity(coords, "angstroms"),
        radius=puw.quantity(radius, "angstroms"),
        direction=direction,
        atom_indices=atom_indices,
    )
def CalculateChi0(mol: Chem.Mol) -> float:
    """Calculate molecular connectivity chi index for path order 0.

    This is the sum of ``1 / sqrt(degree)`` over all atoms with a non-zero degree.
    Atoms of degree 0 are ignored, so the result is 0.0 when no atom has
    neighbours (or the molecule has no atoms).
    """
    # Filter in one pass (the repeated ``list.remove`` was quadratic).
    deltas = numpy.array(
        [degree for degree in (atom.GetDegree() for atom in mol.GetAtoms()) if degree != 0],
        'd')
    # numpy.sum gives 0.0 for an empty array; float() keeps the annotated return type.
    return float(numpy.sum(numpy.sqrt(1. / deltas)))
def CalculateMeanWeiner(mol: Chem.Mol) -> float:
    """Get Mean Weiner index of a molecule.

    Or AW. Computed as ``2 * W / (N * (N - 1))`` where ``W`` is the result of
    ``CalculateWeiner`` and ``N`` the number of atoms.

    Returns 0.0 for molecules with fewer than two atoms, for which the
    normalisation factor ``N * (N - 1)`` is zero.
    """
    N = mol.GetNumAtoms()
    if N < 2:
        # no pair of atoms: avoid the division by zero.
        return 0.0
    WeinerNumber = CalculateWeiner(mol)
    return 2.0 * WeinerNumber / (N * (N - 1))
def CalculateQuadratic(mol: Chem.Mol) -> float:
    """Get Quadratic index.

    Or Qindex. Computed as ``3 - 2 * N + M / 2`` where ``M`` is the result of
    ``CalculateZagreb1`` and ``N`` the number of atoms.
    """
    zagreb1 = CalculateZagreb1(mol)
    atom_count = mol.GetNumAtoms()
    half_zagreb1 = zagreb1 / 2.0
    return 3 - 2 * atom_count + half_zagreb1
def toxicity(self, molecule: Chem.Mol) -> bool:
    """
    Checks if a given molecule fails the structural filters.

    The molecule fails as soon as one pattern of ``self.pattern_list`` has more
    substructure matches than its paired entry in ``self.tolerance_list``.
    """
    filters = zip(self.pattern_list, self.tolerance_list)
    return any(
        len(molecule.GetSubstructMatches(pattern)) > tolerance
        for pattern, tolerance in filters
    )
def store(self, combined: Chem.Mol, combined_map: Dict[int, int], disregarded: List[Chem.Mol]):
    """
    Record one combination option.

    The names (``_Name``) of the disregarded molecules are saved as a JSON list in the
    ``parts`` property of ``combined``; the map, the combined molecule and the disregarded
    molecules are then appended to ``self.c_map_options``, ``self.c_options`` and
    ``self.c_disregarded_options`` respectively.

    :return: None
    """
    disregarded_names = [m.GetProp('_Name') for m in disregarded]
    combined.SetProp('parts', json.dumps(disregarded_names))
    for options, item in ((self.c_map_options, combined_map),
                          (self.c_options, combined),
                          (self.c_disregarded_options, disregarded)):
        options.append(item)
def _get_ori_i(self, mol: Chem.Mol, include_collapsed=True):
    """
    Collect the ``_ori_i`` integer property of every atom of ``mol``.

    :param mol: molecule whose atoms all carry ``_ori_i``.
    :param include_collapsed: if true, the JSON lists stored in the ``_ori_is`` property of
        the atoms returned by ``self._get_collapsed_atoms`` are appended too.
    :return: list of indices.
    """
    indices = []
    for atom in mol.GetAtoms():
        indices.append(atom.GetIntProp('_ori_i'))
    if not include_collapsed:
        return indices
    for collapsed_atom in self._get_collapsed_atoms(mol):
        indices += json.loads(collapsed_atom.GetProp('_ori_is'))
    return indices
def find_closest_to_ligand(cls, pdb: Chem.Mol, ligand_resn: str) -> Tuple[Chem.Atom, Chem.Atom]:
    """
    Find the closest atom to the ligand

    Distances of exactly zero and distances between ligand atoms are ignored.
    Atoms without PDB residue information are treated as non-ligand atoms.

    :param pdb: a rdkit Chem object
    :param ligand_resn: 3 letter code
    :return: tuple of non-ligand atom and ligand atom
    :raises ValueError: if no atom has the residue name ``ligand_resn`` or if there is
        no non-ligand atom at a non-zero distance from the ligand.
    """
    ligand = []
    for atom in pdb.GetAtoms():
        info = atom.GetPDBResidueInfo()  # None for atoms lacking PDB information
        if info is not None and info.GetResidueName() == ligand_resn:
            ligand.append(atom.GetIdx())
    if not ligand:
        raise ValueError(f'No atom with residue name {ligand_resn!r} found')
    dm = Chem.Get3DDistanceMatrix(pdb)
    # rows: ligand atoms, columns: all atoms (np.take returns a copy)
    mini = np.take(dm, ligand, 0)
    mini[mini == 0] = np.nan
    mini[:, ligand] = np.nan
    if np.isnan(mini).all():
        raise ValueError(f'No non-ligand atom at a non-zero distance from {ligand_resn!r}')
    # first minimum in row-major order, as with np.where on the minimum value
    a, b = np.unravel_index(np.nanargmin(mini), mini.shape)
    lig_atom = pdb.GetAtomWithIdx(ligand[int(a)])
    nonlig_atom = pdb.GetAtomWithIdx(int(b))
    return (nonlig_atom, lig_atom)
def _get_mol_sender_receivers(mol: Chem.Mol) -> Tuple[np.ndarray, np.ndarray]:
    """Get connectivity (messages) info for a data_dict.

    Every bond contributes two directed messages, begin -> end and end -> begin,
    so both returned arrays have two entries per bond.
    """
    senders = []
    receivers = []
    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtom().GetIdx()
        end_idx = bond.GetEndAtom().GetIdx()
        senders += [begin_idx, end_idx]
        receivers += [end_idx, begin_idx]
    sender_array = np.array(senders)
    receiver_array = np.array(receivers)
    return sender_array, receiver_array
def store_positions(self, mol: Chem.Mol) -> Chem.Mol:
    """
    Saves positional data as _x, _y, _z and majorly ``_ori_i``, the original index.
    The latter gets used by ``_get_new_index``.

    The molecule name (``_Name``) is also stored on each atom as ``_ori_name``.
    Coordinates are read from the conformer returned by ``mol.GetConformer()``.

    :param mol: molecule whose atoms are annotated in place.
    :return: the same molecule.
    """
    conformer = mol.GetConformer()
    mol_name = mol.GetProp('_Name')
    for index, atom in enumerate(mol.GetAtoms()):
        point = conformer.GetAtomPosition(index)
        atom.SetIntProp('_ori_i', index)
        atom.SetProp('_ori_name', mol_name)
        for axis_prop, value in (('_x', point.x), ('_y', point.y), ('_z', point.z)):
            atom.SetDoubleProp(axis_prop, value)
    return mol
def process(mol: Mol, device: torch.device, **kwargs):
    """
    Turn a molecule into a ``ChebNetData`` made of node count, dense adjacency and features.

    The graph has one node per atom plus an extra node at index 0, so atom ``i`` is
    node ``i + 1``. Edges added: a self-loop on every node, an edge from every atom
    node to node 0, and both directions of every bond. The feature matrix returned by
    ``feature.mol_feature`` gets a row of zeros prepended for node 0 and is moved
    to ``device``.

    :param mol: the molecule.
    :param device: device the feature tensor is moved to.
    :param kwargs: accepted but not used here.
    :return: ``ChebNetData(node count, adjacency, features)``.
    """
    node_count = mol.GetNumAtoms() + 1
    graph = DGLGraph()
    graph.add_nodes(node_count)
    # self-loops
    graph.add_edges(graph.nodes(), graph.nodes())
    # atom nodes -> node 0 (one direction only)
    graph.add_edges(range(1, node_count), 0)
    for bond in mol.GetBonds():
        begin_node = bond.GetBeginAtomIdx() + 1
        end_node = bond.GetEndAtomIdx() + 1
        graph.add_edge(begin_node, end_node)
        graph.add_edge(end_node, begin_node)
    adjacency = graph.adjacency_matrix(transpose=False).to_dense()
    atom_features, feature_size = feature.mol_feature(mol)
    extra_node_row = torch.zeros((1, feature_size))
    node_features = torch.cat([extra_node_row, atom_features]).to(device)
    return ChebNetData(node_count, adjacency, node_features)
def apply_to_mol(self, mol: Chem.Mol):
    """
    Write the results held by this object onto ``mol`` as string properties.

    The values are the fields given by ``asdict(self)`` plus ``dG_bind`` and
    ``dG_bind_err``; each property name is obtained with
    ``self._convert_field_to_sdf_field``.
    """
    results = {
        **asdict(self),
        "dG_bind": self.dG_bind,
        "dG_bind_err": self.dG_bind_err,
    }
    for field, value in results.items():
        mol.SetProp(self._convert_field_to_sdf_field(field), str(value))
def get_conn(cls, mol: Chem.Mol) -> Chem.Atom:
    """
    Get connecting atom of mol: the first neighbour of the first dummy atom
    (symbol ``*``).

    Raises ``ValueError`` if the molecule has no dummy atom.
    """
    dummy = next((atom for atom in mol.GetAtoms() if atom.GetSymbol() == '*'), None)
    if dummy is None:
        raise ValueError('Dummy atom not found')
    return dummy.GetNeighbors()[0]
def mol_to_nx(mol: Chem.Mol):
    """
    Convert a molecule to an undirected networkx graph.

    Nodes are atom indices carrying the attributes ``atomic_num``, ``formal_charge``,
    ``chiral_tag``, ``hybridization``, ``num_explicit_hs`` and ``is_aromatic``;
    edges are bonds carrying ``bond_type``.
    """
    graph = nx.Graph()

    for atom in mol.GetAtoms():
        atom_attributes = {
            'atomic_num': atom.GetAtomicNum(),
            'formal_charge': atom.GetFormalCharge(),
            'chiral_tag': atom.GetChiralTag(),
            'hybridization': atom.GetHybridization(),
            'num_explicit_hs': atom.GetNumExplicitHs(),
            'is_aromatic': atom.GetIsAromatic(),
        }
        graph.add_node(atom.GetIdx(), **atom_attributes)

    for bond in mol.GetBonds():
        begin_idx = bond.GetBeginAtomIdx()
        end_idx = bond.GetEndAtomIdx()
        graph.add_edge(begin_idx, end_idx, bond_type=bond.GetBondType())

    return graph
def map_query(mol: Chem.Mol, query: Chem.Mol) -> t.Tuple[int]:
    """
    Get the set of indices of all atoms in molecule `mol` matching the query `query`

    Args:
        mol (Chem.Mol):
            The molecule to search in
        query (Chem.Mol):
            The substructure query

    Returns:
        t.Tuple[int]:
            The distinct atom indices found in any substructure match, in ascending
            order (empty if there is no match)
    """
    matched = set()
    for match_i in mol.GetSubstructMatches(query):
        matched.update(match_i)
    # Sorted so the result does not depend on set iteration order.
    return tuple(sorted(matched))
def CalculateKappa2(mol: Chem.Mol) -> float:
    """Calculate molecular shape index for two bonded fragments.

    Computed as ``(A - 1) * (A - 2)**2 / P2**2`` rounded to 3 decimals, where ``P2``
    is the number of paths of length 2 and ``A`` the number of heavy atoms.
    The result is 0.0 when there is no path of length 2.
    """
    path_count = float(len(Chem.FindAllPathsOfLengthN(mol, 2)))
    heavy_atoms = mol.GetNumHeavyAtoms()
    if not path_count:
        return 0.0
    numerator = (heavy_atoms - 1) * (heavy_atoms - 2)**2
    return round(numerator / path_count**2, 3)
def merge_pair(self, scaffold: Chem.Mol, fragmentanda: Chem.Mol, mapping: Optional = None) -> Chem.Mol:
    """
    Merge the non-overlapping parts of ``fragmentanda`` into ``scaffold``.

    To specify attachments use ``.merge``.
    To understand what is going on see ``.categorise``

    Each anchor atom from ``_pre_fragment_pairs`` is merged with ``_merge_part``. Anchors that
    belong to the same team (see ``_recruit_team``) as an earlier anchor are passed along
    with that one and are not merged a second time. The result is renamed with
    ``_get_combined_name`` and a copy is kept under ``pair_merged``.

    :param scaffold: mol to be added to.
    :param fragmentanda: mol to be fragmented
    :param mapping: see ``get_positional_mapping``. Optional in _pre_fragment_pairs
    :return: the merged molecule.
    """
    handled = []
    pairs = self._pre_fragment_pairs(scaffold, fragmentanda, mapping)
    # confusingly these are hit indexed.
    for anchor_index, attachment_details in pairs.items():
        # anchor index is the fragment-to-added's internal atom that attaches
        if anchor_index in handled:
            continue
        # fix rings: atoms whose category is not an overlapping one.
        uniques = set()
        for atom in fragmentanda.GetAtoms():
            if 'overlapping' not in atom.GetProp('_Category'):
                uniques.add(atom.GetIdx())
        team = self._recruit_team(fragmentanda, anchor_index, uniques)
        # the other anchors in the same team are dealt with in this round.
        other_attachments = list((team & set(pairs.keys())) - {anchor_index})
        other_attachment_details = [pairs[other] for other in other_attachments]
        handled.extend(other_attachments)
        scaffold = self._merge_part(scaffold, fragmentanda,
                                    anchor_index=anchor_index,
                                    attachment_details=attachment_details,
                                    other_attachments=other_attachments,
                                    other_attachment_details=other_attachment_details)
    combined_name = self._get_combined_name(scaffold, fragmentanda)
    scaffold.SetProp('_Name', combined_name)
    self.keep_copy(scaffold, 'pair_merged')
    return scaffold
def CalculateChi1(mol: Chem.Mol) -> float:
    """Calculate molecular connectivity chi index for path order 1.

    This is the sum over all bonds of ``1 / sqrt(d1 * d2)`` where ``d1`` and ``d2``
    are the degrees of the two bonded atoms. Products equal to zero are ignored.
    Returns 0.0 for a molecule without bonds.
    """
    products = [bond.GetBeginAtom().GetDegree() * bond.GetEndAtom().GetDegree()
                for bond in mol.GetBonds()]
    # Filter in one pass (the repeated ``list.remove`` was quadratic).
    cc = numpy.array([product for product in products if product != 0], 'd')
    if cc.size == 0:
        return 0.0
    return float(numpy.sum(numpy.sqrt(1. / cc)))
def _CalculateBondNumber(mol: Chem.Mol, bondtype: str = 'SINGLE') -> float:
    """Count the bonds whose bond type name equals ``bondtype``.

    :param bondtype: can be SINGLE, DOUBLE, TRIPLE or AROMATIC.
    """
    return sum(1 for bond in mol.GetBonds() if bond.GetBondType().name == bondtype)
def CalculateMeanRandic(mol: Chem.Mol) -> float:
    """Calculate mean chi1 (Randic) connectivity index.

    This is the mean over all bonds of ``1 / sqrt(d1 * d2)`` where ``d1`` and ``d2``
    are the degrees of the two bonded atoms. Products equal to zero are ignored.
    Returns 0.0 when there is no bond left to average over.
    """
    products = [bond.GetBeginAtom().GetDegree() * bond.GetEndAtom().GetDegree()
                for bond in mol.GetBonds()]
    # Filter in one pass (the repeated ``list.remove`` was quadratic).
    cc = numpy.array([product for product in products if product != 0], 'd')
    if cc.size == 0:
        # also covers the case where filtering emptied the list, for which
        # numpy.mean would return nan.
        return 0.0
    return float(numpy.mean(numpy.sqrt(1. / cc)))