def all_atom_coords(mol: rdkit.Mol, conformer=-1):
    """
    Yield the id and position of every atom in `mol`.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule whose atoms are iterated over.

    conformer : :class:`int`, optional
        The id of the conformer the positions are read from.

    Yields
    ------
    :class:`tuple`
        A pair of the form

        .. code-block:: python

            (32, numpy.array([12, 34, 3]))

        The first element is the atom id, the second is an array
        with the coordinates of that atom.

    """

    positions = mol.GetConformer(conformer)

    # The conformer is queried by atom id, so the ids are pulled
    # lazily from the atoms, one per yielded pair.
    atom_ids = (atom.GetIdx() for atom in mol.GetAtoms())
    for atom_id in atom_ids:
        point = positions.GetAtomPosition(atom_id)
        yield atom_id, np.array(list(point))
def mol_to_atom_feats_and_adjacency_list(mol: AllChem.Mol, atom_map_to_index_map=None,
                                         params: AtomFeatParams = None):
    """
    Build the node feature matrix and per-bond-type adjacency lists of a molecule.

    Every atom of `mol` is looked up by its ``'molAtomMapNumber'`` property, so all
    atoms must carry an atom-map number.

    :param mol: the molecule to convert.
    :param atom_map_to_index_map: if you pass this in it will use the defined indices
        for each atom (atom-map number -> row index). Its values must be exactly
        ``0 .. num_atoms - 1``. Otherwise will use rdkit default indexing. A supplied
        mapping is only read, never modified.
    :param params: featurisation parameters; a default ``AtomFeatParams()`` is created
        when ``None``.
    :return: a ``graph_as_adj_list.GraphAsAdjList`` built from (1) the float32 node
        feature matrix with one row per atom, (2) a dict mapping each bond name in
        ``params.bond_names`` to the transposed array of ``(begin, end)`` index pairs,
        and (3) an all-zeros integer array with one entry per atom.
    :raises AssertionError: if a supplied mapping does not cover ``0 .. num_atoms - 1``.
    :raises KeyError: if an atom has no ``'molAtomMapNumber'`` property, or its map
        number is missing from a supplied mapping.
    """
    params = AtomFeatParams() if params is None else params
    atoms = mol.GetAtoms()
    num_atoms = len(atoms)
    node_feats = np.zeros((num_atoms, params.atom_feature_length), dtype=np.float32)

    use_supplied_idx_flg = atom_map_to_index_map is not None
    if use_supplied_idx_flg:
        # Raised explicitly (rather than via `assert`) so the check is not stripped
        # when Python runs with -O; the exception type is unchanged.
        if set(atom_map_to_index_map.values()) != set(range(num_atoms)):
            raise AssertionError(
                "if give pre supplied ordering it must be the same size as the molecules trying to order")
    else:
        # No ordering given: build the atom-map -> index mapping ourselves.
        atom_map_to_index_map = {}

    # First we will create the atom features and the mappings
    for atom in atoms:
        am = atom.GetPropsAsDict()['molAtomMapNumber']  # the atom mapping in the file
        if use_supplied_idx_flg:
            idx = atom_map_to_index_map[am]
        else:
            idx = atom.GetIdx()  # goes from 0 to A-1
            atom_map_to_index_map[am] = idx
        node_feats[idx, :] = get_atom_features(atom, params)

    # Now we will go through and create the adjacency lists
    adjacency_lists = {k: [] for k in params.bond_names}
    for bond in mol.GetBonds():
        am_b = bond.GetBeginAtom().GetPropsAsDict()['molAtomMapNumber']
        am_e = bond.GetEndAtom().GetPropsAsDict()['molAtomMapNumber']
        ix_b = atom_map_to_index_map[am_b]
        ix_e = atom_map_to_index_map[am_e]
        adjacency_lists[params.get_bond_name(bond)].append((ix_b, ix_e))

    # Finally we pack all the results together.
    # NOTE(review): a bond type with no bonds becomes an empty 1-D array here rather
    # than one of shape (2, 0) -- confirm the consumer tolerates that.
    res = graph_as_adj_list.GraphAsAdjList(
        node_feats,
        {k: np.array(v).T for k, v in adjacency_lists.items()},
        np.zeros(node_feats.shape[0], dtype=data_types.INT))
    return res
def get_cavity_size(mol: rdkit.Mol, origin, conformer):
    """Calculates diameter of the conformer from `origin`.

    The cavity is measured by finding the atom nearest to `origin`,
    correcting for van der Waals diameter and multiplying by -2.

    Args:
        mol: Molecule to calculate diameter of.
        origin: Coordinates of the position from which
            the cavity is measured.
        conformer: ID of the conformer to use.

    Returns:
        (float): Cavity size of the molecule.
    """
    conf = mol.GetConformer(conformer)
    atom_vdw = np.array(
        [atom_vdw_radii[x.GetSymbol()] for x in mol.GetAtoms()])

    # `origin` is turned into a single-row 2-D ndarray. `np.matrix` did
    # the same but is discouraged by NumPy and, if `euclidean_distances`
    # is scikit-learn's, is no longer accepted by its input validation.
    origin_row = np.atleast_2d(origin)
    distances = euclidean_distances(conf.GetPositions(), origin_row)

    # One distance per atom, measured to the atom surface rather than
    # to its centre.
    distances = distances.flatten() - atom_vdw
    return -2 * distances.min()
def standardize(compound: AllChem.Mol, add_hs=True, remove_stereo=True, thorough=False) -> AllChem.Mol: """ Standardizes an RDKit molecule by running various cleanup and sanitization operations. Parameters ---------- compound : rdkit.Chem.rdchem.Mol A chemical compound. add_hs : bool If True, adds hydrogens to the compound. remove_stereo : bool If True, removes stereochemistry info from the compound. thorough : bool If True, removes charge, isotopes, and small fragments from the compound. Returns ------- rdkit.Chem.rdchem.Mol The standardized compound. """ # basic cleanup Chem.Cleanup(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) AllChem.AssignStereochemistry(compound, cleanIt=True, force=True, flagPossibleStereoCenters=True) # remove isotopes, neutralize charge if thorough: for atom in compound.GetAtoms(): atom.SetIsotope(0) compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # remove stereochemistry if remove_stereo: Chem.RemoveStereochemistry(compound) # commute inchi compound = _commute_inchi(compound) # keep biggest fragment if thorough: compound = _strip_small_fragments(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # neutralize charge compound = _neutralize_charge(compound) Chem.SanitizeMol(compound, sanitizeOps=Chem.SanitizeFlags.SANITIZE_ALL, catchErrors=False) # add protons if add_hs: return Chem.AddHs(compound, explicitOnly=False, addCoords=True) return compound
def apply_shift(mol: rdkit.Mol, shift, conformer=-1):
    """
    Return a copy of `mol` with every atom translated by `shift`.

    The molecule passed in is not modified.

    Parameters
    ----------
    mol : :class:`rdkit.Mol`
        The molecule to translate.

    shift : :class:`numpy.array`
        A numpy array holding the value of the shift along each
        axis.

    conformer : :class:`int`, optional
        The id of the conformer to use.

    Returns
    -------
    :class:`rdkit.Chem.rdchem.Mol`
        A copy of the molecule whose only conformer is the selected
        conformer with its coordinates shifted by `shift`.

    """

    # Edit a copy of the conformer so the one held by `mol` keeps its
    # original coordinates.
    shifted_conf = rdkit.Conformer(mol.GetConformer(conformer))

    for atom in mol.GetAtoms():
        idx = atom.GetIdx()
        # Read the current position as an array, translate it and
        # write it back as the geometry object the conformer expects.
        moved = np.array(shifted_conf.GetAtomPosition(idx)) + shift
        shifted_conf.SetAtomPosition(idx, Point3D(*moved))

    # The copied molecule comes with copies of the original
    # conformers; drop them so that only the shifted one remains.
    shifted_mol = rdkit.Mol(mol)
    shifted_mol.RemoveAllConformers()
    shifted_mol.AddConformer(shifted_conf)
    return shifted_mol
def _init_from_rdkit_mol(
    self,
    molecule: rdkit.Mol,
    functional_groups: typing.Iterable[typing.Union[
        FunctionalGroup, FunctionalGroupFactory]],
    placer_ids: typing.Optional[tuple[int, ...]],
) -> None:
    """
    Initialize from an :mod:`rdkit` molecule.

    Parameters:

        molecule:
            The molecule. Its atoms, bonds and the positions of
            its default conformer are copied into the building
            block.

        functional_groups:
            An :class:`iterable` holding
            :class:`.FunctionalGroup` instances,
            :class:`.FunctionalGroupFactory` instances, or a mix
            of both. :class:`.FunctionalGroup` instances are added
            to the building block as they are, while each
            :class:`.FunctionalGroupFactory` is used to create the
            :class:`.FunctionalGroup` instances the building block
            should hold. Functional groups identify which atoms
            are modified during :class:`.ConstructedMolecule`
            construction.

        placer_ids:
            The ids of *placer* atoms, which are the atoms used
            for calculating the position of the building block.
            Which ids end up being used depends on this argument
            and on the functional groups:

            #. `placer_ids` is given: those ids are used.
            #. `placer_ids` is ``None`` and the building block
               has functional groups: the *placer* ids of the
               functional groups are used.
            #. `placer_ids` is ``None`` and `functional_groups`
               is empty: all atoms of the molecule are used.

    """

    atoms = tuple(
        Atom(
            rd_atom.GetIdx(),
            rd_atom.GetAtomicNum(),
            rd_atom.GetFormalCharge(),
        )
        for rd_atom in molecule.GetAtoms()
    )

    bond_list = []
    for rd_bond in molecule.GetBonds():
        begin = atoms[rd_bond.GetBeginAtomIdx()]
        end = atoms[rd_bond.GetEndAtomIdx()]
        # Dative bonds are stored with the order 9, every other bond
        # keeps the numeric order reported by rdkit.
        if rd_bond.GetBondType() == rdkit.BondType.DATIVE:
            order = 9
        else:
            order = rd_bond.GetBondTypeAsDouble()
        bond_list.append(Bond(atom1=begin, atom2=end, order=order))
    bonds = tuple(bond_list)

    super().__init__(
        atoms,
        bonds,
        molecule.GetConformer().GetPositions(),
    )

    extracted = self._extract_functional_groups(
        functional_groups=functional_groups,
    )
    self._with_functional_groups(extracted)

    # From here on the functional groups are read back from the
    # instance rather than from the argument.
    self._placer_ids = self._normalize_placer_ids(
        placer_ids=placer_ids,
        functional_groups=self._functional_groups,
    )
    core_ids = self._get_core_ids(
        functional_groups=self._functional_groups,
    )
    self._core_ids = frozenset(core_ids)
def _init_from_rdkit_mol(
    self,
    molecule: rdkit.Mol,
    functional_groups: _FunctionalGroups,
    placer_ids: typing.Optional[abc.Iterable[int]],
) -> None:
    """
    Initialize from an :mod:`rdkit` molecule.

    Parameters:

        molecule:
            The molecule. Its atoms, bonds and the positions of
            its default conformer are copied into the building
            block.

        functional_groups:
            The :class:`.FunctionalGroup` instances the building
            block should have, and / or
            :class:`.FunctionalGroupFactory` instances used for
            creating them.

        placer_ids:
            The ids of *placer* atoms, which are the atoms used
            for calculating the position of the building block.
            Which ids end up being used depends on this argument
            and on the functional groups:

            #. `placer_ids` is given: those ids are used.
            #. `placer_ids` is ``None`` and the building block
               has functional groups: the *placer* ids of the
               functional groups are used.
            #. `placer_ids` is ``None`` and `functional_groups`
               is empty: all atoms of the molecule are used.

    """

    atom_list = []
    for rd_atom in molecule.GetAtoms():
        atom_list.append(
            Atom(
                id=rd_atom.GetIdx(),
                atomic_number=rd_atom.GetAtomicNum(),
                charge=rd_atom.GetFormalCharge(),
            )
        )
    atoms = tuple(atom_list)

    bond_list = []
    for rd_bond in molecule.GetBonds():
        first = atoms[rd_bond.GetBeginAtomIdx()]
        second = atoms[rd_bond.GetEndAtomIdx()]
        # Dative bonds are stored with the order 9, every other bond
        # keeps the numeric order reported by rdkit.
        is_dative = rd_bond.GetBondType() == rdkit.BondType.DATIVE
        order = 9 if is_dative else rd_bond.GetBondTypeAsDouble()
        bond_list.append(Bond(atom1=first, atom2=second, order=order))
    bonds = tuple(bond_list)

    position_matrix = molecule.GetConformer().GetPositions()

    # The base initializer is called explicitly on Molecule.
    Molecule.__init__(
        self=self,
        atoms=atoms,
        bonds=bonds,
        position_matrix=position_matrix,
    )

    extracted = self._extract_functional_groups(
        functional_groups=functional_groups,
    )
    self._with_functional_groups(extracted)

    # From here on the functional groups are read back from the
    # instance rather than from the argument.
    self._placer_ids = self._normalize_placer_ids(
        placer_ids=placer_ids,
        functional_groups=self._functional_groups,
    )
    core_ids = self._get_core_ids(
        functional_groups=self._functional_groups,
    )
    self._core_ids = frozenset(core_ids)