def _asa_uff(mol_complete: Molecule,
             ligands: Iterable[Molecule],
             core: Molecule,
             read_template: bool,
             job: Type[Job],
             settings: Settings) -> Tuple[float, float, float, float, int]:
    r"""Perform an activation strain analyses using RDKit UFF.

    Parameters
    ----------
    mol_complete : |plams.Molecule|
        A Molecule representing the (unfragmented) relaxed structure of the system of interest.
    ligands : :class:`Iterable<collections.abc.Iterable>` [|plams.Molecule|]
        An iterable of Molecules representing the individual ligand fragments.
    core : |plams.Molecule|
        The Molecule whose UFF energy is reported as the core energy.
    read_template : :class:`bool`
        Unused; see **settings**.
    job : :class:`type`
        Unused; see **settings**.
    settings : |plams.Settings|
        Unused.
        **read_template**, **job** and **settings** are only accepted for retaining
        compatibility with the signature of :func:`._asa_plams`.

    Returns
    -------
    :class:`float`, :class:`float`, :class:`float`, :class:`float` and :class:`int`
        The energy of **mol_complete**, the summed energy of all **ligands**,
        the energy of **core**, the energy of the (optimized) lowest-energy ligand
        and the total number of ligands.
        All four energies are ``nan`` if any ligand energy is ``nan`` or
        if **ligands** is empty.

    """
    # Create RDKit molecules; the ligands are converted lazily
    rd_complete = molkit.to_rdmol(mol_complete)
    rd_ligands = (molkit.to_rdmol(mol) for mol in ligands)

    # Calculate the energy of the total system
    E_complete = UFF(rd_complete, ignoreInterfragInteractions=False).CalcEnergy()

    # Calculate the (summed) energy of each individual fragment in the total system
    # and keep track of the lowest-energy fragment
    E_ligands = 0.0
    E_min = np.inf
    mol_min = None
    ligand_count = 0  # Stays 0 if **ligands** is empty
    for ligand_count, rdmol in enumerate(rd_ligands, 1):
        E = UFF(rdmol, ignoreInterfragInteractions=False).CalcEnergy()
        E_ligands += E
        if E < E_min:
            E_min, mol_min = E, rdmol

    # One of the calculations failed or there was nothing to calculate; better stop now
    if mol_min is None or np.isnan(E_ligands):
        return np.nan, np.nan, np.nan, np.nan, ligand_count

    # Calculate the energy of an optimized fragment
    UFF(mol_min, ignoreInterfragInteractions=False).Minimize()
    E_ligand_opt = UFF(mol_min, ignoreInterfragInteractions=False).CalcEnergy()

    E_core = UFF(molkit.to_rdmol(core), ignoreInterfragInteractions=False).CalcEnergy()
    return E_complete, E_ligands, E_core, E_ligand_opt, ligand_count
def set_qd(qd: Molecule, mol_dict: Settings) -> Molecule:
    """Update quantum dots imported by :func:`.read_mol`.

    The atoms of **qd** are reordered (core atoms first, followed by the ligands),
    after which a number of molecular and atomic properties are set.
    Performs an inplace update of **qd**.

    Parameters
    ----------
    qd : |plams.Molecule|
        The quantum dot.
    mol_dict : |plams.Settings|
        A Settings instance from which the ``ligand_smiles``, ``ligand_anchor``,
        ``guess_bonds``, ``name`` and ``path`` attributes are read.

    Returns
    -------
    |plams.Molecule|
        The (inplace updated) input molecule **qd**.

    """
    # Create ligand (and anchor) molecules
    ligand = molkit.from_smiles(mol_dict.ligand_smiles)
    ligand_rdmol = molkit.to_rdmol(ligand)
    anchor = molkit.from_smiles(mol_dict.ligand_anchor)
    anchor_rdmol = molkit.to_rdmol(anchor)
    qd_rdmol = molkit.to_rdmol(qd)

    # Create nested lists of (1-based) atomic indices: one sub-list per matched ligand.
    # The nesting has to be retained as it is required for the reordering below.
    matches = qd_rdmol.GetSubstructMatches(ligand_rdmol)
    lig_idx = [[1 + i for i in match] for match in matches]
    lig_idx_flat = [i for match in lig_idx for i in match]

    # The core consists of all atoms (1 up to and including len(qd)) not part of any ligand
    lig_idx_set = set(lig_idx_flat)
    core_idx = [i for i in range(1, 1 + len(qd)) if i not in lig_idx_set]

    # Guess bonds
    if mol_dict.guess_bonds:
        qd.guess_bonds(atom_subset=[qd[i] for i in lig_idx_flat])

    # Reorder all atoms: core atoms first followed by ligands
    qd.atoms = [qd[i] for i in core_idx] + [qd[j] for i in lig_idx for j in i]

    # Construct a list with the indices of all ligand anchor atoms
    # core_idx_max is the (1-based) index of the first ligand atom
    core_idx_max = 1 + len(core_idx)
    _anchor_idx = ligand_rdmol.GetSubstructMatch(anchor_rdmol)[0]
    start = core_idx_max + _anchor_idx
    stop = start + len(lig_idx_flat)
    step = len(ligand)
    anchor_idx = list(range(start, stop, step))

    # Update the properties of **qd**
    for i in anchor_idx:
        qd[i].properties.anchor = True
    qd.properties.indices = list(range(1, core_idx_max)) + anchor_idx
    qd.properties.job_path = []
    qd.properties.name = mol_dict.name
    qd.properties.path = mol_dict.path
    qd.properties.ligand_smiles = Chem.CanonSmiles(mol_dict.ligand_smiles)
    # _anchor_idx is a 0-based RDKit index while PLAMS molecules are indexed from 1
    # NOTE(review): the numerical suffix is kept 0-based as before; confirm against consumers
    qd.properties.ligand_anchor = f'{ligand[_anchor_idx + 1].symbol}{_anchor_idx}'

    # Update the pdb_info of all atoms
    for i, at in enumerate(qd, 1):
        at.properties.pdb_info.SerialNumber = i
        if i < core_idx_max:  # A core atom
            at.properties.pdb_info.ResidueNumber = 1
        else:  # A ligand atom
            at.properties.pdb_info.ResidueNumber = 2 + int((i - core_idx_max) / len(ligand))

    return qd
def modified_minimum_scan_rdkit(ligand: Molecule,
                                bond_tuple: Tuple[int, int],
                                anchor: Atom) -> None:
    """A modified version of the :func:`.global_minimum_scan_rdkit` function.

    * Uses the ligand vector as criteria rather than the energy.
    * Geometry optimizations are constrained during the conformation search.
    * Finish with a final unconstrained geometry optimization.

    Performs an inplace update of **ligand**.

    See Also
    --------
    :func:`global_minimum_scan_rdkit<scm.plams.recipes.global_minimum.minimum_scan_rdkit>`:
        Optimize the molecule (RDKit UFF) with 3 different values for the given dihedral angle and
        find the lowest energy conformer.

    Parameters
    ----------
    ligand : |plams.Molecule|
        The input molecule; its geometry is overwritten with the best conformation.
    bond_tuple : :class:`tuple` [:class:`int`, :class:`int`]
        A 2-tuple containing the atomic indices of the bond that is rotated.
    anchor : |plams.Atom|
        The ligand anchor atom.
        The origin is used as anchor if **anchor** is not part of **ligand**.

    """
    # Define a number of variables and create 3 copies of the ligand
    angles = (-120, 0, 120)
    mol_list = [ligand.copy() for _ in range(3)]
    for angle, mol in zip(angles, mol_list):
        bond = mol[bond_tuple]
        atom = mol[bond_tuple[0]]
        mol.rotate_bond(bond, atom, angle, unit='degree')
    rdmol_list = [molkit.to_rdmol(mol, properties=False) for mol in mol_list]

    # Optimize the (constrained) geometry for all dihedral angles in angles
    # The fixed points are determined from the last copy and applied to all copies
    fixed = _find_idx(mol, bond)
    for rdmol in rdmol_list:
        ff = UFF(rdmol)
        for f in fixed:
            ff.AddFixedPoint(f)
        ff.Minimize()

    # Find the conformation with the optimal ligand vector
    cost_list = []
    try:
        i = ligand.atoms.index(anchor)
    except ValueError:
        i = -1  # Default to the origin as anchor

    for rdmol in rdmol_list:
        xyz = rdmol_as_array(rdmol)
        if i == -1:  # Default to the origin as anchor
            xyz = np.vstack([xyz, [0, 0, 0]])
        rotmat = optimize_rotmat(xyz, i)
        xyz[:] = xyz @ rotmat.T
        xyz -= xyz[i]
        cost = np.exp(xyz[:, 1:]).sum()
        cost_list.append(cost)

    # Perform an unconstrained optimization on the best geometry and update the geometry of ligand
    # Note: it is the best conformer which has to be optimized, not the last one of the loop above
    j = np.argmin(cost_list)
    rdmol_best = rdmol_list[j]
    UFF(rdmol_best).Minimize()
    ligand.from_rdmol(rdmol_best)
def get_current_value(self, mol: MolType) -> float:
    """Evaluate the coordinate for **mol** and return its current value.

    A PLAMS :class:`Molecule` is first converted into an RDKit molecule;
    any other object is used as-is and must provide a ``GetConformer()`` method.
    """
    rdmol = molkit.to_rdmol(mol) if isinstance(mol, Molecule) else mol
    conformer = rdmol.GetConformer()

    # Every index in self.atoms is lowered by one before being handed to self.fun
    # (presumably a 1-based to 0-based conversion)
    shifted = tuple(index - 1 for index in self.atoms)
    return self.fun(conformer, *shifted)
def substructure_split(ligand: Molecule,
                       idx: Tuple[int, int],
                       split: bool = True) -> Molecule:
    """Remove the atom at the end of a functional group and return the remaining ligand.

    If ``split=True`` then the atom defined by the last index in **idx** is deleted and
    the fragment containing the atom defined by the first index in **idx** is kept.
    The latter atom is given a charge of -1 if it does not yet have a (non-zero) charge.

    Parameters
    ----------
    ligand: |plams.Molecule|_
        The ligand molecule.

    idx : |tuple|_ [|int|_]
        A tuple with 2 (0-based) atomic indices associated with a functional group.

    split : bool
        If a functional group should be split from **ligand** (``True``) or not (``False``).

    Returns
    -------
    |plams.Molecule|_
        A copy of **ligand**, with part of its functional group removed (see **split**).

    """
    lig = ligand.copy()
    anchor_at = lig[idx[0] + 1]
    leaving_at = lig[idx[-1] + 1]

    if split:
        lig.delete_atom(leaving_at)

        # Continue with the fragment holding the anchor atom
        for fragment in lig.separate_mod():
            if anchor_at in fragment:
                lig = fragment
                break

        # Assign a charge to the anchor atom if it does not have one yet
        if not anchor_at.properties.charge:
            anchor_at.properties.charge = -1

    # Update ligand properties
    anchor_index = lig.atoms.index(anchor_at) + 1
    lig.properties.dummies = anchor_at
    lig.properties.anchor = anchor_at.symbol + str(anchor_index)
    lig.properties.charge = sum(at.properties.get('charge', 0) for at in lig)

    # Update the ligand smiles string, name and path
    raw_smiles = Chem.MolToSmiles(molkit.to_rdmol(lig))
    lig.properties.smiles = Chem.CanonSmiles(raw_smiles)
    sanitized = santize_smiles(lig.properties.smiles)
    lig.properties.name = sanitized + '@' + lig.properties.anchor
    lig.properties.path = ligand.properties.path
    return lig
def get_current_value(self, mol):
    """
    Value of the coordinate, as computed by ``self.fun`` for the conformer of *mol*.

    PLAMS molecules are converted into RDKit molecules before the conformer is requested.
    """
    needs_conversion = isinstance(mol, Molecule)
    if needs_conversion:
        mol = molkit.to_rdmol(mol)

    conformer = mol.GetConformer()

    # self.atoms minus one
    # (presumably to go from 1-based to 0-based indices)
    indices = []
    for atom_index in self.atoms:
        indices.append(atom_index - 1)

    return self.fun(conformer, *indices)
def sa_scores(mols: Iterable[Molecule], filename: Optional[PathType] = None) -> np.ndarray:
    """Return an array with the synthetic accessibility score of every molecule in **mols**.

    The model is loaded from **filename** if it is provided;
    an empty dictionary is used as model otherwise.
    """
    if filename is None:
        sa_model = {}
    else:
        sa_model = _load_sa_model(filename)

    # The array size is only known in advance for sized iterables
    try:
        n_mols = len(mols)  # type: ignore
    except TypeError:
        n_mols = -1

    # Molecules are lazily converted and scored one at a time
    scores = (_compute_sas(to_rdmol(plams_mol), sa_model) for plams_mol in mols)
    return np.fromiter(scores, dtype=float, count=n_mols)
def _parse_name_type(mol_dict: Settings) -> None:
    """Set the ``"name"`` and ``"type"`` keys in **mol_dict**.

    The new values of ``"name"`` and ``"type"`` depend on the value of ``mol_dict["mol"]``:

    * A path to an existing file: the type is the text after the last ``"."`` and
      the name is the basename of the text before it.
    * A path to an existing directory: the type is ``"folder"``.
    * Any other string: the type is ``"smiles"``; if ``"path"`` is in **mol_dict** then the
      first ``len(path) + 1`` characters are stripped from ``mol_dict["mol"]``.
    * A PLAMS Molecule: the type is ``"plams_mol"``.
    * An RDKit Mol: the type is ``"rdmol"``.

    Parameters
    ----------
    mol_dict : |plams.Settings|_
        A Settings instance containing the ``"mol"`` key.
        ``mol_dict["mol"]`` is expected to be a string, a PLAMS Molecule or an RDKit Mol.

    Raises
    ------
    TypeError
        Raised if ``mol_dict["mol"]`` is an instance of neither
        :class:`str`, :class:`Molecule` nor :class:`mol`.

    """
    mol = mol_dict.mol

    if isinstance(mol, str):
        if isfile(mol):  # mol is a file
            root_and_ext = mol.rsplit('.', 1)
            mol_dict.type = root_and_ext[-1]
            mol_dict.name = basename(root_and_ext[0])
            return

        if isdir(mol):  # mol is a directory
            mol_dict.type = 'folder'
            mol_dict.name = basename(mol)
            return

        # mol is (probably; hopefully?) a SMILES string
        if 'path' in mol_dict:
            offset = 1 + len(mol_dict.path)
        else:
            offset = 0
        mol_dict.type = 'smiles'
        mol_dict.mol = mol[offset:]
        mol_dict.name = santize_smiles(mol_dict.mol)
        return

    if isinstance(mol, Molecule):  # mol is an instance of plams.Molecule
        mol_dict.type = 'plams_mol'
        if mol.properties.name:
            mol_dict.name = mol.properties.name
        else:
            heavy_rdmol = Chem.RemoveHs(molkit.to_rdmol(mol))
            mol_dict.name = Chem.MolToSmiles(heavy_rdmol, canonical=True)
        return

    if isinstance(mol, Chem.rdchem.Mol):  # mol is an instance of rdkit.Chem.Mol
        mol_dict.type = 'rdmol'
        mol_dict.name = Chem.MolToSmiles(Chem.RemoveHs(mol), canonical=True)
        return

    raise TypeError(f"mol_dict['mol'] expects an instance of 'str', 'Molecule' or 'Mol'; "
                    f"observed type: '{mol.__class__.__name__}'")
def fix_h(mol: Molecule) -> None:
    """If a C=C-H angle is smaller than :math:`20` degrees, set it back to :math:`120` degrees.

    Only hydrogens whose (first) neighbor takes part in a bond with a bond order of 2 are
    considered.
    Hydrogens without any neighbors are skipped.
    Performs an inplace update of **mol**.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    """
    # Collect all pairs of a hydrogen and its (double-bonded) neighbor
    h_list = []
    for atom in mol:
        if atom.atnum != 1:
            continue
        neighbors = mol.neighbors(atom)
        if not neighbors:  # An unbound hydrogen; there is no angle to fix
            continue
        if any(bond.order == 2.0 for bond in neighbors[0].bonds):
            h_list.append((atom, neighbors[0]))

    rdmol = molkit.to_rdmol(mol)
    conf = rdmol.GetConformer()
    get_idx = mol.atoms.index
    set_angle = rdMolTransforms.SetAngleDeg
    get_angle = rdMolTransforms.GetAngleDeg

    update = False
    for at1, at2 in h_list:  # at1: the hydrogen; at2: its neighbor
        # Neighbours of neighbours; this list can be of any length (including 0 or 1)
        at3_list = [atom for atom in mol.neighbors(at2) if atom != at1]

        # Check the at3-at2-at1 angles; at most one angle is adjusted per hydrogen
        for at3 in at3_list:
            idx_tup = get_idx(at3), get_idx(at2), get_idx(at1)
            if get_angle(conf, *idx_tup) <= 20.0:
                set_angle(conf, *idx_tup, 120.0)
                update = True
                break

    # Only update the Cartesian coordinates of mol if an angle has been adjusted
    if update:
        mol.from_rdmol(rdmol)
def adf_connectivity(mol: Molecule) -> List[str]:
    """Create an AMS-compatible connectivity list.

    Each bond is formatted as ``"{atom1.id} {atom2.id} {order:.1f}"``.
    Aromatic bonds (as identified by RDKit) get a bond order of 1.5;
    aromaticity is ignored if the conversion to an RDKit molecule fails with
    a :exc:`ValueError` or an ``ArgumentError``.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule with :math:`n` bonds.

    Returns
    -------
    :math:`n` |list|_ [|str|_]
        An ADF-compatible connectivity list of :math:`n` bonds.

    """
    mol.set_atoms_id()
    try:
        try:
            rdmol = molkit.to_rdmol(mol)
        except Exception as ex:
            if type(ex) is not ValueError and ex.__class__.__name__ != 'ArgumentError':
                raise
            # Plan B: ignore aromatic bonds
            bond_orders = [bond.order for bond in mol.bonds]
        else:
            # Create a list of bond orders; aromatic bonds get a bond order of 1.5
            aromatic = [bond.GetIsAromatic() for bond in rdmol.GetBonds()]
            bond_orders = [(1.5 if ar else bond.order) for ar, bond in zip(aromatic, mol.bonds)]

        return [
            f'{bond.atom1.id} {bond.atom2.id} {order:.1f}'
            for bond, order in zip(mol.bonds, bond_orders)
        ]
    finally:
        # Always remove the atomic ids again, even if an exception is raised
        mol.unset_atoms_id()
def fix_carboxyl(mol: Molecule) -> None:
    """Reset the OCO angle of carboxylate groups if it is smaller than :math:`60` degrees.

    Performs an inplace update of **mol**;
    **mol** is left untouched if it does not contain any carboxylate groups.

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    """
    rdmol = molkit.to_rdmol(mol)
    conformer = rdmol.GetConformer()
    carboxylates = rdmol.GetSubstructMatches(_CARBOXYLATE)

    # Nothing to do
    if not carboxylates:
        return

    for match in carboxylates:
        first, center, third, last = match[0], match[1], match[2], match[3]
        angle = rdMolTransforms.GetAngleDeg(conformer, last, center, first)
        if angle < 60:
            rdMolTransforms.SetAngleDeg(conformer, third, center, last, 180.0)
            rdMolTransforms.SetAngleDeg(conformer, first, center, last, 120.0)

    # The coordinates are copied back regardless of whether an angle has been adjusted
    mol.from_rdmol(rdmol)
def set_mol_prop(mol: Molecule, mol_dict: Settings) -> None:
    """Assign molecular properties to **mol** and atomic properties to all of its atoms.

    Cores are named after their formula and use ``"COR"`` as residue name;
    all other molecules take their name from **mol_dict** and use ``"LIG"``.
    A SMILES string is generated if ``mol.properties.smiles`` is not set.
    """
    if mol_dict.is_core:
        residue_name, name = 'COR', mol.get_formula()
    else:
        residue_name, name = 'LIG', mol_dict.name

    # Set the molecular properties
    properties = mol.properties
    properties.name = name
    properties.dummies = mol_dict.indices
    properties.path = mol_dict.path
    properties.job_path = []

    # Set the atomic properties; pairs of letters (for pdb_info.Name) are endlessly repeated
    letter_pairs = itertools.cycle(itertools.combinations(ascii_letters, 2))
    for atom, letters in zip(mol, letter_pairs):
        set_atom_prop(atom, letters, residue_name)

    # Generate a SMILES string (without hydrogens) if none is available
    if not mol.properties.smiles:
        heavy_rdmol = Chem.RemoveHs(molkit.to_rdmol(mol))
        mol.properties.smiles = Chem.MolToSmiles(heavy_rdmol, canonical=True)
def _get_value(mol: Molecule) -> Tuple[List[str], int]:
    """Convert **mol** into a .pdb block and return its lines together with the line count."""
    rdmol = molkit.to_rdmol(mol)
    pdb_lines = Chem.MolToPDBBlock(rdmol).splitlines()
    line_count = len(pdb_lines)
    return pdb_lines, line_count
def canonicalize_mol(mol: Molecule, inplace: bool = True) -> Optional[Molecule]:
    """Sort the atoms of a PLAMS molecule by their canonical rank, highest rank first.

    The ranks are provided by rdkit.Chem.CanonicalRankAtoms_.

    .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms

    Examples
    --------
    .. code:: python

        >>> from scm.plams import Molecule, from_smiles

        # Methane
        >>> mol: Molecule = from_smiles('C')
        >>> print(mol)  # doctest: +SKIP
          Atoms:
            1         H      0.640510      0.640510     -0.640510
            2         H      0.640510     -0.640510      0.640510
            3         C      0.000000      0.000000      0.000000
            4         H     -0.640510      0.640510      0.640510
            5         H     -0.640510     -0.640510     -0.640510

        >>> canonicalize_mol(mol)
        >>> print(mol)  # doctest: +SKIP
          Atoms:
            1         C      0.000000      0.000000      0.000000
            2         H     -0.640510     -0.640510     -0.640510
            3         H     -0.640510      0.640510      0.640510
            4         H      0.640510     -0.640510      0.640510
            5         H      0.640510      0.640510     -0.640510

    Parameters
    ----------
    mol : |plams.Molecule|_
        A PLAMS molecule.

    inplace : bool
        If ``True``, reorder the atoms of **mol** itself and return ``None``.
        If ``False``, leave **mol** untouched and return a reordered copy.

    Returns
    -------
    |plams.Molecule|_
        Optional: if ``inplace=False``, return a copy of **mol** with its atoms sorted by their
        canonical rank.

    See Also
    --------
    * rdkit.Chem.CanonicalRankAtoms_: Returns the canonical atom ranking for each atom of a
      molecule fragment.

    """  # noqa
    ranks = Chem.CanonicalRankAtoms(molkit.to_rdmol(mol))

    # The molecule whose atoms will be reordered
    target = mol if inplace else mol.copy()

    # Pair every atom with its rank and sort the pairs in descending order
    ranked_atoms = list(zip(ranks, target.atoms))
    ranked_atoms.sort(reverse=True)
    target.atoms = [atom for _, atom in ranked_atoms]

    if inplace:
        return None
    return target
def set_dihed(self, angle: float, anchor: Atom, cap: Sequence[Atom],
              opt: bool = True, unit: str = 'degree') -> None:
    """Change all valid dihedral angles into a specific value.

    Valid bonds are single bonds which are not part of a ring and
    do not involve hydrogen (*i.e.* atoms with an atomic number of 1).
    Performs an inplace update of this instance.

    Parameters
    ----------
    angle : :class:`float`
        The desired dihedral angle.
    anchor : |plams.Atom|
        The ligand anchor atom.
    cap : :class:`Sequence<collections.abc.Sequence>` [|plams.Atom|]
        Atoms whose atomic number is temporarily set to 0 while the dihedral angles are adjusted.
        The original atomic numbers are always restored, even if an exception is raised.
    opt : :class:`bool`
        Whether or not the dihedral adjustment should be followed up by an RDKit UFF optimization.
    unit : :class:`str`
        The input unit.

    """
    # Temporarily change the atomic number of all capping atoms to 0
    cap_atnum = [at.atnum for at in cap]
    try:
        for at in cap:
            at.atnum = 0

        angle = Units.convert(angle, unit, 'degree')
        # A lazy generator; it has to be consumed while the atomic numbers of cap are still 0
        bond_iter = (bond for bond in self.bonds if bond.atom1.atnum != 1 and
                     bond.atom2.atnum != 1 and bond.order == 1 and not self.in_ring(bond))

        # Correction factor for, most importantly, tri-valent anchors (e.g. P(R)(R)R)
        dihed_cor = angle / 2
        neighbors = anchor.neighbors()
        if len(neighbors) > 2:
            atom_list = [anchor] + sorted(neighbors, key=lambda at: -at.atnum)[:3]
            improper = get_dihed(atom_list)
            dihed_cor *= np.sign(improper)

        for bond in bond_iter:
            # Gather lists of all non-hydrogen neighbors
            n1, n2 = self.neighbors_mod(bond.atom1), self.neighbors_mod(bond.atom2)

            # Remove all atoms in `bond`
            n1 = [atom for atom in n1 if atom is not bond.atom2]
            n2 = [atom for atom in n2 if atom is not bond.atom1]

            # Remove all non-subsituted atoms
            # A special case consists of anchor atoms; they can stay
            if len(n1) > 1:
                n1 = [atom for atom in n1 if (len(self.neighbors_mod(atom)) > 1 or
                                              atom is anchor or atom.atnum == 0)]
            if len(n2) > 1:
                n2 = [atom for atom in n2 if (len(self.neighbors_mod(atom)) > 1 or
                                              atom is anchor or atom.atnum == 0)]

            # Set `bond` in an anti-periplanar conformation
            if n1 and n2:
                dihed = get_dihed((n1[0], bond.atom1, bond.atom2, n2[0]))
                if anchor not in bond:
                    self.rotate_bond(bond, bond.atom1, angle - dihed, unit='degree')
                else:
                    dihed -= dihed_cor
                    self.rotate_bond(bond, bond.atom1, -dihed, unit='degree')
                    dihed_cor *= -1
    finally:
        # Restore the original atomic numbers, regardless of any exceptions
        for at, atnum in zip(cap, cap_atnum):
            at.atnum = atnum

    if opt:
        rdmol = molkit.to_rdmol(self)
        UFF(rdmol).Minimize()
        self.from_rdmol(rdmol)
def find_substructure(
    ligand: Molecule,
    func_groups: Iterable[Chem.Mol],
    split: bool = True,
    condition: Optional[Callable[[int], bool]] = None
) -> List[Molecule]:
    """Identify interesting functional groups within the ligand.

    Parameters
    ----------
    ligand : |plams.Molecule|_
        The ligand molecule.

    func_groups : |tuple|_ [|Chem.Mol|_]
        A collection of RDKit molecules representing functional groups.

    split : bool
        If a functional group should be split from **ligand** (``True``) or not (``False``).

    condition : |Callable|_, optional
        An optional callable which takes the number of identified functional groups and
        returns a boolean.
        An error is logged and an empty list is returned if it evaluates to ``False``.

    Returns
    -------
    |list|_ [|plams.Molecule|_]
        A list of ligands.
        A single copy of **ligand** is created for each identified functional group,
        removing parts of the functional group if required (see **split**).
        An empty list is returned if no valid functional groups are found.

    """
    rdmol = molkit.to_rdmol(ligand)

    # Searches for functional groups (defined by func_groups) within the ligand
    get_match = rdmol.GetSubstructMatches
    matches = chain.from_iterable(get_match(mol, useChirality=True) for mol in func_groups)

    # Remove all duplicate matches, each heteroatom (match[0]) should have <= 1 entry
    # The first match of each heteroatom is kept; a set is used for constant-time lookups
    ligand_indices = []
    ref = set()
    for idx_tup in matches:
        i, *_ = idx_tup
        if i in ref:
            continue  # Skip duplicates

        ligand_indices.append(idx_tup)
        ref.add(i)

    if condition is not None and not condition(len(ligand_indices)):
        err = (f"Failed to satisfy the passed condition ({condition!r}) for "
               f"ligand: {ligand.properties.name!r}")
        logger.error(err)
        return []

    if not ligand_indices:
        err = (f"No functional groups were found (optional.ligand.split = {split!r}) for "
               f"ligand: {ligand.properties.name!r}")
        logger.error(err)
        return []

    return [substructure_split(ligand, tup, split) for tup in ligand_indices]