def modified_minimum_scan_rdkit(ligand: Molecule, bond_tuple: Tuple[int, int], anchor: Atom) -> None: """A modified version of the :func:`.global_minimum_scan_rdkit` function. * Uses the ligand vector as criteria rather than the energy. * Geometry optimizations are constrained during the conformation search. * Finish with a final unconstrained geometry optimization. See Also -------- :func:`global_minimum_scan_rdkit<scm.plams.recipes.global_minimum.minimum_scan_rdkit>`: Optimize the molecule (RDKit UFF) with 3 different values for the given dihedral angle and find the lowest energy conformer. :param |Molecule| mol: The input molecule :param tuple bond_tuple: A 2-tuples containing the atomic indices of valid bonds :return |Molecule|: A copy of *mol* with a newly optimized geometry """ # Define a number of variables and create 3 copies of the ligand angles = (-120, 0, 120) mol_list = [ligand.copy() for _ in range(3)] for angle, mol in zip(angles, mol_list): bond = mol[bond_tuple] atom = mol[bond_tuple[0]] mol.rotate_bond(bond, atom, angle, unit='degree') rdmol_list = [molkit.to_rdmol(mol, properties=False) for mol in mol_list] # Optimize the (constrained) geometry for all dihedral angles in angle_list # The geometry that yields the minimum energy is returned fixed = _find_idx(mol, bond) for rdmol in rdmol_list: ff = UFF(rdmol) for f in fixed: ff.AddFixedPoint(f) ff.Minimize() # Find the conformation with the optimal ligand vector cost_list = [] try: i = ligand.atoms.index(anchor) except ValueError: i = -1 # Default to the origin as anchor for rdmol in rdmol_list: xyz = rdmol_as_array(rdmol) if i == -1: # Default to the origin as anchor xyz = np.vstack([xyz, [0, 0, 0]]) rotmat = optimize_rotmat(xyz, i) xyz[:] = xyz @ rotmat.T xyz -= xyz[i] cost = np.exp(xyz[:, 1:]).sum() cost_list.append(cost) # Perform an unconstrained optimization on the best geometry and update the geometry of ligand j = np.argmin(cost_list) rdmol_best = rdmol_list[j] UFF(rdmol).Minimize() ligand.from_rdmol(rdmol_best)
def substructure_split(ligand: Molecule, idx: Tuple[int, int], split: bool = True) -> Molecule: """Delete the hydrogen or mono-/polyatomic counterion attached to the functional group. Sets the charge of the remaining heteroatom to -1 if ``split=True``. Parameters ---------- ligand: |plams.Molecule|_ The ligand molecule. idx : |tuple|_ [|int|_] A tuple with 2 atomic indices associated with a functional group. split : bool If a functional group should be split from **ligand** (``True``) or not (``False``). Returns ------- |plams.Molecule|_ A copy of **ligand**, with part of its functional group removed (see **split**). """ lig = ligand.copy() at1 = lig[idx[0] + 1] at2 = lig[idx[-1] + 1] if split: lig.delete_atom(at2) mol_list = lig.separate_mod() for mol in mol_list: if at1 not in mol: continue lig = mol break # Check if the ligand heteroatom has a charge assigned, assigns a charge if not if not at1.properties.charge: at1.properties.charge = -1 # Update ligand properties lig.properties.dummies = at1 lig.properties.anchor = at1.symbol + str(lig.atoms.index(at1) + 1) lig.properties.charge = sum( atom.properties.get('charge', 0) for atom in lig) # Update the ligand smiles string rdmol = molkit.to_rdmol(lig) smiles = Chem.MolToSmiles(rdmol) lig.properties.smiles = Chem.CanonSmiles(smiles) lig.properties.name = santize_smiles( lig.properties.smiles) + '@' + lig.properties.anchor lig.properties.path = ligand.properties.path return lig
def _protonate_mol(mol: Molecule) -> Molecule: mol_cp = mol.copy() i = mol.index(mol.properties.dummies) anchor = mol_cp[i] if anchor.properties.charge != -1: raise NotImplementedError("Non-anionic anchors are not supported") anchor.properties.charge = 0 return add_Hs(mol_cp, forcefield="uff")
def _get_ligand(mol: Molecule) -> Molecule: """Extract a single ligand from **mol** as a copy.""" at_list = [] res = mol.atoms[-1].properties.pdb_info.ResidueNumber for at in reversed(mol.atoms): if at.properties.pdb_info.ResidueNumber == res: at_list.append(at) else: ret = Molecule() ret.atoms = at_list ret.bonds = list( set(chain.from_iterable(at.bonds for at in at_list))) return ret.copy()
def get_asa_fragments(qd: Molecule) -> Tuple[List[Molecule], Molecule]: """Construct the fragments for an activation strain analyses. Parameters ---------- qd : |plams.Molecule| A Molecule whose atoms' properties should be marked with `pdb_info.ResidueName`. Atoms in the core should herein be marked with ``"COR"``. Returns ------- :class:`list` [|plams.Molecule|] and |plams.Molecule| A list of ligands and the core. Fragments are defined based on connectivity patterns (or lack thereof). """ # Delete all atoms within the core mol_complete = qd.copy() core = Molecule() core.properties = mol_complete.properties.copy() core_atoms = [at for at in mol_complete if at.properties.pdb_info.ResidueName == 'COR'] for atom in core_atoms: mol_complete.delete_atom(atom) atom.mol = core core.atoms = core_atoms mol_complete.properties.name += '_frags' core.properties.name += '_core' # Fragment the molecule ligand_list = mol_complete.separate() # Set atomic properties for at1, at2 in zip(chain(*ligand_list), mol_complete): at1.properties.symbol = at2.properties.symbol at1.properties.charge_float = at2.properties.charge_float for at1, at2 in zip(core, qd): at1.properties.symbol = at2.properties.symbol at1.properties.charge_float = at2.properties.charge_float # Set the prm parameter which points to the created .prm file name = mol_complete.properties.name[:-1] path = mol_complete.properties.path prm = mol_complete.properties.prm for mol in ligand_list: mol.properties.name = name mol.properties.path = path mol.properties.prm = prm return ligand_list, core
def _get_neutral_frag(frag: Molecule) -> Molecule: """Return a neutral fragment for :func:`md_generator`.""" frag_neutral = frag.copy() for anchor in frag_neutral: if anchor.properties.anchor: charge = anchor.properties.charge if charge > 0: run_ff_cationic(frag_neutral, anchor, MATCH_SETTINGS) elif charge < 0: run_ff_anionic(frag_neutral, anchor, MATCH_SETTINGS) break else: raise MoleculeError("Failed to identify the anchor atom within 'frag'") return frag_neutral
def _lig_from_psf(mol: Molecule, psf: PSFContainer) -> Dict[FrozenSet[int], Molecule]: residue_set = set(psf.segment_name[psf.residue_name == 'LIG']) ret: Dict[FrozenSet[int], Molecule] = {} for res in residue_set: _key = psf.residue_id[psf.segment_name == res] key = frozenset(_key) res_id = _key.iat[0] idx = psf.atoms[psf.residue_id == res_id].index i = idx[0] - 1 j = idx[-1] ret[key] = mol.copy(atoms=mol.atoms[i:j]) ret[key].round_coords(decimals=3) return ret
def array_to_qd(mol: Molecule, xyz_array: np.ndarray, mol_out: Optional[Molecule] = None) -> Optional[List[Molecule]]: """Create :math:`n` copies of **mol** and update their Cartesian coordinates with **xyz_array**. Parameters ---------- mol : |plams.Molecule|_ A template PLAMS molecule consisting of :math:`n` atoms. xyz_array : :math:`m*n*3` |np.ndarray|_: A 3D array-like object representing the cartesian coordinates of :math:`m` molecules each with :math:`n` atoms. mol_out : |plams.Molecule|_ Optional: If not ``None`` merge the to-be returned molecules with **mol_out**. Returns ------- |list|_ [|Molecule|_] Optional: if **mol_out** = ``None`` return a list **mol** copies, each with its Cartesian coordinates replaced with a set of coordinates from **xyz_array**. """ mol_list = [] xyz_array = sanitize_dim_3(xyz_array) if mol_out is None: start = 2 else: start = 1 + mol_out[-1].properties.get('pdb_info', {}).get('ResidueNumber', 1) for i, xyz in enumerate(xyz_array, start=start): mol_cp = mol.copy() mol_cp.from_array(xyz) for at in mol_cp: at.properties.pdb_info.ResidueNumber = i mol_list.append(mol_cp) # return a list of molecules or merge the list with an existing molecule if mol_out is None: return mol_list else: for m in mol_list: mol_out.add_molecule(m) return None
def ligand_to_qd(core: Molecule, ligand: Molecule, path: str, allignment: str = 'sphere', idx_subset: Optional[Iterable[int]] = None) -> Molecule: """Function that handles quantum dot (qd, *i.e.* core + all ligands) operations. Combine the core and ligands and assign properties to the quantom dot. Parameters ---------- core : |plams.Molecule|_ A core molecule. ligand : |plams.Molecule|_ A ligand molecule. allignment : :class:`str` How the core vector(s) should be defined. Accepted values are ``"sphere"`` and ``"surface"``: * ``"sphere"``: Vectors from the core anchor atoms to the center of the core. * ``"surface"``: Vectors perpendicular to the surface of the core. Note that for a perfect sphere both approaches are equivalent. idx_subset : :class:`Iterable<collections.anc.Iterable>` [:class:`int`], optional An iterable with the (0-based) indices defining a subset of atoms in **core**. Only relevant in the construction of the convex hull when ``allignment=surface``. Returns ------- |plams.Molecule|_ A quantum dot consisting of a core molecule and *n* ligands """ def get_name() -> str: core_name = core.properties.name anchor = str(qd[-1].properties.pdb_info.ResidueNumber - 1) lig_name = ligand.properties.name return f'{core_name}__{anchor}_{lig_name}' idx_subset_ = idx_subset if idx_subset is not None else ... # Define vectors and indices used for rotation and translation the ligands vec1 = np.array([-1, 0, 0], dtype=float) # All ligands are already alligned along the X-axis idx = ligand.get_index(ligand.properties.dummies) - 1 ligand.properties.dummies.properties.anchor = True # Attach the rotated ligands to the core, returning the resulting strucutre (PLAMS Molecule). if allignment == 'sphere': vec2 = np.array(core.get_center_of_mass()) - sanitize_dim_2(core.properties.dummies) vec2 /= np.linalg.norm(vec2, axis=1)[..., None] elif allignment == 'surface': if isinstance(core.properties.dummies, np.ndarray): anchor = core.properties.dummies else: anchor = core.as_array(core.properties.dummies) vec2 = -get_surface_vec(np.array(core)[idx_subset_], anchor) else: raise ValueError(repr(allignment)) lig_array = rot_mol(ligand, vec1, vec2, atoms_other=core.properties.dummies, core=core, idx=idx) qd = core.copy() array_to_qd(ligand, lig_array, mol_out=qd) qd.round_coords() # Set properties qd.properties = Settings({ 'indices': [i for i, at in enumerate(qd, 1) if at.properties.pdb_info.ResidueName == 'COR' or at.properties.anchor], 'path': path, 'name': get_name(), 'job_path': [], 'prm': ligand.properties.get('prm') }) # Print and return _evaluate_distance(qd, qd.properties.name) return qd
def _construct_xyn( ion: str | int | Molecule, lig_count: int, lig: Molecule, lig_at: Atom, lig_idx: int, ) -> Tuple[Molecule, Atom]: """Construct the :math:`XYn` molecule for :func:`get_xyn`. Parameters ---------- ion : |str|_, |int|_ or |plams.Molecule|_ An ion (:math:`X`), be it mono- (*e.g.* atomic number or symbol) or poly-atomic. lig_count : int The number of to-be attached ligands per ion. lig : |plams.Molecule|_ A single ligand molecule. lig_at : |plams.Atom|_ The ligand anchor atom. lig_idx : int The (1-based) index of **lig_at**. Returns ------- |plams.Molecule|_ and |plams.Atom|_ A :math:`XY_{n}` molecule and the the charged atom from :math:`X`. """ # Create a list of n ligands, n anchor atoms, n desired ion-anchor distances and n angles lig_gen = (lig.copy() for _ in range(lig_count)) angle_ar = np.arange(0, 2 * np.pi, 2 * np.pi / lig_count) # Prepare vectors for translations and rotations vec1 = lig_at.vector_to(np.zeros(3)) _vec = lig_at.vector_to(lig.get_center_of_mass()) vec2 = get_perpendicular_vec(_vec) # Update the XYn molecule with ligands XYn, X = _parse_ion(ion) iterator = enumerate(zip(angle_ar, lig_gen), 2) for i, (angle, mol) in iterator: # Prepare for translations and rotations anchor = mol[lig_idx] rotmat = axis_rotation_matrix(vec2, angle) dist = anchor.radius + X.radius # Translate and rotate the ligand mol.translate(vec1) mol.rotate(rotmat) vec3 = anchor.vector_to(mol.get_center_of_mass()) vec3 /= np.linalg.norm(vec3) / dist mol.translate(vec3) # Set pdb attributes for at in mol: at.properties.pdb_info.ResidueNumber = i at.properties.pdb_info.ResidueName = 'LIG' # Combine the translated and rotated ligand with XYn XYn.add_molecule(mol) XYn.add_bond(X, anchor) return XYn, X
def dissociate_surface(mol: Molecule, idx: npt.ArrayLike, symbol: str = 'Cl', lig_count: int = 1, k: int = 4, displacement_factor: float = 0.5, **kwargs: Any) -> Generator[Molecule, None, None]: r"""A workflow for dissociating :math:`(XY_{n})_{\le m}` compounds from the surface of **mol**. The workflow consists of four distinct steps: 1. Identify which atoms :math:`Y`, as specified by **symbol**, are located on the surface of **mol**. 2. Identify which surface atoms are neighbors of :math:`X`, the latter being defined by **idx**. 3. Identify which pairs of :math:`n*m` neighboring surface atoms are furthest removed from each other. :math:`n` is defined by **lig_count** and :math:`m`, if applicable, by the index along axis 1 of **idx**. 4. Yield :math:`(XY_{n})_{\le m}` molecules constructed from **mol**. Note ---- The indices supplied in **idx** will, when applicable, be sorted along its last axis. Examples -------- .. code:: python >>> from pathlib import Path >>> import numpy as np >>> from scm.plams import Molecule >>> from CAT.recipes import dissociate_surface, row_accumulator >>> base_path = Path(...) >>> mol = Molecule(base_path / 'mol.xyz') # The indices of, e.g., Cs-pairs >>> idx = np.array([ ... [1, 3], ... [4, 5], ... [6, 10], ... [15, 12], ... [99, 105], ... [20, 4] ... ]) # Convert 1- to 0-based indices by substracting 1 from idx >>> mol_generator = dissociate_surface(mol, idx-1, symbol='Cl', lig_count=1) # Note: The indices in idx are (always) be sorted along axis 1 >>> iterator = zip(row_accumulator(np.sort(idx, axis=1)), mol_generator) >>> for i, mol in iterator: ... mol.write(base_path / f'output{i}.xyz') Parameters ---------- mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>` The input molecule. idx : array-like, dimensions: :math:`\le 2` An array of indices denoting to-be dissociated atoms (*i.e.* :math:`X`); its elements will, if applicable, be sorted along the last axis. If a 2D array is provided then all elements along axis 1 will be dissociated in a cumulative manner. :math:`m` is herein defined as the index along axis 1. symbol : :class:`str` or :class:`int` An atomic symbol or number defining the super-set of the atoms to-be dissociated in combination with **idx** (*i.e.* :math:`Y`). lig_count : :class:`int` The number of atoms specified in **symbol** to-be dissociated in combination with a single atom from **idx** (*i.e.* :math:`n`). k : :class:`int` The number of atoms specified in **symbol** which are surrounding a single atom in **idx**. Must obey the following condition: :math:`k \ge 1`. displacement_factor : :class:`float` The smoothing factor :math:`n` for constructing a convex hull; should obey :math:`0 <= n <= 1`. Represents the degree of displacement of all atoms with respect to a spherical surface; :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all. A non-zero value is generally recomended here, as the herein utilized :class:`ConvexHull<scipy.spatial.ConvexHull>` class requires an adequate degree of surface-convexness, lest it fails to properly identify all valid surface points. \**kwargs : :data:`Any<typing.Any>` Further keyword arguments for :func:`brute_uniform_idx()<CAT.attachment.distribution_brute.brute_uniform_idx>`. Yields ------ :class:`Molecule<scm.plams.mol.molecule.Molecule>` Yields new :math:`(XY_{n})_{m}`-dissociated molecules. See Also -------- :func:`brute_uniform_idx()<CAT.attachment.distribution_brute.brute_uniform_idx>` Brute force approach to creating uniform or clustered distributions. :func:`identify_surface()<nanoCAT.bde.identify_surface.identify_surface>` Take a molecule and identify which atoms are located on the surface, rather than in the bulk. :func:`identify_surface_ch()<nanoCAT.bde.identify_surface.identify_surface_ch>` Identify the surface of a molecule using a convex hull-based approach. :func:`dissociate_ligand()<nanoCAT.bde.dissociate_xyn.dissociate_ligand>` Remove :math:`XY_{n}` from **mol** with the help of the :class:`MolDissociater<nanoCAT.bde.dissociate_xyn.MolDissociater>` class. """ idx_ar = _parse_idx(idx, ndim=2, copy=False) if idx_ar.ndim > 2: raise ValueError("'idx' expected a 2D array-like object; " f"observed number of dimensions: {idx_ar.ndim}") idx_ar.sort(axis=1) idx_ar = idx_ar[:, ::-1] # Identify all atoms in **idx** located on the surface idx_surface_superset = _get_surface( mol, symbol=symbol, displacement_factor=displacement_factor ) # Construct an array with the indices of opposing surface-atoms n = lig_count * idx_ar.shape[1] idx_surface = _get_opposite_neighbor( np.asarray(mol), idx_ar, idx_surface_superset, n=n, k=k, **kwargs ) # Dissociate and yield new molecules idx_ar += 1 idx_surface += 1 for idx_pair, idx_pair_surface in zip(idx_ar, idx_surface): mol_tmp = mol.copy() _mark_atoms(mol_tmp, idx_pair_surface) for i in idx_pair: mol_tmp = next(dissociate_ligand(mol_tmp, lig_count=lig_count, core_index=i, lig_core_pairs=1, **kwargs)) yield mol_tmp
def dissociate_bulk( mol: Molecule, symbol_x: str, symbol_y: None | str = None, count_x: int = 1, count_y: int = 1, n_pairs: int = 1, k: None | int = 4, r_max: None | float = None, mode: _ModeKind = 'uniform', **kwargs: Any, ) -> Molecule: r"""A workflow for removing :math:`XY`-based compounds from the bulk of **mol**. Examples -------- .. code-block:: python >>> from scm.plams import Molecule >>> from CAT.recipes import dissociate_bulk >>> mol: Molecule = ... # Remove two PbBr2 pairs in a system where # each lead atom is surrounded by 6 bromides >>> mol_out1 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, k=6 ... ) # The same as before, expect all potential bromides are # identified based on a radius, rather than a fixed number >>> mol_out2 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, r_max=5.0 ... ) # Convert a fraction to a number of pairs >>> f = 0.5 >>> count_x = 2 >>> symbol_x = "Pb" >>> n_pairs = int(f * sum(at.symbol == symbol_x for at in mol) / count_x) >>> mol_out3 = dissociate_bulk( ... mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=n_pairs, k=6 ... ) Parameters ---------- mol : :class:`~scm.plams.mol.molecule.Molecule` The input molecule. symbol_x : :class:`str` or :class:`int` The atomic symbol or number of the central (to-be dissociated) atom(s) :math:`X`. symbol_y : :class:`str` or :class:`int`, optional The atomic symbol or number of the surrounding (to-be dissociated) atom(s) :math:`Y`. If :data:`None`, do not dissociate any atoms :math:`Y`. count_x : :class:`int` The number of central atoms :math:`X` per individual to-be dissociated cluster. count_y : :class:`int` The number of surrounding atoms :math:`Y` per individual to-be dissociated cluster. n_pairs : :class:`int` The number of to-be removed :math:`XY` fragments. k : :class:`int`, optional The total number of :math:`Y` candidates surrounding each atom :math:`X`. This value should be smaller than or equal to **count_y**. See the **r_max** parameter for a radius-based approach; note that both parameters are not mutually exclusive. r_max : :class:`int`, optional The radius used for searching for :math:`Y` candidates surrounding each atom :math:`X`. See **k** parameter to use a fixed number of nearest neighbors; note that both parameters are not mutually exclusive. mode : :class:`str` How the subset of to-be removed atoms :math:`X` should be generated. Accepts one of the following values: * ``"random"``: A random distribution. * ``"uniform"``: A uniform distribution; the distance between each successive atom and all previous points is maximized. * ``"cluster"``: A clustered distribution; the distance between each successive atom and all previous points is minmized. Keyword Arguments ----------------- \**kwargs : :data:`~typing.Any` Further keyword arguments for :func:`CAT.distribution.distribute_idx`. Returns ------- :class:`~scm.plams.mol.molecule.Molecule` The molecule with :math:`n_{\text{pair}} * XY` fragments removed. """ if n_pairs < 0: raise ValueError("`n_pairs` must be larger than or equal to 0; " f"observed value: {n_pairs!r}") elif n_pairs == 0: return mol.copy() # Validate the input args if count_x <= 0: raise ValueError("`count_x` expected a an integer larger than 0; " f"observed value: {count_x!r}") elif count_y < 0: raise ValueError("`count_y` expected a an integer larger than 0; " f"observed value: {count_y!r}") elif count_y == 0: symbol_y = None # Parse `symbol_x` atnum_x = to_atnum(symbol_x) idx_x = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_x], dtype=np.intp) if len(idx_x) == 0: raise MoleculeError(f"No atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}") # Parse `symbol_y` if symbol_y is not None: atnum_y = to_atnum(symbol_y) idx_y = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_y], dtype=np.intp) if len(idx_y) == 0: raise MoleculeError(f"No atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}") # Parse `k` and `r_max` if k is None and r_max is None: raise TypeError("`k` and `r_max` cannot be both set to None") if k is None: k = cast(int, len(idx_y)) if r_max is None: r_max = cast(float, np.inf) # Validate `n_pairs` if (n_pairs * count_x) > len(idx_x): x = n_pairs * count_x raise ValueError( f"Insufficient atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}; " f"atleast {x} required with `n_pairs={n_pairs!r}` and `count_x={count_x!r}" ) elif symbol_y is not None and (n_pairs * count_y) > len(idx_y): y = n_pairs * count_y raise ValueError( f"Insufficient atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}; " f"atleast {y} required with `n_pairs={n_pairs!r}` and `count_y={count_y!r}" ) # Identify a subset of atoms `x` if count_x != 1: if "cluster_size" in kwargs: raise TypeError("Specifying both `cluster_size` and `count_x` is not supported") kwargs["cluster_size"] = count_x idx_x_sorted = distribute_idx(mol, idx_x, f=1, **kwargs) idx_x_sorted.shape = -1, count_x if symbol_y is not None: # Identify a subset of matching atoms `y` idx_x_subset, idx_y_subset, = _get_opposite_neighbor2( np.asarray(mol), idx_x_sorted, idx_y, n_pairs, n=count_y, k=k, r_max=r_max ) # Concatenate the subsets idx_xy_subset = np.concatenate([idx_x_subset.ravel(), idx_y_subset.ravel()]) else: idx_xy_subset = idx_x[:(n_pairs * count_x)] idx_xy_subset.sort() idx_xy_subset += 1 # Create and return the new molecule ret = mol.copy() atom_set = {ret[i] for i in idx_xy_subset} for at in atom_set: ret.delete_atom(at) return ret
def replace_surface(mol: Molecule, symbol: Union[str, int], symbol_new: Union[str, int] = 'Cl', nth_shell: Union[int, Iterable[int]] = 0, f: float = 0.5, mode: str = 'uniform', displacement_factor: float = 0.5, **kwargs: Any) -> Molecule: r"""A workflow for identifying all surface atoms in **mol** and replacing a subset of them. Consists of three distinct steps: 1. Identifying which atoms, with a user-specified atomic **symbol**, are located on the surface of **mol** rather than in the bulk. 2. Define a subset of the newly identified surface atoms using one of CAT's distribution algorithms. 3. Create and return a molecule where the atom subset defined in step 2 has its atomic symbols replaced with **symbol_new**. Examples -------- Replace 75% of all surface ``"Cl"`` atoms with ``"I"``. .. code:: python >>> from scm.plams import Molecule >>> from CAT.recipes import replace_surface >>> mol = Molecule(...) # Read an .xyz file >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75) >>> mol_new.write(...) # Write an .xyz file The same as above, except this time the new ``"I"`` atoms are all deleted. .. code:: python >>> from scm.plams import Molecule >>> from CAT.recipes import replace_surface >>> mol = Molecule(...) # Read an .xyz file >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75) >>> del_atom = [at for at in mol_new if at.symbol == 'I'] >>> for at in del_atom: ... mol_new.delete_atom(at) >>> mol_new.write(...) # Write an .xyz file Parameters ---------- mol : :class:`~scm.plams.mol.molecule.Molecule` The input molecule. symbol : :class:`str` or :class:`int` An atomic symbol or number defining the super-set of the surface atoms. symbol_new : :class:`str` or :class:`int` An atomic symbol or number which will be assigned to the new surface-atom subset. nth_shell : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`] One or more integers denoting along which shell-surface(s) to search. For example, if ``symbol = "Cd"`` then ``nth_shell = 0`` represents the surface, ``nth_shell = 1`` is the first sub-surface ``"Cd"`` shell and ``nth_shell = 2`` is the second sub-surface ``"Cd"`` shell. Using ``nth_shell = [1, 2]`` will search along both the first and second ``"Cd"`` sub-surface shells. Note that a :exc:`Zscm.plams.core.errors.MoleculeError` will be raised if the specified **nth_shell** is larger than the actual number of available sub-surface shells. f : :class:`float` The fraction of surface atoms whose atom types will be replaced with **symbol_new**. Must obey the following condition: :math:`0 < f \le 1`. mode : :class:`str` How the subset of surface atoms will be generated. Accepts one of the following values: * ``"random"``: A random distribution. * ``"uniform"``: A uniform distribution; maximizes the nearest-neighbor distance. * ``"cluster"``: A clustered distribution; minimizes the nearest-neighbor distance. displacement_factor : :class:`float` The smoothing factor :math:`n` for constructing a convex hull; should obey :math:`0 <= n <= 1`. Represents the degree of displacement of all atoms with respect to a spherical surface; :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all. A non-zero value is generally recomended here, as the herein utilized :class:`~scipy.spatial.ConvexHull` class requires an adequate degree of surface-convexness, lest it fails to properly identify all valid surface points. \**kwargs : :data:`~typing.Any` Further keyword arguments for :func:`~CAT.attachment.distribution.distribute_idx`. Returns ------- :class:`~scm.plams.mol.molecule.Molecule` A new Molecule with a subset of its surface atoms replaced with **symbol_new**. See Also -------- :func:`~CAT.attachment.distribution.distribute_idx` Create a new distribution of atomic indices from **idx** of length :code:`f * len(idx)`. :func:`~nanoCAT.bde.identify_surface.identify_surface` Take a molecule and identify which atoms are located on the surface, rather than in the bulk. :func:`~nanoCAT.bde.identify_surface.identify_surface_ch` Identify the surface of a molecule using a convex hull-based approach. """ # Parse input arguments xyz = np.array(mol, dtype=float, ndmin=2, copy=False) atnum = to_atnum(symbol) atnum_new = to_atnum(symbol_new) # Define the surface-atom subset idx = np.fromiter((i for i, at in enumerate(mol) if at.atnum == atnum), dtype=int) try: idx_surface = idx[_collect_surface(xyz[idx], displacement_factor, nth_shell)] except MoleculeError as exc: nth = exc.args[0] if nth == 0: exc = MoleculeError( f"No atoms with atomic symbol {to_symbol(symbol)!r} available in " f"{mol.get_formula()!r}") else: exc = MoleculeError( f"No subsurface shell >= {nth!r} of atom {to_symbol(symbol)!r} " f"available in {mol.get_formula()!r}") exc.__cause__ = None raise exc try: idx_surface_subset = distribute_idx(xyz, idx_surface, f=f, mode=mode, **kwargs) except ValueError as ex: raise MoleculeError( "Failed to identify any surface atoms with atomic symbol " f"{to_symbol(symbol)!r} in {mol.get_formula()!r}") from ex else: idx_surface_subset += 1 # Update atomic symbols and return ret = mol.copy() for i in idx_surface_subset: ret[i].atnum = atnum_new return ret
def canonicalize_mol(mol: Molecule, inplace: bool = True) -> Optional[Molecule]: """Take a PLAMS molecule and sort its atoms based on their canonical rank. .. _rdkit.Chem.CanonicalRankAtoms: https://www.rdkit.org/docs/source/rdkit.Chem.rdmolfiles.html#rdkit.Chem.rdmolfiles.CanonicalRankAtoms Examples -------- .. code:: python >>> from scm.plams import Molecule, from_smiles # Methane >>> mol: Molecule = from_smiles('C') >>> print(mol) # doctest: +SKIP Atoms: 1 H 0.640510 0.640510 -0.640510 2 H 0.640510 -0.640510 0.640510 3 C 0.000000 0.000000 0.000000 4 H -0.640510 0.640510 0.640510 5 H -0.640510 -0.640510 -0.640510 >>> canonicalize_mol(mol) >>> print(mol) # doctest: +SKIP Atoms: 1 C 0.000000 0.000000 0.000000 2 H -0.640510 -0.640510 -0.640510 3 H -0.640510 0.640510 0.640510 4 H 0.640510 -0.640510 0.640510 5 H 0.640510 0.640510 -0.640510 Parameters ---------- mol : |plams.Molecule|_ A PLAMS molecule. inplace : bool If ``True``, perform an inplace update of **mol** rather than returning a new :class:`Molecule` instance. Returns ------- |plams.Molecule|_ Optional: if ``inplace=False``, return a copy of **mol** with its atoms sorted by their canonical rank. See Also -------- * rdkit.Chem.CanonicalRankAtoms_: Returns the canonical atom ranking for each atom of a molecule fragment. """ # noqa rdmol = molkit.to_rdmol(mol) idx_collection = Chem.CanonicalRankAtoms(rdmol) # Reverse sort Molecule.atoms by the atomic indices in idx_collection if inplace: mol.atoms = [ at for _, at in sorted(zip(idx_collection, mol.atoms), reverse=True) ] return else: ret = mol.copy() ret.atoms = [ at for _, at in sorted(zip(idx_collection, ret.atoms), reverse=True) ] return ret
def get_lig_charge(ligand: Molecule, desired_charge: float, ligand_idx: Union[None, int, Iterable[int], slice] = None, invert_idx: bool = False, settings: Optional[Settings] = None, path: Union[None, str, PathLike] = None, folder: Union[None, str, PathLike] = None) -> pd.Series: """Calculate and rescale the **ligand** charges using MATCH_. The atomic charges in **ligand_idx** wil be altered such that the molecular charge of **ligand** is equal to **desired_charge**. .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software Examples -------- .. code:: python >>> import pandas as pd >>> from scm.plams import Molecule >>> from CAT.recipes import get_lig_charge >>> ligand = Molecule(...) >>> desired_charge = 0.66 >>> ligand_idx = 0, 1, 2, 3, 4 >>> charge_series: pd.Series = get_lig_charge( ... ligand, desired_charge, ligand_idx ... ) >>> charge_series.sum() == desired_charge True Parameters ---------- ligand : :class:`~scm.plams.core.mol.molecule.Molecule` The input ligand. desired_charge : :class:`float` The desired molecular charge of the ligand. ligand_idx : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`], optional An integer or iterable of integers representing atomic indices. The charges of these atoms will be rescaled; all others will be frozen with respect to the MATCH output. Setting this value to ``None`` means that *all* atomic charges are considered variable. Indices should be 0-based. invert_idx : :class:`bool` If ``True`` invert **ligand_idx**, *i.e.* all atoms specified therein are now threated as constants and the rest as variables, rather than the other way around. settings : :class:`~scm.plams.core.settings.Settings`, optional The input settings for :class:`~nanoCAT.ff.match_job.MatchJob`. Will default to the ``"top_all36_cgenff_new"`` forcefield if not specified. path : :class:`str` or :class:`~os.PathLike`, optional The path to the PLAMS workdir as passed to :func:`~scm.plams.core.functions.init`. Will default to the current working directory if ``None``. folder : :class:`str` or :class:`~os.PathLike`, optional The name of the to-be created to the PLAMS working directory as passed to :func:`~scm.plams.core.functions.init`. Will default to ``"plams_workdir"`` if ``None``. Returns ------- :class:`pd.Series` [:class:`str`, :class:`float`] A Series with the atom types of **ligand** as keys and atomic charges as values. See Also -------- :class:`MatchJob` A :class:`~scm.plams.core.basejob.Job` subclass for interfacing with MATCH_: Multipurpose Atom-Typer for CHARMM. """ # noqa if settings is None: settings = Settings() settings.input.forcefield = 'top_all36_cgenff_new' # Run the MATCH Job init(path, folder) ligand = ligand.copy() run_match_job(ligand, settings, action='raise') finish() # Extract the charges and new atom types count = len(ligand) charge = np.fromiter((at.properties.charge_float for at in ligand), count=count, dtype=float) symbol = np.fromiter((at.properties.symbol for at in ligand), count=count, dtype='<U4') # Identify the atom subset idx = _parse_ligand_idx(ligand_idx) if invert_idx: idx = _invert_idx(idx, count) try: idx_len = len(idx) # type: ignore except TypeError: # idx is a slice object idx_len = len(charge[idx]) # Correct the charges and return charge[idx] -= (charge.sum() - desired_charge) / idx_len return pd.Series(charge, index=symbol, name='charge')