Пример #1
0
def get_frag_size(self, bond: Bond, anchor: Atom) -> int:
    """Return the size of the fragment containing **atom** if this instance was split into two by the breaking of **bond**.

    Parameters
    ----------
    bond : |plams.Bond|
        A PLAMS bond.

    anchor : |plams.Atom|
        A PLAMS atom. The size of the fragment containg this atom will be returned.

    Returns
    -------
    :class:`int`
        The number of atoms in the fragment containing **atom**.

    """  # noqa
    if bond not in self.bonds:
        raise MoleculeError(
            'get_frag_size: The argument bond should be of type plams.Bond and '
            'be part of the Molecule')
    elif anchor not in self.atoms:
        raise MoleculeError(
            'get_frag_size: The argument atom should be of type plams.Atom and '
            'be part of the Molecule')

    for at in self:
        at._visited = False

    frag1 = set()
    frag2 = set()

    def dfs(at1: Atom, atom_set: Set[Atom]):
        at1._visited = True
        atom_set.add(at1)
        for at2 in at1.neighbors():

            if at2._visited:
                continue
            dfs(at2, atom_set)

    bond.atom1._visited = bond.atom2._visited = True
    dfs(bond.atom1, frag1)
    dfs(bond.atom2, frag2)
    for at in self.atoms:
        del at._visited

    # fragment #1 contains **atom**
    if anchor in frag1:
        if bond.atom1 not in frag1:
            bond.atom1, bond.atom2 = bond.atom2, bond.atom1
        return len(frag1)

    if bond.atom1 not in frag2:
        bond.atom1, bond.atom2 = bond.atom2, bond.atom1
    return len(frag2)
Пример #2
0
def _multi_lig_anchor(qd_series, ligands, path, anchor,
                      allignment) -> np.ndarray:
    """Gogogo."""
    ret = np.empty((len(ligands), len(qd_series)), dtype=object)
    for i, qd in enumerate(qd_series):
        qd = qd.copy()

        for j, (ligand, atnum) in enumerate(zip(ligands, anchor)):
            qd.set_atoms_id()
            try:
                atoms = [at for at in qd if at.atnum == atnum]
                assert atoms
            except AssertionError as ex:
                raise MoleculeError(
                    f'Failed to identify {to_symbol(atnum)!r} in '
                    f'{qd.get_formula()!r}') from ex

            coords = Molecule.as_array(None, atom_subset=atoms)
            qd.properties.dummies = np.array(coords, ndmin=2, dtype=float)
            qd = ligand_to_qd(qd,
                              ligand,
                              path=path,
                              allignment=allignment,
                              idx_subset=qd.properties.indices)
            ret[j, i] = qd
            for at in reversed(atoms):
                qd.delete_atom(qd[at.id])
            qd.unset_atoms_id()
    return ret
Пример #3
0
def neighbors_mod(self,
                  atom: Atom,
                  exclude: Union[int, str] = 1) -> List[Atom]:
    """A modified PLAMS function: Allows the exlucison of specific elements from the return list.

    Return a list of neighbors of **atom** within the molecule.
    Atoms with **atom** has to belong to the molecule.
    Returned list follows the same order as the **atom.bond** attribute.

    Parameters
    ----------
    atom : |plams.Atom|_
        The plams atom whose neighbours will be returned.

    exclude : |str|_ or |int|_
        Exclude all neighbours with a specific atomic number or symbol.

    Returns
    -------
    |list|_ [|plams.Atom|_]
        A list of all neighbours of **atom**.

    """
    exclude = to_atnum(exclude)
    if atom.mol != self:
        raise MoleculeError(
            'neighbors: passed atom should belong to the molecule')
    return [
        b.other_end(atom) for b in atom.bonds
        if b.other_end(atom).atnum != exclude
    ]
Пример #4
0
def validate_core_atom(atom):  # noqa: E302
    """Parse and validate the ``["optional"]["qd"]["dissociate"]["core_atom"]`` argument."""
    # Potential atomic number or symbol
    if isinstance(atom, int) or atom in PeriodicTable.symtonum:
        return to_atnum(atom)

    # Potential SMILES string
    try:
        mol = from_smiles(atom)
    except Exception as ex:
        raise ValueError(f'Failed to recognize {atom!r} as a valid atomic number, '
                         'atomic symbol or SMILES string') from ex

    # Double check the SMILES string:
    charge_dict = {}
    for at in mol:
        charge = at.properties.charge
        try:
            charge_dict[charge] += 1
        except KeyError:
            charge_dict[charge] = 1
    if 0 in charge_dict:
        del charge_dict[0]

    # Only a single charged atom is allowed
    if len(charge_dict) > 1:
        charge_count = sum([v for v in charge_dict.values()])
        raise MoleculeError(f'The SMILES string {repr(atom)} contains more than one charged atom: '
                            f'charged atom count: {charge_count}')
    return mol
Пример #5
0
def _get_anchor(mol: Molecule) -> Tuple[int, Atom]:
    """Find the first atom in **mol** marked with :attr:`Atom.properties` ``["anchor"]``.

    Parameters
    ----------
    |plams.Molecule|_
        A PLAMS molecule containing (at least) one atom marked whose :attr:`Atom.properties`
        attribute contains the ``["anchor"]`` key.

    Returns
    -------
    |int|_ and |plams.Atom|_
        The (0-based) index and the matching Atom

    Raises
    ------
    MoleculeError
        Raised if no atom with the :attr:`Atom.properties` ``["anchor"]`` key is found.

    """
    for i, at in enumerate(mol.atoms, 1):
        if at.properties.anchor:
            return i, at

    raise MoleculeError("No atom with the Atom.properties.anchor found")
Пример #6
0
def _evaluate_distance(mol: np.ndarray, name: str,
                       threshold: float = 1.0,
                       action: str = 'warn') -> Union[None, NoReturn]:
    """Eavluate all the distance matrix of **xyz3D** and perform **action** when distances are below **threshold**."""  # noqa
    try:
        action_func = WARN_MAP[action]
    except KeyError as ex:
        raise ValueError("'action' expected either 'warn', 'raise' or 'ignore'; "
                         f"observed value: {action!r}") from ex
    else:
        if action == 'ignore':
            return None

    xyz = np.asarray(mol)

    tree = cKDTree(xyz)
    dist, idx = tree.query(xyz, k=[2])
    dist = dist.T[0]
    idx = idx.T[0]

    bool_ar = dist < threshold
    if bool_ar.any():
        _idx2 = np.stack([np.arange(len(idx)), idx]).T
        _idx2 += 1
        _idx2.sort(axis=1)

        idx2 = np.unique(_idx2[bool_ar], axis=0)
        n = len(idx2)

        exc = MoleculeError(
            f"\nEncountered >= {n} unique atom-pairs at a distance shorter than "
            f"{threshold} Angstrom in {name!r}:\n{aNDRepr.repr(idx2.T)}"
        )
        action_func(exc)
Пример #7
0
def _collect_surface(xyz: np.ndarray,
                     displacement_factor: float,
                     nth_shell: Union[int, Iterable[int]] = 0) -> np.ndarray:
    """Collect all user-specified surface and/or sub-surface shells of **xyz**."""
    n = -1
    xyz_df = pd.DataFrame(xyz)
    n_set = {nth_shell
             } if not isinstance(nth_shell, abc.Iterable) else set(nth_shell)

    ret = []
    while True:
        n += 1

        # Check if the DataFrame is empty
        if not xyz_df.size:
            raise MoleculeError(n)

        idx = identify_surface_ch(xyz_df, n=displacement_factor)
        bool_ar = np.ones(len(xyz_df), dtype=bool)
        bool_ar[idx] = False

        if n not in n_set:
            xyz_df = xyz_df.loc[bool_ar, :]
            continue
        else:
            index = xyz_df.index[~bool_ar]
            xyz_df = xyz_df.loc[bool_ar, :]

        n_set.remove(n)
        ret += index.tolist()
        if not n_set:
            return np.fromiter(ret, count=len(ret), dtype=int)
Пример #8
0
def degree_of_separation(mol: Molecule,
                         limit: float = np.inf,
                         bond_mat: Optional[np.ndarray] = None) -> np.ndarray:
    r"""Construct a matrix with the degree of separation of all atom-pairs in **mol**.

    Each element :math:`D_{i, j}^{\text{sep}}` in the to-be returned matrix
    :math:`\boldsymbol{D}^{\text{sep}} \in \mathbb{R}_{+}^{n,n}`
    represents the (minimum) number of bonds
    seperating atoms :math:`i` and :math:`j`.

    The degree of seperation is set to ``inf`` if two atoms are disjointed,
    *i.e.* there is no combination of bonds connecting aformentioned atom-pair.

    Parameters
    ----------
    mol : :class:`Molecule` or :class:`MultiMolecule`
        A PLAMS Molecule with bonds.

    limit : :class:`float`
        The maximum degree of separation to calculate, must be >= 0.
        Using a smaller limit will decrease computation time by aborting calculations between
        pairs that are separated by a degree of separation > limit.
        For such pairs, the degree of separation will be equal to ``inf`` (*i.e.* not connected).

    bond_mat : array_like, optional
        An optional bond matrix or other object compatible with SciPy's
        :class:`csr_matrix<scipy.sparse.csr_matrix>`.
        If ``None``, calculate the sparse bond matrix with :func:`get_sparse_bond_matrix`.

    Returns
    -------
    (:math:`n`, :math:`n`) :class:`numpy.ndarray` [:class:`float`]
        A symmetric 2D array containing the degrees of separation between all atom-pairs in **mol**.
        Values are set to ``inf`` if the responsible atoms are disjointed.

    Raises
    ------
    :exc:`MoleculeError<scm.plams.core.errors.MoleculeError>`
        Raised if the passed Molecule has no bonds and **bond_mat** is ``None``.

    """
    len_mol = len(mol)

    if bond_mat is None:
        if not mol.bonds:
            raise MoleculeError("The passed Molecule has no bonds")
        sparse_bond_mat = sparse_bond_matrix(mol, dtype=bool)
    else:
        sparse_bond_mat = csr_matrix(bond_mat,
                                     dtype=bool,
                                     copy=False,
                                     shape=(len_mol, len_mol))

    return dijkstra(sparse_bond_mat,
                    directed=False,
                    limit=limit,
                    indices=np.arange(len_mol),
                    return_predecessors=False)
Пример #9
0
def _get_xyz(mol: Molecule, atom: AtomSymbol) -> np.ndarray:
    """Return the Cartesian coordinates of **mol** belonging to the atom subset of *atom*."""
    atnum = to_atnum(atom)
    xyz = mol.as_array(atom_subset=(at for at in mol if at.atnum == atnum))

    if not xyz.any():
        raise MoleculeError(f"No atoms with atomic symbol {to_symbol(atom)!r} "
                            f"in {mol.get_formula()!r}")
    return xyz
Пример #10
0
def get_index(self, value: Union[Atom, Bond]) -> Union[int, Tuple[int, int]]:
    """Return the first index of **value** within this instance.

    **value** expects an instance of either :class:`Atom` or :class:`Bond`.

    Note
    ----
    Following the convention addopted by PLAMS, the returned index/indices are 1-based rather
    than 0-based.

    Parameters
    ----------
    value : |plams.Atom|_ or |plams.Bond|_
        A PLAMS atom or bonds.

    Returns
    -------
    |int|_ or |tuple|_ [|int|_]
        An atomic index or (**value**: |plams.Atom|_) or
        a tuple of two atomic indices (**item**: |plams.Bond|_).

    Raises
    ------
    TypeError
        Raised if **value** is an instance of neither :class:`Atom` nor :class:`Bond`.

    MoleculeError
        Raised if the passed :class:`Atom` or :class:`Bond` is not in this instance.

    """
    if isinstance(value, Atom):
        if value not in self.atoms:
            raise MoleculeError(
                "Passed atom, {repr(value)}, is not in this instance")
        return 1 + self.atoms.index(value)
    elif isinstance(value, Bond):
        if value not in self.bonds:
            raise MoleculeError(
                f"Passed bond, {repr(value)}, is not in this instance")
        at1, at2 = value
        return 1 + self.atoms.index(at1), 1 + self.atoms.index(at2)

    err = "item excepts an instance of 'Atom' or 'Bond'; observed type: '{}'"
    raise TypeError(err.format(value.__class__.__name__))
Пример #11
0
def prep_core(core_df: SettingsDataFrame) -> SettingsDataFrame:
    """Function that handles the identification and marking of all core anchor atoms.

    Parameters
    ----------
    core_df : |CAT.SettingsDataFrame|_
        A dataframe of core molecules. Molecules are stored in the *mol* column.

    Returns
    -------
    |CAT.SettingsDataFrame|_
        A dataframe of cores with all anchor atoms removed.

    """
    # Unpack arguments
    anchor = core_df.settings.optional.core.anchor
    subset = core_df.settings.optional.core.subset

    idx_tuples = []
    for core in core_df[MOL]:
        # Checks the if the anchor is a string (atomic symbol) or integer (atomic number)
        formula = core.get_formula()

        # Returns the indices of all anchor atom ligand placeholders in the core
        if not core.properties.dummies:
            at_idx = np.array(
                [i for i, atom in enumerate(core) if atom.atnum == anchor])
        else:
            dummies = core.properties.dummies
            at_idx = np.fromiter(dummies, count=len(dummies), dtype=int)
            at_idx -= 1
        if subset:
            at_idx = distribute_idx(core, at_idx, **subset)

        # Convert atomic indices into Atoms
        at_idx += 1
        at_idx.sort()
        core.properties.dummies = dummies = [core[i] for i in at_idx]

        # Returns an error if no anchor atoms were found
        if not dummies:
            raise MoleculeError(
                f"{repr(to_symbol(anchor))} was specified as core anchor atom, yet "
                f"no matching atoms were found in {core.properties.name} "
                f"(formula: {formula})")

        # Delete all core anchor atoms
        for at in dummies:
            core.delete_atom(at)
        idx_tuples.append((formula, ' '.join(at_idx.astype(str))))

    # Create and return a new dataframe
    idx = pd.MultiIndex.from_tuples(idx_tuples, names=['formula', 'anchor'])
    ret = core_df.reindex(idx)
    ret[MOL] = core_df[MOL].values
    return ret
Пример #12
0
def allign_axis(mol: Molecule, anchor: Atom):
    """Allign a molecule with the Cartesian X-axis; setting **anchor** as the origin."""
    try:
        idx = mol.atoms.index(anchor)
    except ValueError as ex:
        raise MoleculeError("The passed anchor is not in mol") from ex

    xyz = mol.as_array()  # Allign the molecule with the X-axis
    rotmat = optimize_rotmat(xyz, idx)
    xyz[:] = xyz @ rotmat.T
    xyz -= xyz[idx]
    xyz[:] = xyz.round(decimals=3)
    mol.from_array(xyz)
Пример #13
0
def _get_neutral_frag(frag: Molecule) -> Molecule:
    """Return a neutral fragment for :func:`md_generator`."""
    frag_neutral = frag.copy()
    for anchor in frag_neutral:
        if anchor.properties.anchor:
            charge = anchor.properties.charge

            if charge > 0:
                run_ff_cationic(frag_neutral, anchor, MATCH_SETTINGS)
            elif charge < 0:
                run_ff_anionic(frag_neutral, anchor, MATCH_SETTINGS)
            break
    else:
        raise MoleculeError("Failed to identify the anchor atom within 'frag'")

    return frag_neutral
Пример #14
0
def _get_surface(
    mol: Molecule,
    symbol: str,
    displacement_factor: float = 0.5,
) -> _NDArray[np.int64]:
    """Return the indices of all atoms, whose atomic symbol is equal to **atom_symbol**, located on the surface."""  # noqa
    # Identify all atom with atomic symbol **atom_symbol**
    atnum = to_atnum(symbol)
    idx = np.array([i for i, atom in enumerate(mol) if atom.atnum == atnum], dtype=np.int64)
    xyz = np.asarray(mol)

    # Identify all atoms on the surface
    try:
        return idx[identify_surface_ch(xyz[idx], n=displacement_factor)]
    except ValueError as ex:
        raise MoleculeError(f"No atoms with atomic symbol/number {repr(symbol)} available in "
                            f"{mol.get_formula()!r}") from ex
Пример #15
0
def residue_argsort(mol, concatenate=True):  # noqa: E302
    """Return the indices that would sort this instance by residue number.

    Residues are defined based on moleculair fragments based on **self.bonds**.

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        A PLAMS Molecule with bonds.
    concatenate : bool
        If ``False``, returned a nested list with atomic indices.
        Each sublist contains the indices of a single residue.

    Returns
    -------
    :class:`numpy.ndarray` [:class:`int`], shape :math:`(n,)` or :class:`list` [:class:`list` [:class:`int`]]
        A 1D array of indices that would sort :math:`n` atoms this instance
        or a nested list of indices.

    """  # noqa: E501
    if not mol.bonds:
        raise MoleculeError("The passed Molecule has no bonds")

    # Define residues
    frags = separate_mod(mol)
    symbol = np.fromiter((at.symbol for at in mol), count=len(mol), dtype='U2')

    # Sort the residues
    core = []
    ligands = []
    for frag in frags:
        if len(frag) == 1:
            core += frag
        else:
            i = np.array(frag)
            argsort = np.argsort(symbol[i])
            ligands.append(i[argsort].tolist())
    core.sort()
    ligands.sort()

    ret = [core] + ligands
    if concatenate:
        return np.concatenate(ret)
    return ret
Пример #16
0
def _parse_ion(ion: Molecule | str | int) -> Tuple[Molecule, Atom]:
    """Interpret and parse the **ion** argument in :func:`.get_xyn`.

    Construct and return a new :math:`XY_{n=0}` molecule and the atom :math:`X` itself.
    If **ion** is a polyatomic ion then :math:`XY_{n=0}` is a copy of **ion** and :math:`X`
    is the first atom with a non-zero charge.

    Parameters
    ----------
    ion : |str|_, |int|_ or |plams.Molecule|_
        An ion (:math:`X`), be it mono- (*e.g.* atomic number or symbol) or poly-atomic.

    Returns
    -------
    |plams.Molecule|_ and |plams.Atom|_
        A :math:`XY_{n=0}` molecule and the the charged atom from :math:`X`.

    Raises
    ------
    MoleculeError
        Raised if ion is an instance of :math:`Molecule` but does not contain any charged atoms.

    """
    if isinstance(ion, Molecule):
        XYn = ion.copy()
        for i, at in enumerate(XYn, 1):
            if not at.properties.charge:
                continue

            # Found an atom with non-zero charge; return a copy
            ret = XYn.copy()
            return ret, ret[i]

        raise MoleculeError(
            "No atoms were found in 'ion' with a non-zero charge")

    else:
        # Ion is an atomic number or symbol
        X = Atom(atnum=to_atnum(ion))
        XYn = Molecule()
        XYn.add_atom(X)
        return XYn, X
Пример #17
0
def prep_input(
    arg: Settings
) -> Tuple[SettingsDataFrame, SettingsDataFrame, SettingsDataFrame]:
    """Interpret and extract the input settings. Returns a list of ligands and a list of cores.

    Parameters
    ----------
    |plams.Settings|_
        A settings object containing all (optional) arguments.

    Returns
    -------
    |tuple|_ [|CAT.SettingsDataFrame|_, |CAT.SettingsDataFrame|_, |CAT.SettingsDataFrame|_]
        A tuple containing the ligand, core and qd dataframes.

    """
    # Interpret arguments
    validate_input(arg, validate_only=False)

    # Read the input ligands and cores
    lig_list = read_mol(arg.get('input_ligands'))
    core_list = read_mol(arg.get('input_cores'))
    qd_list = read_mol(arg.get('input_qd'))

    is_qd = True if qd_list is not None else False

    # Raises an error if lig_list or core_list is empty
    if is_qd:
        if not qd_list:
            raise MoleculeError(
                'No valid input quantum dots were found, aborting run')
    else:
        if not lig_list:
            raise MoleculeError(
                'No valid input ligands were found, aborting run')
        elif not core_list:
            raise MoleculeError(
                'No valid input cores were found, aborting run')

    # Store the molecules in dataframes
    columns = pd.MultiIndex.from_tuples([MOL], names=['index', 'sub index'])

    if is_qd:
        ligand_df = core_df = None
        qd_df = SettingsDataFrame(index=pd.RangeIndex(len(qd_list)),
                                  columns=columns,
                                  settings=arg)
        qd_df[MOL] = qd_list
    else:
        qd_df = None
        ligand_df = SettingsDataFrame(index=pd.RangeIndex(len(lig_list)),
                                      columns=columns,
                                      settings=arg)

        core_df = SettingsDataFrame(index=pd.RangeIndex(len(core_list)),
                                    columns=columns.copy(),
                                    settings=arg)

        ligand_df[MOL] = lig_list
        core_df[MOL] = core_list

    return ligand_df, core_df, qd_df
Пример #18
0
def run_ff_cationic(mol: Molecule, anchor: Atom, s: Settings) -> None:
    r"""Assign neutral parameters to a cationic species (*e.g.* ammonium).

    Consists of 3 distinct steps:

    * **mol** is converted into two neutral fragments,
      *e.g.* ammonium is converted into two amines:
      :math:`N^+(R)_4 \rightarrow N(R)_3 + RN(H)_2`.
    * Parameters are guessed for both fragments (using MATCH_) and then recombined into **mol**.
    * The atomic charge of **anchor** is adjusted such that
      the total moleculair charge becomes zero.

    Performs an inplace update of **mol**.

    .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software

    Parameters
    ----------
    mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>`
        A cationic molecule.

    anchor : :class:`Atom<scm.plams.mol.atom.Atom>`
        The atom in **mol** with the formal positive charge.

    s : :class:`Settings<scm.plams.core.settings.Settings>`
        The job Settings to-be passed to :class:`MATCHJob<nanoCAT.ff.match_job.MATCHJob>`.

    See Also
    --------
    :func:`run_match_job()<nanoCAT.ff.ff_assignment.run_match_job>`
        Assign atom types and charges to **mol** based on the results of MATCH_.

    :func:`run_ff_anionic()<nanoCAT.ff.ff_anionic.run_ff_anionic>`
        Assign neutral parameters to an anionic species (*e.g.* carboxylate).

    """  # noqa
    if anchor not in mol:
        raise MoleculeError("Passed 'anchor' is not part of 'mol'")
    anchor.properties.charge = 0

    # Find the first bond attached to the anchor atom which is not part of a ring
    for bond in anchor.bonds:
        if not mol.in_ring(bond):
            break
    else:
        raise MoleculeError(
            "All bonds attached to 'anchor' are part of a ring system")

    with SplitMol(mol, bond) as (frag1, frag2):
        # Identify the amine and the alkylic fragment
        if anchor in frag1:
            amine = frag1
            alkyl = frag2
        else:
            amine = frag2
            alkyl = frag1
        amine.delete_atom(anchor.bonds[-1].other_end(anchor))

        # Change the capping hydrogen into X
        # X is the same atom type as **anchor**
        alkyl_cap = alkyl[-1]
        alkyl_cap.atnum = anchor.atnum
        cap_bond = alkyl_cap.bonds[0]
        bond_length = alkyl_cap.radius + cap_bond.other_end(alkyl_cap).radius
        cap_bond.resize(alkyl_cap, bond_length)

        # Change X into XH_n
        alkyl_with_h = add_Hs(alkyl)
        properties = mol[1].properties
        for at in alkyl_with_h.atoms[len(alkyl):]:
            cap_h = Atom(atnum=at.atnum,
                         coords=at.coords,
                         mol=alkyl,
                         settings=properties.copy())
            cap_h.properties.pdb_info.IsHeteroAtom = False
            cap_h.properties.pdb_info.Name = 'Hxx'
            alkyl.add_atom(cap_h)
            alkyl.add_bond(Bond(alkyl_cap, cap_h, mol=alkyl))

        # Get the match parameters
        run_match_job(amine, s)
        run_match_job(alkyl, s)

    # Set the total charge of the system to 0
    anchor.properties.charge_float -= sum(at.properties.charge_float
                                          for at in mol)
    return None
Пример #19
0
def prep_ligand(ligand_df: SettingsDataFrame) -> SettingsDataFrame:
    """Function that handles all ligand operations.

    * Ligand function group identification
    * Ligand geometry optimization
    * Ligand bulkiness calculations
    * Ligand COSMO-RS calculations
    * Ligand conceptual DFT calculations

    .. _Nano-CAT: https://github.com/nlesc-nano/nano-CAT

    Parameters
    ----------
    ligand_df : |CAT.SettingsDataFrame|_
        A dataframe of ligand molecules. Molecules are stored in the *mol* column.

    Returns
    -------
    |CAT.SettingsDataFrame|_
        A new dataframe containing only valid ligands.

    Raises
    ------
    ImportError
        Raised if a COSMO-RS calculation is attempted without installing the Nano-CAT_ package.

    """
    # Unpack arguments
    forcefield = ligand_df.settings.optional.forcefield
    optimize = ligand_df.settings.optional.ligand.optimize
    crs = ligand_df.settings.optional.ligand.crs
    cdft = ligand_df.settings.optional.ligand.cdft

    # Identify functional groups within the ligand.
    ligand_df = init_ligand_anchoring(ligand_df)

    # Check if any valid functional groups were found
    if not ligand_df[MOL].any():
        raise MoleculeError(
            'No valid functional groups found in any of the ligands, aborting run'
        )

    # Optimize the ligands
    if optimize:
        init_ligand_opt(ligand_df)

        # Remove failed optimizations from the ligand list
        _is_opt = (lig.properties.get('is_opt', False)
                   for lig in ligand_df[MOL])
        is_opt = np.fromiter(_is_opt, count=len(ligand_df), dtype=bool)
        ligand_df = ligand_df.loc[is_opt]
    else:
        for lig in ligand_df[MOL]:
            allign_axis(lig, lig.properties.dummies)

    # Perform a COSMO-RS calculation on the ligands
    if crs:
        val_nano_cat(
            "Ligand COSMO-RS calculations require the nano-CAT package")
        init_solv(ligand_df)

    # Assign CHARMM CGenFF atom types to all ligands
    if forcefield:
        val_nano_cat(
            "Automatic ligand forcefield assignment requires MATCH "
            "(Multipurpose Atom-Typer for CHARMM) and the nano-CAT package")
        init_ff_assignment(ligand_df)

    # Run conceptual DFT calculations
    if cdft:
        val_nano_cat(
            "Ligand conceptual DFT calculations require the nano-CAT package")
        init_cdft(ligand_df)

    return ligand_df
Пример #20
0
def prep_qd(ligand_df: Optional[SettingsDataFrame],
            core_df: Optional[SettingsDataFrame],
            qd_df: Optional[SettingsDataFrame]) -> SettingsDataFrame:
    """Function that handles all quantum dot (qd, i.e. core + all ligands) operations.

    * Constructing the quantum dots
    * Optimizing the quantum dots
    * Peforming activation strain analyses
    * Dissociating ligands on the quantum dot surface

    .. _Nano-CAT: https://github.com/nlesc-nano/nano-CAT

    Has two accepted signatures:
        * ``ligand_df = core_df = None``: Update an existing quantum dot dataframe (**qd_df**).
        * ``qd_df = None``: Construct a new quantum dot dataframe from
          **ligand_df** and **core_df**.

    Parameters
    ----------
    ligand_df : |CAT.SettingsDataFrame|_
        ``None`` or a dataframe of ligand molecules. Molecules are stored in the *mol* column.

    core_df : |CAT.SettingsDataFrame|_
        ``None`` or a dataframe of core molecules. Molecules are stored in the *mol* column.

    qd_df : |CAT.SettingsDataFrame|_
        ``None`` or a dataframe of quantum dots. Molecules are stored in the *mol* column.

    Returns
    -------
    |CAT.SettingsDataFrame|_
        A dataframe of quantum dots molecules. Molecules are stored in the *mol* column.

    Raises
    ------
    ImportError
        Raised if an activation-strain or ligand dissociation calculation is attempted without
        installing the Nano-CAT_ package.

    """
    # Unpack arguments
    bulk = ligand_df.settings.optional.qd.bulkiness
    optimize = ligand_df.settings.optional.qd.optimize
    forcefield = ligand_df.settings.optional.forcefield
    dissociate = ligand_df.settings.optional.qd.dissociate
    activation_strain = ligand_df.settings.optional.qd.activation_strain
    construct_qd = ligand_df.settings.optional.qd.construct_qd
    multi_ligand = ligand_df.settings.optional.qd.multi_ligand

    # Construct the quantum dot DataFrame
    # If construct_qd is False, construct the dataframe without filling it with quantum dots
    if qd_df is None:  # Construct new quantum dots
        qd_df = init_qd_construction(ligand_df,
                                     core_df,
                                     construct_qd=construct_qd)
    elif ligand_df is core_df is None:  # Update existing quantum dots
        update_qd_df(qd_df)
    else:
        raise TypeError("Either qd_df must be 'None' or ligand_df "
                        " and core_df must both be 'None'")

    # Start the ligand bulkiness workflow
    if bulk:
        val_nano_cat(
            "Ligand bulkiness calculations require the nano-CAT package")
        init_lig_bulkiness(qd_df, ligand_df, core_df)

    # Skip the actual quantum dot construction
    if not construct_qd:
        return qd_df

    if not qd_df[MOL].any():
        raise MoleculeError('No valid quantum dots found, aborting')

    if forcefield:
        val_nano_cat(
            "Automatic ligand forcefield assignment requires MATCH "
            "(Multipurpose Atom-Typer for CHARMM) and the nano-CAT package")

    # Optimize the qd with the core frozen
    if optimize:
        init_qd_opt(qd_df)

    if multi_ligand:
        init_multi_ligand(qd_df)

    # Calculate the interaction between ligands on the quantum dot surface
    if activation_strain:
        val_nano_cat(
            "Quantum dot activation-strain calculations require the nano-CAT package"
        )
        init_asa(qd_df)

    # Calculate the interaction between ligands on the quantum dot surface upon removal of CdX2
    if dissociate:
        val_nano_cat(
            "Quantum dot ligand dissociation calculations require the nano-CAT package"
        )
        # Start the BDE calculation
        logger.info('Calculating ligand dissociation energy')
        init_bde(qd_df)

    return qd_df
Пример #21
0
 def _find_start(mol_list: Iterable[Molecule], atom: Atom) -> Molecule:
     """Find the molecule in **mol_list** containing **atom**."""
     for mol in mol_list:
         if atom in mol:
             return mol
     raise MoleculeError(f"{atom!r} is not in any of the passed molecules")
Пример #22
0
def dissociate_bulk(
    mol: Molecule,
    symbol_x: str,
    symbol_y: None | str = None,
    count_x: int = 1,
    count_y: int = 1,
    n_pairs: int = 1,
    k: None | int = 4,
    r_max: None | float = None,
    mode: _ModeKind = 'uniform',
    **kwargs: Any,
) -> Molecule:
    r"""A workflow for removing :math:`XY`-based compounds from the bulk of **mol**.

    Examples
    --------
    .. code-block:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import dissociate_bulk

        >>> mol: Molecule = ...

        # Remove two PbBr2 pairs in a system where
        # each lead atom is surrounded by 6 bromides
        >>> mol_out1 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, k=6
        ... )

        # The same as before, expect all potential bromides are
        # identified based on a radius, rather than a fixed number
        >>> mol_out2 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=2, r_max=5.0
        ... )

        # Convert a fraction to a number of pairs
        >>> f = 0.5
        >>> count_x = 2
        >>> symbol_x = "Pb"
        >>> n_pairs = int(f * sum(at.symbol == symbol_x for at in mol) / count_x)

        >>> mol_out3 = dissociate_bulk(
        ...     mol, symbol_x="Pb", symbol_y="Br", count_y=2, n_pairs=n_pairs, k=6
        ... )

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        The input molecule.
    symbol_x : :class:`str` or :class:`int`
        The atomic symbol or number of the central (to-be dissociated) atom(s) :math:`X`.
    symbol_y : :class:`str` or :class:`int`, optional
        The atomic symbol or number of the surrounding (to-be dissociated) atom(s) :math:`Y`.
        If :data:`None`, do not dissociate any atoms :math:`Y`.
    count_x : :class:`int`
        The number of central atoms :math:`X` per individual to-be dissociated cluster.
    count_y : :class:`int`
        The number of surrounding atoms :math:`Y` per individual to-be dissociated cluster.
    n_pairs : :class:`int`
        The number of to-be removed :math:`XY` fragments.
    k : :class:`int`, optional
        The total number of :math:`Y` candidates surrounding each atom :math:`X`.
        This value should be smaller than or equal to **count_y**.
        See the **r_max** parameter for a radius-based approach;
        note that both parameters are not mutually exclusive.
    r_max : :class:`int`, optional
        The radius used for searching for :math:`Y` candidates surrounding each atom :math:`X`.
        See **k** parameter to use a fixed number of nearest neighbors;
        note that both parameters are not mutually exclusive.
    mode : :class:`str`
        How the subset of to-be removed atoms :math:`X` should be generated.
        Accepts one of the following values:

        * ``"random"``: A random distribution.
        * ``"uniform"``: A uniform distribution; the distance between each successive atom and
          all previous points is maximized.
        * ``"cluster"``: A clustered distribution; the distance between each successive atom and
          all previous points is minmized.

    Keyword Arguments
    -----------------
    \**kwargs : :data:`~typing.Any`
        Further keyword arguments for :func:`CAT.distribution.distribute_idx`.

    Returns
    -------
    :class:`~scm.plams.mol.molecule.Molecule`
        The molecule with :math:`n_{\text{pair}} * XY` fragments removed.

    """
    if n_pairs < 0:
        raise ValueError("`n_pairs` must be larger than or equal to 0; "
                         f"observed value: {n_pairs!r}")
    elif n_pairs == 0:
        return mol.copy()

    # Validate the input args
    if count_x <= 0:
        raise ValueError("`count_x` expected a an integer larger than 0; "
                         f"observed value: {count_x!r}")
    elif count_y < 0:
        raise ValueError("`count_y` expected a an integer larger than 0; "
                         f"observed value: {count_y!r}")
    elif count_y == 0:
        symbol_y = None

    # Parse `symbol_x`
    atnum_x = to_atnum(symbol_x)
    idx_x = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_x], dtype=np.intp)
    if len(idx_x) == 0:
        raise MoleculeError(f"No atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}")

    # Parse `symbol_y`
    if symbol_y is not None:
        atnum_y = to_atnum(symbol_y)
        idx_y = np.fromiter([i for i, at in enumerate(mol) if at.atnum == atnum_y], dtype=np.intp)
        if len(idx_y) == 0:
            raise MoleculeError(f"No atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}")

    # Parse `k` and `r_max`
    if k is None and r_max is None:
        raise TypeError("`k` and `r_max` cannot be both set to None")
    if k is None:
        k = cast(int, len(idx_y))
    if r_max is None:
        r_max = cast(float, np.inf)

    # Validate `n_pairs`
    if (n_pairs * count_x) > len(idx_x):
        x = n_pairs * count_x
        raise ValueError(
            f"Insufficient atoms {to_symbol(atnum_x)!r} in {mol.get_formula()}; "
            f"atleast {x} required with `n_pairs={n_pairs!r}` and `count_x={count_x!r}"
        )
    elif symbol_y is not None and (n_pairs * count_y) > len(idx_y):
        y = n_pairs * count_y
        raise ValueError(
            f"Insufficient atoms {to_symbol(atnum_y)!r} in {mol.get_formula()}; "
            f"atleast {y} required with `n_pairs={n_pairs!r}` and `count_y={count_y!r}"
        )

    # Identify a subset of atoms `x`
    if count_x != 1:
        if "cluster_size" in kwargs:
            raise TypeError("Specifying both `cluster_size` and `count_x` is not supported")
        kwargs["cluster_size"] = count_x
    idx_x_sorted = distribute_idx(mol, idx_x, f=1, **kwargs)
    idx_x_sorted.shape = -1, count_x

    if symbol_y is not None:
        # Identify a subset of matching atoms `y`
        idx_x_subset, idx_y_subset, = _get_opposite_neighbor2(
            np.asarray(mol), idx_x_sorted, idx_y, n_pairs, n=count_y, k=k, r_max=r_max
        )

        # Concatenate the subsets
        idx_xy_subset = np.concatenate([idx_x_subset.ravel(), idx_y_subset.ravel()])
    else:
        idx_xy_subset = idx_x[:(n_pairs * count_x)]
    idx_xy_subset.sort()
    idx_xy_subset += 1

    # Create and return the new molecule
    ret = mol.copy()
    atom_set = {ret[i] for i in idx_xy_subset}
    for at in atom_set:
        ret.delete_atom(at)
    return ret
Пример #23
0
def run_ff_anionic(mol: Molecule, anchor: Atom, s: Settings) -> None:
    r"""Assign neutral parameters to an anionic species (*e.g.* carboxylate).

    Consists of 4 distinct steps:

    * **mol** is capped with a proton: *e.g.*
      :math:`RCO_2^- \rightarrow RCO_2H`.
    * Parameters are guessed for both fragments (using MATCH_) and then recombined into **mol**.
    * The capping proton is removed again.
    * The atomic charge of **anchor** is adjusted such that
      the total moleculair charge becomes zero.

    Performs an inplace update of **mol**.

    .. _MATCH: http://brooks.chem.lsa.umich.edu/index.php?page=match&subdir=articles/resources/software

    Parameters
    ----------
    mol : :class:`Molecule<scm.plams.mol.molecule.Molecule>`
        A cationic molecule.

    anchor : :class:`Atom<scm.plams.mol.atom.Atom>`
        The atom in **mol** with the formal negative charge.

    s : :class:`Settings<scm.plams.core.settings.Settings>`
        The job Settings to-be passed to :class:`MATCHJob<nanoCAT.ff.match_job.MATCHJob>`.

    See Also
    --------
    :func:`run_match_job()<nanoCAT.ff.ff_assignment.run_match_job>`
        Assign atom types and charges to **mol** based on the results of MATCH_.

    :func:`run_ff_cationic()<nanoCAT.ff.ff_cationic.run_ff_cationic>`
        Assign neutral parameters to a cationic species (*e.g.* ammonium).

    """  # noqa
    if anchor not in mol:
        raise MoleculeError("Passed 'anchor' is not part of 'mol'")
    anchor.properties.charge = 0

    # Cap the anion with a proton
    mol_with_h = add_Hs(mol)
    _cap_h = mol_with_h[-1]
    cap_h = Atom(atnum=_cap_h.atnum,
                 coords=_cap_h.coords,
                 mol=mol,
                 settings=mol[1].properties.copy())

    cap_h.properties.pdb_info.IsHeteroAtom = False
    cap_h.properties.pdb_info.Name = 'Hxx'
    mol.add_atom(cap_h)
    mol.add_bond(Bond(anchor, cap_h, mol=mol))

    # Guess parameters and remove the capping proton
    run_match_job(mol, s)
    mol.delete_atom(cap_h)

    # Set the total charge of the system to 0
    anchor.properties.charge_float -= sum(at.properties.charge_float
                                          for at in mol)
    return None
Пример #24
0
def replace_surface(mol: Molecule,
                    symbol: Union[str, int],
                    symbol_new: Union[str, int] = 'Cl',
                    nth_shell: Union[int, Iterable[int]] = 0,
                    f: float = 0.5,
                    mode: str = 'uniform',
                    displacement_factor: float = 0.5,
                    **kwargs: Any) -> Molecule:
    r"""A workflow for identifying all surface atoms in **mol** and replacing a subset of them.

    Consists of three distinct steps:

    1. Identifying which atoms, with a user-specified atomic **symbol**,
       are located on the surface of **mol** rather than in the bulk.
    2. Define a subset of the newly identified surface atoms using one of CAT's
       distribution algorithms.
    3. Create and return a molecule where the atom subset defined in step 2
       has its atomic symbols replaced with **symbol_new**.

    Examples
    --------
    Replace 75% of all surface ``"Cl"`` atoms with ``"I"``.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)
        >>> mol_new.write(...)  # Write an .xyz file

    The same as above, except this time the new ``"I"`` atoms are all deleted.

    .. code:: python

        >>> from scm.plams import Molecule
        >>> from CAT.recipes import replace_surface

        >>> mol = Molecule(...)  # Read an .xyz file
        >>> mol_new = replace_surface(mol, symbol='Cl', symbol_new='I', f=0.75)

        >>> del_atom = [at for at in mol_new if at.symbol == 'I']
        >>> for at in del_atom:
        ...     mol_new.delete_atom(at)
        >>> mol_new.write(...)  # Write an .xyz file

    Parameters
    ----------
    mol : :class:`~scm.plams.mol.molecule.Molecule`
        The input molecule.

    symbol : :class:`str` or :class:`int`
        An atomic symbol or number defining the super-set of the surface atoms.

    symbol_new : :class:`str` or :class:`int`
        An atomic symbol or number which will be assigned to the new surface-atom subset.

    nth_shell : :class:`int` or :class:`~collections.abc.Iterable` [:class:`int`]
        One or more integers denoting along which shell-surface(s) to search.
        For example, if ``symbol = "Cd"`` then ``nth_shell = 0`` represents the surface,
        ``nth_shell = 1`` is the first sub-surface ``"Cd"`` shell and
        ``nth_shell = 2`` is the second sub-surface ``"Cd"`` shell.
        Using ``nth_shell = [1, 2]`` will search along both the first and second
        ``"Cd"`` sub-surface shells.
        Note that a :exc:`Zscm.plams.core.errors.MoleculeError` will be raised if
        the specified **nth_shell** is larger than the actual number of available
        sub-surface shells.

    f : :class:`float`
        The fraction of surface atoms whose atom types will be replaced with **symbol_new**.
        Must obey the following condition: :math:`0 < f \le 1`.

    mode : :class:`str`
        How the subset of surface atoms will be generated.
        Accepts one of the following values:

        * ``"random"``: A random distribution.
        * ``"uniform"``: A uniform distribution; maximizes the nearest-neighbor distance.
        * ``"cluster"``: A clustered distribution; minimizes the nearest-neighbor distance.

    displacement_factor : :class:`float`
        The smoothing factor :math:`n` for constructing a convex hull;
        should obey :math:`0 <= n <= 1`.
        Represents the degree of displacement of all atoms with respect to a spherical surface;
        :math:`n = 1` is a complete projection while :math:`n = 0` means no displacement at all.

        A non-zero value is generally recomended here,
        as the herein utilized :class:`~scipy.spatial.ConvexHull` class
        requires an adequate degree of surface-convexness,
        lest it fails to properly identify all valid surface points.

    \**kwargs : :data:`~typing.Any`
        Further keyword arguments for
        :func:`~CAT.attachment.distribution.distribute_idx`.

    Returns
    -------
    :class:`~scm.plams.mol.molecule.Molecule`
        A new Molecule with a subset of its surface atoms replaced with **symbol_new**.

    See Also
    --------
    :func:`~CAT.attachment.distribution.distribute_idx`
        Create a new distribution of atomic indices from **idx** of length :code:`f * len(idx)`.

    :func:`~nanoCAT.bde.identify_surface.identify_surface`
        Take a molecule and identify which atoms are located on the surface,
        rather than in the bulk.

    :func:`~nanoCAT.bde.identify_surface.identify_surface_ch`
        Identify the surface of a molecule using a convex hull-based approach.

    """
    # Parse input arguments
    xyz = np.array(mol, dtype=float, ndmin=2, copy=False)
    atnum = to_atnum(symbol)
    atnum_new = to_atnum(symbol_new)

    # Define the surface-atom subset
    idx = np.fromiter((i for i, at in enumerate(mol) if at.atnum == atnum),
                      dtype=int)
    try:
        idx_surface = idx[_collect_surface(xyz[idx], displacement_factor,
                                           nth_shell)]
    except MoleculeError as exc:
        nth = exc.args[0]
        if nth == 0:
            exc = MoleculeError(
                f"No atoms with atomic symbol {to_symbol(symbol)!r} available in "
                f"{mol.get_formula()!r}")
        else:
            exc = MoleculeError(
                f"No subsurface shell >= {nth!r} of atom {to_symbol(symbol)!r} "
                f"available in {mol.get_formula()!r}")
        exc.__cause__ = None
        raise exc

    try:
        idx_surface_subset = distribute_idx(xyz,
                                            idx_surface,
                                            f=f,
                                            mode=mode,
                                            **kwargs)
    except ValueError as ex:
        raise MoleculeError(
            "Failed to identify any surface atoms with atomic symbol "
            f"{to_symbol(symbol)!r} in {mol.get_formula()!r}") from ex
    else:
        idx_surface_subset += 1

    # Update atomic symbols and return
    ret = mol.copy()
    for i in idx_surface_subset:
        ret[i].atnum = atnum_new
    return ret