示例#1
0
def randomize_atoms(mol: Chem.rdchem.Mol) -> Optional[Chem.rdchem.Mol]:
    """Renumber the atoms of a mol in a random order.

    Args:
        mol: a molecule.

    Returns:
        mol: the input molecule itself when it has no atoms, otherwise the
            result of ``Chem.RenumberAtoms`` called with a shuffled list of
            atom indices.
    """
    n_atoms = mol.GetNumAtoms()
    if not n_atoms:
        # Nothing to permute.
        return mol

    permutation = list(range(n_atoms))
    # In-place shuffle driven by the module-level ``random`` state.
    random.shuffle(permutation)
    return Chem.RenumberAtoms(mol, permutation)
示例#2
0
def reorder_atoms(
    mol: Chem.rdchem.Mol,
    break_ties: bool = True,
    include_chirality: bool = True,
    include_isotopes: bool = True,
) -> Optional[Chem.rdchem.Mol]:
    """Renumber the atoms of a mol by their canonical rank.

    The intent is to obtain a single atom order for the same molecule,
    regardless of its original representation.

    Args:
        mol: a molecule.
        break_ties: Force breaking of ranked ties.
        include_chirality: Use chiral information when computing rank.
        include_isotopes: Use isotope information when computing rank.

    Returns:
        mol: the input molecule itself when it has no atoms, otherwise the
            result of ``Chem.RenumberAtoms`` with atoms ordered by rank.
    """
    if not mol.GetNumAtoms():
        return mol

    ranks = list(
        Chem.CanonicalRankAtoms(
            mol,
            breakTies=break_ties,
            includeChirality=include_chirality,
            includeIsotopes=include_isotopes,
        )
    )
    # Atom indices in ascending rank order. ``sorted`` is stable, so atoms
    # sharing a rank stay in ascending index order.
    ordering = sorted(range(len(ranks)), key=ranks.__getitem__)
    return Chem.RenumberAtoms(mol, ordering)
    def complete_labels(mol: Chem.rdchem.Mol,
                        mollabels_dict: Dict,
                        mark_upmatched: bool = True) -> List:
        """
        Build a full list of atom labels from a partial index-to-name mapping.

        Atoms whose index is a key of ``mollabels_dict`` keep the given name. Every other atom
        gets a generated name: its upper-cased element symbol followed by a per-element running
        number (starting at 1), e.g. ``CX1`` or ``C1``.

        :param mol: the molecule whose atoms are labelled (only read here, not modified).
        :type mol: Chem.rdchem.Mol
        :param mollabels_dict: key is index (int) and value is name like for a normal atomlabels (but with gaps)
        :type mollabels_dict: Dict
        :param mark_upmatched: Add an X between the symbol and the number of generated names
        :type mark_upmatched: bool
        :return: atom labels, one per atom, in atom-index order
        :rtype: List[str]
        """
        separator = 'X' if mark_upmatched else ''
        seen = {}  # element symbol -> how many generated names it has received so far
        completed = []
        for idx in range(mol.GetNumAtoms()):
            if idx in mollabels_dict:
                completed.append(mollabels_dict[idx])
                continue
            symbol = mol.GetAtomWithIdx(idx).GetSymbol().upper()
            seen[symbol] = seen.get(symbol, 0) + 1
            completed.append(f'{symbol}{separator}{seen[symbol]}')
        return completed
    def label(mol: Chem.rdchem.Mol,
              atomlabels: List) -> None:  # -> mol inplace.
        """
        Store each atom's label in its ``AtomLabel`` property (see
        https://www.rdkit.org/docs/RDKit_Book.html).

        :param mol: the molecule to be labelled _in place_.
        :type mol: Chem.rdchem.Mol
        :param atomlabels: atom labels, one per atom, in atom-index order
        :type atomlabels: List[str]
        :return: None
        :raises AssertionError: if the number of labels differs from the number of atoms.
        """
        n_atoms = mol.GetNumAtoms()
        assert len(atomlabels) == n_atoms, \
            'the number of atoms in mol has to be the same as atomlabels. Hydrogens? dehydrogenate!'
        for idx in range(n_atoms):
            atom = mol.GetAtomWithIdx(idx)
            atom.SetProp('AtomLabel', atomlabels[idx])
def with_message_passing(mol: Chem.rdchem.Mol):
    """
    One-hot encode the atom count of ``mol`` as ``[count == 1, count == 2, otherwise]``.

    Original author's note: a molecule with 1 or 2 heavy atoms is not processed by the
    message passing action (only for D-MPNN). The count used here is ``mol.GetNumAtoms()``;
    presumably the molecule holds heavy atoms only -- confirm against the caller.

    Any count other than 1 or 2 (including 0) sets the last entry.
    """
    # Position to flag: 1 atom -> 0, 2 atoms -> 1, anything else -> 2.
    slot = {1: 0, 2: 1}.get(mol.GetNumAtoms(), 2)
    return [1 if position == slot else 0 for position in range(3)]
def num_bond_in_ring(mol: Chem.rdchem.Mol):
    """
    Count the bonds of ``mol`` that are in a ring.

    The bonds are walked once via ``mol.GetBonds()`` rather than probing every
    pair of atoms for a bond, so the cost grows with the number of bonds
    instead of the square of the number of atoms.

    Returns:
        A single-element list ``[count]`` (a count vector), where ``count`` is
        the number of bonds for which ``IsInRing()`` is true.
    """
    return [sum(1 for bond in mol.GetBonds() if bond.IsInRing())]
 def display(mol: Chem.rdchem.Mol, show='name'):
     """
     Draw the molecule, optionally annotating each atom.

     The annotation is written to a deep copy, so the molecule passed in is not modified.

     NOTE(review): the final ``display(...)`` call is presumably meant to resolve to an outer
     display function (e.g. IPython's) rather than to this function -- confirm in the
     enclosing file.

     :param mol: the molecule to draw.
     :type mol: Chem.rdchem.Mol
     :param show: ``'index'`` writes each atom's index into ``molAtomMapNumber``;
         ``'name'`` is not implemented yet; a falsy value draws without annotation.
         Note that the default, ``'name'``, therefore raises for any molecule with atoms.
     :return: None
     :raises NotImplementedError: if ``show == 'name'`` and the molecule has at least one atom.
     :raises ValueError: if ``show`` is any other truthy value and the molecule has at least one atom.
     """
     # show = 'index' | 'name'
     if show:
         atoms = mol.GetNumAtoms()
         mol = copy.deepcopy(mol)
         for idx in range(atoms):
             if show == 'index':
                 mol.GetAtomWithIdx(idx).SetProp('molAtomMapNumber',
                                                 str(idx))
             elif show == 'name':
                 # TODO: the intent is to show each atom's 'AtomLabel', but molAtomMapNumber
                 # only takes str(int), so a different property is needed.
                 raise NotImplementedError(
                     'I need to figure out what property is needed as molAtomMapNumber is an str(int)'
                 )
             else:
                 raise ValueError(
                     f"show must be 'index', 'name' or a falsy value, got {show!r}")
     display(Draw.MolToImage(mol))
     return None
示例#8
0
    def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
        """Fill TITLE, POSITION and GENBOX of this object from an RDKit molecule.

        The conformer with id 0 supplies the coordinates. If the molecule has no
        conformer, hydrogens are added, a conformer is embedded and then optimized
        with UFF; this happens on the molecule returned by ``Chem.AddHs``.

        Parameters
        ----------
        mol : Chem.rdchem.Mol
            Molecule, possibly with a conformation

        conversionFactor  :  float
            the factor used to convert length from rdkit to Gromos
            (default: angstrom -> nano meter = 0.1)

        Raises
        ------
        ValueError
            If the molecule has no conformer and RDKit fails to embed one.
        """
        # The second "/"-separated InChI field is used as title and residue name;
        # fall back to a placeholder when the InChI string has no such field.
        inchi = Chem.MolToInchi(mol).split("/")
        name = inchi[1] if len(inchi) >= 2 else "XXX"
        self.TITLE = TITLE("\t" + name + " created from RDKit")

        # check if conformations exist else create a new one
        if mol.GetNumConformers() < 1:
            mol = Chem.AddHs(mol)
            # EmbedMolecule reports failure through a negative conformer id; fail
            # here with a clear message instead of later on a missing conformer.
            if AllChem.EmbedMolecule(mol) < 0:
                raise ValueError("RDKit could not embed a conformer for molecule " + name)
            AllChem.UFFOptimizeMolecule(mol)
        conf = mol.GetConformer(0)

        # fill a list with atomP types from RDKit data
        atomList = []
        for i in range(mol.GetNumAtoms()):
            pos = conf.GetAtomPosition(i)
            atomList.append(
                blocks.atomP(
                    resID=1,
                    resName=name,
                    atomType=mol.GetAtomWithIdx(i).GetSymbol(),
                    atomID=i + 1,  # atom IDs are 1-based
                    xp=conversionFactor * pos.x,
                    yp=conversionFactor * pos.y,
                    zp=conversionFactor * pos.z,
                )
            )

        # set POSITION attribute
        self.POSITION = blocks.POSITION(atomList)
        # Defaults set for GENBOX - for liquid sim adjust manually
        self.GENBOX = blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90])
示例#9
0
def tree_decomp(
        mol: Chem.rdchem.Mol) -> Tuple[List[List[int]], List[Tuple[int, int]]]:
    """Decompose a molecule into cliques and connect them into a spanning tree.

    Cliques are lists of atom indices: isolated atoms, non-ring bonds, rings
    from ``Chem.GetSymmSSSR`` (rings sharing more than two atoms are merged),
    and extra single-atom cliques added at some atoms shared by several cliques.

    Args:
        mol: the molecule to decompose.

    Returns:
        ``(cliques, edges)`` where ``cliques`` is the list of atom-index lists
        and ``edges`` is a list of ``(i, j)`` pairs of clique indices kept by
        the spanning tree. ``edges`` is an empty list when no two cliques
        share an atom.
    """
    n_atoms = mol.GetNumAtoms()
    cliques = []
    # Atoms without any bond form their own one-atom clique.
    for atom in mol.GetAtoms():
        if atom.GetDegree() == 0:
            cliques.append([atom.GetIdx()])

    # Every bond that is not in a ring is a two-atom clique.
    for bond in mol.GetBonds():
        a1 = bond.GetBeginAtom().GetIdx()
        a2 = bond.GetEndAtom().GetIdx()
        if not bond.IsInRing():
            cliques.append([a1, a2])

    # Each ring reported by GetSymmSSSR is a clique.
    ssr = [list(x) for x in Chem.GetSymmSSSR(mol)]
    cliques.extend(ssr)

    # nei_list[a] = indices of the cliques that contain atom a.
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)

    # Merge Rings with intersection > 2 atoms
    # Only cliques with more than two atoms take part; clique j (j > i) is
    # folded into clique i and then emptied. nei_list is not refreshed during
    # this pass, so emptied cliques are skipped by the length check.
    # NOTE: cliques[i] is extended and then rebound while the loop over its
    # atoms is running, so the statement order below matters.
    for i in range(len(cliques)):
        if len(cliques[i]) <= 2: continue
        for atom in cliques[i]:
            for j in nei_list[atom]:
                if i >= j or len(cliques[j]) <= 2: continue
                inter = set(cliques[i]) & set(cliques[j])
                if len(inter) > 2:
                    cliques[i].extend(cliques[j])
                    cliques[i] = list(set(cliques[i]))
                    cliques[j] = []

    # Drop the cliques emptied by merging and rebuild the atom -> cliques map
    # with the new clique indices.
    cliques = [c for c in cliques if len(c) > 0]
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)

    # Build edges and add singleton cliques
    # edges maps a clique-index pair to a score; a larger score is preferred
    # by the spanning tree below (the score is turned into a weight there).
    edges = defaultdict(int)
    for atom in range(n_atoms):
        # An atom in at most one clique connects nothing.
        if len(nei_list[atom]) <= 1:
            continue
        cnei = nei_list[atom]
        bonds = [c for c in cnei if len(cliques[c]) == 2]
        rings = [c for c in cnei if len(cliques[c]) > 4]
        if len(bonds) > 2 or (
                len(bonds) == 2 and len(cnei) > 2
        ):  # In general, if len(cnei) >= 3, a singleton should be added, but 1 bond + 2 ring is currently not dealt with.
            # Add a one-atom clique for this atom and link every clique
            # containing the atom to it with score 1.
            cliques.append([atom])
            c2 = len(cliques) - 1
            for c1 in cnei:
                edges[(c1, c2)] = 1
        elif len(rings) > 2:  # Multiple (n>2) complex rings
            # Same construction, but with score MST_MAX_WEIGHT - 1, which
            # becomes weight 1 after the conversion below.
            cliques.append([atom])
            c2 = len(cliques) - 1
            for c1 in cnei:
                edges[(c1, c2)] = MST_MAX_WEIGHT - 1
        else:
            # Link each pair of cliques sharing this atom; the score is the
            # number of atoms the two cliques have in common.
            for i in range(len(cnei)):
                for j in range(i + 1, len(cnei)):
                    c1, c2 = cnei[i], cnei[j]
                    inter = set(cliques[c1]) & set(cliques[c2])
                    if edges[(c1, c2)] < len(inter):
                        edges[(c1, c2)] = len(
                            inter)  # cnei[i] < cnei[j] by construction

    # Turn each (c1, c2) -> score entry into a (c1, c2, weight) triple with
    # weight = MST_MAX_WEIGHT - score, so a minimum spanning tree over the
    # weights favours high scores.
    edges = [u + (MST_MAX_WEIGHT - v, ) for u, v in edges.items()]
    if len(edges) == 0:
        return cliques, edges

    # Compute Maximum Spanning Tree
    # (a minimum spanning tree over the converted weights)
    row, col, data = zip(*edges)
    n_clique = len(cliques)
    clique_graph = csr_matrix((data, (row, col)), shape=(n_clique, n_clique))
    junc_tree = minimum_spanning_tree(clique_graph)
    # The non-zero entries of the result are the edges kept by the tree.
    row, col = junc_tree.nonzero()
    edges = [(row[i], col[i]) for i in range(len(row))]

    return cliques, edges
示例#10
0
def featurization(r_mol: Chem.rdchem.Mol,
                  p_mol: Chem.rdchem.Mol,
                  ):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected: every ordered pair of distinct reactant atoms
    is an edge, and each edge carries two features, the 3D distance between the
    two atoms in the reactant and in the product. Product distances are looked
    up with reactant atom indices, so both molecules are assumed to share the
    same atom ordering -- confirm against the dataset.

    Args:
        r_mol: RDKit molecule object for the reactant.
        p_mol: RDKit molecule object for the product.

    Returns:
        data: Torch Geometric Data object, storing the atom and bond features:
            ``x`` holds one ``atom_features`` row per reactant atom,
            ``edge_index`` has shape ``(2, E)`` and ``edge_attr`` shape ``(E, 2)``,
            with ``E = n_atoms * (n_atoms - 1)``.
    """

    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    n_atoms = r_mol.GetNumAtoms()

    # 3d distance matrices
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    # atom (node) features, taken from the reactant only
    f_atoms = [atom_features(r_mol.GetAtomWithIdx(a1)) for a1 in range(n_atoms)]

    edge_index = []     # list of tuples indicating the (directed) edges
    f_bonds = []        # bond (edge) features, aligned with edge_index
    for a1 in range(n_atoms):
        for a2 in range(a1 + 1, n_atoms):
            # fully connected graph: add both directions of every atom pair
            edge_index.extend([(a1, a2), (a2, a1)])

            # for now, naively include both reac and prod distances;
            # bond-type features are not part of the edge features yet
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # reshape(-1, 2) is a no-op when there are edges and keeps the tensors
    # two-dimensional, (2, 0) and (0, 2), when there are none (single atom).
    data.edge_index = torch.tensor(edge_index, dtype=torch.long).reshape(-1, 2).t().contiguous()
    data.edge_attr = torch.tensor(f_bonds, dtype=torch.float).reshape(-1, 2)

    return data