def randomize_atoms(mol: Chem.rdchem.Mol) -> Optional[Chem.rdchem.Mol]:
    """Renumber the atoms of a mol in a random order.

    Args:
        mol: a molecule.

    Returns:
        mol: the input object itself when it has no atoms, otherwise the
            molecule returned by ``Chem.RenumberAtoms`` for a shuffled
            atom order.
    """
    n_atoms = mol.GetNumAtoms()
    if n_atoms == 0:
        # Nothing to permute.
        return mol

    # Shuffled in place with the module-level ``random`` generator, so the
    # outcome follows whatever seed the caller has set there.
    shuffled_order = list(range(n_atoms))
    random.shuffle(shuffled_order)
    return Chem.RenumberAtoms(mol, shuffled_order)
def reorder_atoms(
    mol: Chem.rdchem.Mol,
    break_ties: bool = True,
    include_chirality: bool = True,
    include_isotopes: bool = True,
) -> Optional[Chem.rdchem.Mol]:
    """Renumber the atoms of a mol according to their canonical rank.

    The intent is a single atom order for the same molecule, regardless of
    how it was originally written.

    Args:
        mol: a molecule.
        break_ties: Force breaking of ranked ties.
        include_chirality: Use chiral information when computing rank.
        include_isotopes: Use isotope information when computing rank.

    Returns:
        mol: the input object itself when it has no atoms, otherwise the
            molecule returned by ``Chem.RenumberAtoms``.
    """
    if not mol.GetNumAtoms():
        return mol

    ranks = list(
        Chem.CanonicalRankAtoms(
            mol,
            breakTies=break_ties,
            includeChirality=include_chirality,
            includeIsotopes=include_isotopes,
        )
    )
    # Atom indices by ascending rank; equal ranks (possible when ties are not
    # broken) fall back to ascending original index.
    ordered_indices = sorted(range(len(ranks)), key=lambda idx: (ranks[idx], idx))
    return Chem.RenumberAtoms(mol, ordered_indices)
def complete_labels(mol: Chem.rdchem.Mol, mollabels_dict: Dict, mark_upmatched: bool = True) -> List:
    """
    Build the full list of atom labels, inventing names for the atoms missing from ``mollabels_dict``.

    An atom whose index is a key of ``mollabels_dict`` keeps that label. Any other atom is named with
    its upper-cased element symbol and a per-element running number starting at 1, e.g. ``CX1``
    (or ``C1`` when ``mark_upmatched`` is false).

    :param mol: the molecule whose atoms are named; it is only read here.
    :type mol: Chem.rdchem.Mol
    :param mollabels_dict: key is index (int) and value is name like for a normal atomlabels (but with gaps)
    :type mollabels_dict: Dict
    :param mark_upmatched: Add an X between the symbol and the number
    :type mark_upmatched: bool
    :return: atom labels, one per atom, in atom-index order
    :rtype: List[str]
    """
    labels = []
    seen_per_element = {}
    for idx in range(mol.GetNumAtoms()):
        if idx in mollabels_dict:
            labels.append(mollabels_dict[idx])
            continue
        element = mol.GetAtomWithIdx(idx).GetSymbol().upper()
        count = seen_per_element.get(element, 0) + 1
        seen_per_element[element] = count
        labels.append(f'{element}X{count}' if mark_upmatched else f'{element}{count}')
    return labels
def label(mol: Chem.rdchem.Mol, atomlabels: List) -> None:  # -> mol inplace.
    """
    Assign the prop ``AtomLabel`` to every atom of ``mol``... https://www.rdkit.org/docs/RDKit_Book.html

    The length check runs before any atom is touched, so a mismatch leaves the molecule unmodified.

    :param mol: the molecule to be labelled _in place_.
    :type mol: Chem.rdchem.Mol
    :param atomlabels: atom labels, one per atom, in atom-index order
    :type atomlabels: List[str]
    :return: None
    :raises AssertionError: if ``atomlabels`` does not have exactly one entry per atom
    """
    if len(atomlabels) != mol.GetNumAtoms():
        # Raised explicitly rather than via ``assert`` so the validation is not
        # stripped under ``python -O``; the exception type is kept for callers
        # that already catch AssertionError.
        raise AssertionError(
            'the number of atoms in mol has to be the same as atomlabels. Hydrogens? dehydrogenate!'
        )
    for idx, atom_label in enumerate(atomlabels):
        mol.GetAtomWithIdx(idx).SetProp('AtomLabel', atom_label)
    return None
def with_message_passing(mol: Chem.rdchem.Mol):
    """
    One-hot encode the atom count of ``mol`` as ``[hv=1, hv=2, hv>2]``.

    Per the original note, a molecule with 1 or 2 heavy atoms is not processed
    by the message passing action (only for D-MPNN). The count used here is
    ``mol.GetNumAtoms()``; any value other than 1 or 2 (including 0) sets the
    last slot.
    """
    hot_slot = {1: 0, 2: 1}.get(mol.GetNumAtoms(), 2)
    return [int(slot == hot_slot) for slot in range(3)]
def num_bond_in_ring(mol: Chem.rdchem.Mol):
    """
    Count the bonds of ``mol`` that are in a ring.

    The bonds are walked once via ``mol.GetBonds()`` instead of probing every
    atom pair with ``GetBondBetweenAtoms``, so the cost scales with the number
    of bonds rather than with the square of the number of atoms.

    Returns a one-element list ``[count]`` (a count vector).
    """
    ring_bond_count = sum(1 for bond in mol.GetBonds() if bond.IsInRing())
    return [ring_bond_count]
def display(mol: Chem.rdchem.Mol, show='name'):  # show = 'index' | 'name'
    """
    Draw ``mol`` with ``Draw.MolToImage``, optionally annotating each atom.

    When ``show`` is truthy the molecule is deep-copied first, so the
    annotation props are written to the copy and not to the caller's object.

    :param mol: the molecule to draw
    :type mol: Chem.rdchem.Mol
    :param show: ``'index'`` writes each atom index into ``molAtomMapNumber``;
        ``'name'`` is not implemented yet; a falsy value draws without annotation.
    :return: None
    :raises NotImplementedError: if ``show == 'name'`` and the molecule has at least one atom
    :raises ValueError: for any other truthy ``show`` when the molecule has at least one atom
    """
    if show:
        atoms = mol.GetNumAtoms()
        mol = copy.deepcopy(mol)
        for idx in range(atoms):
            if show == 'index':
                mol.GetAtomWithIdx(idx).SetProp('molAtomMapNumber', str(idx))
            elif show == 'name':
                # The statement that used to follow this raise (copying
                # 'AtomLabel' into 'molAtomMapNumber') was unreachable and
                # has been dropped.
                raise NotImplementedError(
                    'I need to figure out what property is needed as molAtomMapNumber is an str(int)'
                )
            else:
                raise ValueError
    # NOTE(review): if this function lives at module level, the name `display`
    # below resolves to this very function rather than to a notebook display
    # helper -- confirm which `display` is meant to be called here.
    display(Draw.MolToImage(mol))
    return None
def createRDKITconf(self, mol: Chem.rdchem.Mol, conversionFactor: float = 0.1):
    """Fill this object's TITLE, POSITION and GENBOX blocks from an RDKit molecule.

    If the molecule already has a conformation, the first one is used.
    Otherwise hydrogens are added, a conformer is embedded and it is
    optimised with UFF before the coordinates are read.

    Parameters
    ----------
    mol : Chem.rdchem.Mol
        Molecule, possibly with a conformation
    conversionFactor : float
        the factor used to convert length from rdkit to Gromos
        (default: angstrom -> nano meter = 0.1)
    """
    # The second "/"-separated field of the InChI string serves as the name;
    # "XXX" is the fallback when that field is missing.
    inchi_fields = Chem.MolToInchi(mol).split("/")
    name = inchi_fields[1] if len(inchi_fields) >= 2 else "XXX"
    setattr(self, "TITLE", TITLE("\t" + name + " created from RDKit"))

    # Make sure there is a conformer to read coordinates from.
    if mol.GetNumConformers() < 1:
        mol = Chem.AddHs(mol)
        AllChem.EmbedMolecule(mol)
        AllChem.UFFOptimizeMolecule(mol)
    conformer = mol.GetConformer(0)

    # One atomP entry per atom, coordinates scaled by conversionFactor;
    # atom IDs are 1-based and every atom goes into residue 1.
    atom_entries = []
    for idx in range(mol.GetNumAtoms()):
        position = conformer.GetAtomPosition(idx)
        atom_entries.append(
            blocks.atomP(
                resID=1,
                resName=name,
                atomType=mol.GetAtomWithIdx(idx).GetSymbol(),
                atomID=idx + 1,
                xp=conversionFactor * position.x,
                yp=conversionFactor * position.y,
                zp=conversionFactor * position.z,
            )
        )
    setattr(self, "POSITION", blocks.POSITION(atom_entries))

    # Defaults set for GENBOX - for liquid sim adjust manually
    setattr(self, "GENBOX", blocks.GENBOX(pbc=1, length=[4, 4, 4], angles=[90, 90, 90]))
def tree_decomp(
        mol: Chem.rdchem.Mol) -> Tuple[List[List[int]], List[Tuple[int, int]]]:
    """Decompose a molecule into cliques of atoms and a spanning tree over them.

    Steps, in order:

    1. Seed cliques: every degree-0 atom, every non-ring bond (as an atom
       pair) and every ring returned by ``Chem.GetSymmSSSR``.
    2. Merge rings that share more than 2 atoms.
    3. For every atom that belongs to more than one clique, either add a
       singleton clique ``[atom]`` linked to all of its cliques, or score each
       pair of its cliques by the size of their atom intersection.
    4. Turn scores into weights ``MST_MAX_WEIGHT - score`` and keep the edges
       chosen by ``minimum_spanning_tree`` (i.e. a maximum spanning tree over
       the scores).

    Args:
        mol: the molecule to decompose.

    Returns:
        ``(cliques, edges)``: ``cliques`` is a list of atom-index lists and
        ``edges`` a list of ``(clique_index, clique_index)`` pairs. ``edges``
        is an empty list when no two cliques share an atom.
    """
    n_atoms = mol.GetNumAtoms()
    cliques = []
    # Isolated atoms have no bond to represent them, so each one becomes its
    # own clique.
    for atom in mol.GetAtoms():
        if atom.GetDegree() == 0:
            cliques.append([atom.GetIdx()])
    # Every bond outside a ring is a two-atom clique.
    for bond in mol.GetBonds():
        a1 = bond.GetBeginAtom().GetIdx()
        a2 = bond.GetEndAtom().GetIdx()
        if not bond.IsInRing():
            cliques.append([a1, a2])
    ssr = [list(x) for x in Chem.GetSymmSSSR(mol)]
    cliques.extend(ssr)
    # nei_list[atom] = indices of the cliques that contain that atom.
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)
    # Merge Rings with intersection > 2 atoms
    # NOTE: statement order matters here. The `for atom` loop keeps iterating
    # the list object it started with (which the first `extend` grows), while
    # the slot cliques[i] is rebound to a de-duplicated list; nei_list is not
    # refreshed until after the whole merge pass. Absorbed cliques are blanked
    # to [] so that indices stay valid during the pass.
    for i in range(len(cliques)):
        if len(cliques[i]) <= 2:
            continue
        for atom in cliques[i]:
            for j in nei_list[atom]:
                if i >= j or len(cliques[j]) <= 2:
                    continue
                inter = set(cliques[i]) & set(cliques[j])
                if len(inter) > 2:
                    cliques[i].extend(cliques[j])
                    cliques[i] = list(set(cliques[i]))
                    cliques[j] = []
    # Drop the blanked cliques and rebuild the atom -> clique index for the
    # compacted list.
    cliques = [c for c in cliques if len(c) > 0]
    nei_list = [[] for i in range(n_atoms)]
    for i in range(len(cliques)):
        for atom in cliques[i]:
            nei_list[atom].append(i)
    # Build edges and add singleton cliques
    # edges maps (clique, clique) -> score; a missing pair reads as 0.
    edges = defaultdict(int)
    for atom in range(n_atoms):
        if len(nei_list[atom]) <= 1:
            continue
        cnei = nei_list[atom]
        bonds = [c for c in cnei if len(cliques[c]) == 2]
        rings = [c for c in cnei if len(cliques[c]) > 4]
        if len(bonds) > 2 or (
                len(bonds) == 2 and len(cnei) > 2
        ):  # In general, if len(cnei) >= 3, a singleton should be added, but 1 bond + 2 ring is currently not dealt with.
            cliques.append([atom])
            c2 = len(cliques) - 1
            for c1 in cnei:
                edges[(c1, c2)] = 1
        elif len(rings) > 2:  # Multiple (n>2) complex rings
            cliques.append([atom])
            c2 = len(cliques) - 1
            # Score MST_MAX_WEIGHT - 1 becomes weight 1 after the inversion
            # below, i.e. the lightest weight any edge built here can get.
            for c1 in cnei:
                edges[(c1, c2)] = MST_MAX_WEIGHT - 1
        else:
            # Score each pair of this atom's cliques by shared-atom count,
            # keeping the largest value seen for the pair.
            for i in range(len(cnei)):
                for j in range(i + 1, len(cnei)):
                    c1, c2 = cnei[i], cnei[j]
                    inter = set(cliques[c1]) & set(cliques[c2])
                    if edges[(c1, c2)] < len(inter):
                        edges[(c1, c2)] = len(
                            inter)  # cnei[i] < cnei[j] by construction
    # Flatten to (c1, c2, weight) with weight = MST_MAX_WEIGHT - score, so that
    # a minimum spanning tree prefers the highest-scoring edges.
    edges = [u + (MST_MAX_WEIGHT - v, ) for u, v in edges.items()]
    if len(edges) == 0:
        return cliques, edges
    # Compute Maximum Spanning Tree
    row, col, data = zip(*edges)
    n_clique = len(cliques)
    clique_graph = csr_matrix((data, (row, col)), shape=(n_clique, n_clique))
    junc_tree = minimum_spanning_tree(clique_graph)
    # The surviving tree edges are the nonzero entries of the result.
    row, col = junc_tree.nonzero()
    edges = [(row[i], col[i]) for i in range(len(row))]
    return cliques, edges
def featurization(
    r_mol: Chem.rdchem.Mol,
    p_mol: Chem.rdchem.Mol,
):
    """
    Generates features of the reactant and product for one reaction as input for the network.

    The graph is fully connected: every ordered pair of distinct atoms is an
    edge, and each edge carries the reactant and product 3D distances between
    its two atoms. Atom features come from the reactant only, and the atom
    count is taken from the reactant.

    Args:
        r_mol: RDKit molecule object for the reactant.
        p_mol: RDKit molecule object for the product.

    Returns:
        data: Torch Geometric Data object, storing the atom features (``x``),
            the edges (``edge_index``, shape ``[2, E]``) and the bond/edge
            features (``edge_attr``, shape ``[E, 2]``).
    """
    # compute properties with rdkit (only works if dataset is clean)
    r_mol.UpdatePropertyCache()
    p_mol.UpdatePropertyCache()

    n_atoms = r_mol.GetNumAtoms()

    # 3D distance matrices. The topological distance matrices computed here
    # previously were never read, so they are no longer built.
    D_r = Chem.Get3DDistanceMatrix(r_mol)
    D_p = Chem.Get3DDistanceMatrix(p_mol)

    # atom (node) features
    f_atoms = [atom_features(r_mol.GetAtomWithIdx(a1)) for a1 in range(n_atoms)]

    edge_index = []  # list of tuples indicating presence of edges
    f_bonds = []  # bond (edge) features
    for a1 in range(n_atoms):
        for a2 in range(a1 + 1, n_atoms):  # fully connected graph
            edge_index.extend([(a1, a2), (a2, a1)])
            # for now, naively include both reac and prod
            f_bonds.append([D_r[a1][a2], D_p[a1][a2]])
            f_bonds.append([D_r[a2][a1], D_p[a2][a1]])

    data = tg.data.Data()
    data.x = torch.tensor(f_atoms, dtype=torch.float)
    # reshape(-1, 2) is a no-op when edges exist and keeps the documented
    # [2, E] / [E, 2] shapes when there are none (fewer than two atoms).
    data.edge_index = torch.tensor(edge_index, dtype=torch.long).reshape(-1, 2).t().contiguous()
    data.edge_attr = torch.tensor(f_bonds, dtype=torch.float).reshape(-1, 2)

    return data