def process(self, mol: chem.Mol, atom_map: Dict[int, int]) -> GCNGraph:
    """Build the normalised adjacency matrix and node ids for one molecule.

    The graph has one node per atom plus one extra node (index ``n - 1``)
    that is linked to every atom and serves as the graph-embedding node.

    Args:
        mol: molecule whose atoms and bonds define the graph.
        atom_map: lookup from atomic number to node-embedding index; the
            extra node receives the index ``len(atom_map)``.

    Returns:
        ``GCNGraph(n, adj, num)`` where ``n`` is the node count (atoms + 1),
        ``adj`` is an ``(n, n)`` tensor of pairwise coefficients on
        ``self.device`` and ``num`` holds the embedding index of every node.

    Raises:
        KeyError: if an atom's atomic number is missing from ``atom_map``.
    """
    n = mol.GetNumAtoms() + 1  # allocate a new node for graph embedding
    extra_node = n - 1

    # Edge endpoints: every bond, then one edge from the extra node to each atom.
    begin_idx = [b.GetBeginAtomIdx() for b in mol.GetBonds()] + [extra_node] * extra_node
    end_idx = [b.GetEndAtomIdx() for b in mol.GetBonds()] + list(range(extra_node))
    assert len(begin_idx) == len(end_idx)

    # Both directions of every edge, followed by a self-loop on every node.
    self_loops = list(range(n))
    rows = begin_idx + end_idx + self_loops
    cols = end_idx + begin_idx + self_loops

    # Per-node factor 1/sqrt(d): for an atom d = neighbours + 2 (extra-node
    # link and self-loop); for the extra node d = n (all atoms + self-loop).
    deg = torch.tensor(
        [sqrt(1 / (len(a.GetNeighbors()) + 2)) for a in mol.GetAtoms()]
        + [sqrt(1 / n)],
        device=self.device)
    coeff = deg.reshape(-1, 1) @ deg[None, :]  # pairwise coefficients

    adj = torch.zeros((n, n), device=self.device)
    # Index with an explicit (rows, cols) pair.  Passing a single list of two
    # lists relies on the deprecated "sequence as multi-dimensional index"
    # behaviour, which would otherwise be read as one tensor index and
    # assign whole rows instead of individual entries.
    adj[rows, cols] = coeff[rows, cols]

    # node embedding
    num = torch.tensor(
        [atom_map[a.GetAtomicNum()] for a in mol.GetAtoms()] + [len(atom_map)],
        device=self.device)
    return GCNGraph(n, adj, num)
def from_rdmol(cls, rdmol: Chem.Mol, atomidx2nodename=None):
    """
    Build an instance from an RDKit molecule.

    Every atom becomes a node carrying a ``symbol`` attribute and every bond
    becomes an undirected edge between the nodes of its two atoms.

    :param rdmol: the RDKit molecule to convert
    :param atomidx2nodename: optional lookup used to name the nodes,
        ``atomidx2nodename[atom_idx] == nodename``; when it is ``None`` (or
        otherwise falsy, e.g. empty) the atom index itself is the node name
    :return: ``cls(g)`` where ``g`` is the assembled ``nx.Graph``
    :raises GraphError: if the assembled graph is not connected
    """
    use_mapping = bool(atomidx2nodename)

    def node_name(atom_idx):
        # Translate an atom index into the node name used in the graph.
        return atomidx2nodename[atom_idx] if use_mapping else atom_idx

    g = nx.Graph()
    for atom in rdmol.GetAtoms():
        g.add_node(node_name(atom.GetIdx()), symbol=atom.GetSymbol())
    for bond in rdmol.GetBonds():
        g.add_edge(
            node_name(bond.GetBeginAtomIdx()),
            node_name(bond.GetEndAtomIdx()),
        )

    if nx.is_connected(g):
        return cls(g)
    raise GraphError('the graph is not connected!')
def to_graph(mol: Chem.Mol):
    """Convert a molecule to a networkx graph.

    Nodes are keyed by atom index and edges by the (begin, end) atom indices
    of each bond. Atom and bond properties are stored as attributes.

    Args:
        mol (Chem.Mol): a molecule.

    Returns:
        mol_graph (networkx.Graph): a graph representing the molecule. Each
            node carries ``atomic_num``, ``formal_charge``, ``chiral_tag``,
            ``hybridization``, ``num_explicit_hs``, ``implicit_valence``,
            ``degree``, ``symbol``, ``ring_atom`` and ``is_aromatic``; each
            edge carries ``bond_type``.
    """
    nx = _get_networkx()

    def atom_attributes(atom):
        # Collect the per-atom properties stored on the node.
        return {
            "atomic_num": atom.GetAtomicNum(),
            "formal_charge": atom.GetFormalCharge(),
            "chiral_tag": atom.GetChiralTag(),
            "hybridization": atom.GetHybridization(),
            "num_explicit_hs": atom.GetNumExplicitHs(),
            "implicit_valence": atom.GetImplicitValence(),
            "degree": atom.GetDegree(),
            "symbol": atom.GetSymbol(),
            "ring_atom": atom.IsInRing(),
            "is_aromatic": atom.GetIsAromatic(),
        }

    mol_graph = nx.Graph()
    mol_graph.add_nodes_from(
        (atom.GetIdx(), atom_attributes(atom)) for atom in mol.GetAtoms()
    )
    mol_graph.add_edges_from(
        (
            bond.GetBeginAtomIdx(),
            bond.GetEndAtomIdx(),
            {"bond_type": bond.GetBondType()},
        )
        for bond in mol.GetBonds()
    )
    return mol_graph
def CalculateAromaticBondNumber(mol: Chem.Mol) -> float:
    """Count the bonds of ``mol`` whose bond type is named ``'AROMATIC'``.

    The count is returned as a plain ``int`` (``0`` when nothing matches).
    """
    return sum(
        1 for bond in mol.GetBonds()
        if bond.GetBondType().name == 'AROMATIC'
    )
def CalculateTripleBondNumber(mol: Chem.Mol) -> float:
    """Count the bonds of ``mol`` whose bond type is named ``'TRIPLE'``.

    The count is returned as a plain ``int`` (``0`` when nothing matches).
    """
    return sum(
        1 for bond in mol.GetBonds()
        if bond.GetBondType().name == 'TRIPLE'
    )
def CalculateDoubleBondNumber(mol: Chem.Mol) -> float:
    """Count the bonds of ``mol`` whose bond type is named ``'DOUBLE'``.

    The count is returned as a plain ``int`` (``0`` when nothing matches).
    """
    return sum(
        1 for bond in mol.GetBonds()
        if bond.GetBondType().name == 'DOUBLE'
    )
def CalculateSingleBondNumber(mol: Chem.Mol) -> float:
    """Count the bonds of ``mol`` whose bond type is named ``'SINGLE'``.

    The count is returned as a plain ``int`` (``0`` when nothing matches).
    """
    return sum(
        1 for bond in mol.GetBonds()
        if bond.GetBondType().name == 'SINGLE'
    )
def to_dgl(self: GraphFeaturiser, mol: Mol) -> dgl.DGLGraph:
    """Generates a DGL graph from a molecule.

    One node is created per atom and one edge per bond; reverse edges are
    then added (copying the edge features) so every bond is represented in
    both directions. Self-loops are appended when ``self.add_self_loops``
    is set.

    Args:
        mol: The molecule to featurise.

    Returns:
        A DGL graph of the featurised molecule, with one ``ndata`` entry per
        key of ``self.atom_featurisers`` and one ``edata`` entry per key of
        ``self.bond_featurisers``.
    """
    num_atoms = mol.GetNumAtoms()
    bonds = mol.GetBonds()

    # Force an integer dtype: for a molecule without bonds the lists are
    # empty and torch.tensor([]) would otherwise default to float32, which
    # is not a valid node-ID type.  Non-empty int lists already map to int64.
    bond_from = torch.tensor(
        [bond.GetBeginAtomIdx() for bond in bonds], dtype=torch.int64
    )
    bond_to = torch.tensor(
        [bond.GetEndAtomIdx() for bond in bonds], dtype=torch.int64
    )
    g = dgl.graph((bond_from, bond_to), num_nodes=num_atoms)

    for key, atom_featuriser in self.atom_featurisers.items():
        atom_features = atom_featuriser.process_molecule(mol)
        g.ndata[key] = torch.tensor(atom_features, dtype=torch.float)

    for key, bond_featuriser in self.bond_featurisers.items():
        bond_features = [
            bond_featuriser.process_bond(bond) for bond in bonds
        ]
        g.edata[key] = torch.tensor(bond_features, dtype=torch.float)

    # Features must be attached before this call so copy_edata can mirror
    # them onto the reverse edges.
    g = dgl.add_reverse_edges(g, copy_edata=True)

    if self.add_self_loops:
        g = dgl.add_self_loop(g)

    return g
def rdkit_to_openbabel_mol(mol: Chem.Mol) -> openbabel.OBMol:
    """Convert a RDKit molecule to an OpenBabel molecule.

    The conversion works on a copy of ``mol`` that has hydrogens added and
    has been kekulized, so every bond has an integer order. Atomic number,
    isotope, formal charge, spin multiplicity and (when present) the
    ``_PartialCharge`` property are transferred; coordinates are not.

    :param mol: RDKit molecule; it is not modified
    :return: the newly built OpenBabel molecule
    """
    obmol = openbabel.OBMol()

    # Add hydrogen atoms to complete molecule if needed (on a copy)
    rdkitmol = Chem.Mol(mol)
    # Perceive valence and ring information before assigning hydrogens
    rdkitmol.UpdatePropertyCache(strict=False)
    rdkitmol = Chem.AddHs(rdkitmol)
    # Kekulize so aromatic bonds (order 1.5) become explicit single/double
    # bonds; otherwise int() below would truncate them to single bonds.
    Chem.rdmolops.Kekulize(rdkitmol, clearAromaticFlags=True)

    # Add atoms -- read from the prepared copy, not from the raw input
    for atom in rdkitmol.GetAtoms():
        # Create new atom and assign values
        obatom = obmol.NewAtom()
        obatom.SetAtomicNum(atom.GetAtomicNum())
        obatom.SetIsotope(atom.GetIsotope())
        obatom.SetFormalCharge(atom.GetFormalCharge())
        # Atoms without the property (e.g. the hydrogens added above) keep
        # OpenBabel's default partial charge.
        if atom.HasProp('_PartialCharge'):
            obatom.SetPartialCharge(atom.GetDoubleProp('_PartialCharge'))
        obatom.SetSpinMultiplicity(atom.GetNumRadicalElectrons() + 1)

    # Add bonds; OpenBabel atom indices are 1-based, hence the +1
    for bond in rdkitmol.GetBonds():
        obmol.AddBond(
            bond.GetBeginAtomIdx() + 1,
            bond.GetEndAtomIdx() + 1,
            int(bond.GetBondTypeAsDouble()),
        )

    obmol.AssignSpinMultiplicity(True)
    return obmol
def CalculateLocalDipoleIndex(mol: Chem.Mol) -> float:
    """Calculate the local dipole index (D).

    Gasteiger charges are computed on ``mol`` (using the module-level
    ``iter_step``), then the absolute charge difference across every bond is
    averaged over the number of bonds and rounded to three decimals.
    Returns ``0`` for a molecule without bonds.
    """
    GMCharge.ComputeGasteigerCharges(mol, iter_step)

    # Charges in atom order, so a charge can be looked up by atom index.
    charges = [float(atom.GetProp('_GasteigerCharge')) for atom in mol.GetAtoms()]

    charge_gaps = [
        numpy.absolute(
            charges[bond.GetBeginAtom().GetIdx()]
            - charges[bond.GetEndAtom().GetIdx()]
        )
        for bond in mol.GetBonds()
    ]
    if not charge_gaps:
        return 0

    bond_count = len(mol.GetBonds())
    return round(sum(charge_gaps) / bond_count, 3)
def set_all_bonds(cls, mol: Chem.Mol, provenance_name: str) -> None:
    """
    Tag every bond of ``mol`` with the same provenance category by calling
    ``cls.set_bond`` on each of them.

    :param mol: molecule whose bonds are tagged
    :param provenance_name: provenance category string, one of
        original | main_novel | other_novel | linker
        (this method itself performs no validation)
    :return: None
    """
    tag_bond = cls.set_bond
    for each_bond in mol.GetBonds():
        tag_bond(each_bond, provenance_name)
def change_stereobond_in_imine_to_cis(mol: Chem.Mol) -> Chem.Mol:
    """Set every bond flagged ``STEREOANY`` to ``STEREOZ``.

    ``Chem.FindPotentialStereoBonds`` is run first; each bond whose stereo
    is then ``STEREOANY`` is logged at debug level and switched to
    ``STEREOZ``. Note that, despite the function name, no check is made here
    that the bond belongs to an imine.

    The molecule is modified in place and the same object is returned.
    """
    Chem.FindPotentialStereoBonds(mol)
    undefined_stereo = Chem.BondStereo.STEREOANY

    for bond in mol.GetBonds():
        if bond.GetStereo() != undefined_stereo:
            continue
        logger.debug(
            f"{bond.GetBeginAtom().GetSymbol()} {bond.GetSmarts()} {bond.GetEndAtom().GetSymbol()}"
        )
        bond.SetStereo(Chem.BondStereo.STEREOZ)

    return mol
def mol2nx(m: Chem.Mol) -> nx.Graph:
    """Convert a molecule into an undirected networkx graph.

    Nodes are keyed by atom index and carry the element symbol
    (``atomic_sym``) plus the first two coordinates (``x``, ``y``) returned
    by ``mol_coords`` for that atom. Bonds become attribute-free edges.
    """
    coords = mol_coords(m)
    graph = nx.Graph()

    for atom in m.GetAtoms():
        idx = atom.GetIdx()
        atom_xy = coords[idx]
        graph.add_node(
            idx,
            atomic_sym=atom.GetSymbol(),
            x=atom_xy[0],
            y=atom_xy[1],
        )

    graph.add_edges_from(
        (bond.GetBeginAtom().GetIdx(), bond.GetEndAtom().GetIdx())
        for bond in m.GetBonds()
    )
    return graph
def process(mol: Mol, device: torch.device, **kwargs):
    """Build the ChebNet input for one molecule.

    The graph has ``n = atoms + 1`` nodes: node 0 is an extra node and atom
    ``i`` is stored as node ``i + 1``. Every node gets a self-loop, every
    atom node gets an edge pointing to node 0, and every bond is added in
    both directions.

    Args:
        mol: molecule to convert.
        device: device the feature tensor is moved to.
        **kwargs: accepted but not used.

    Returns:
        ``ChebNetData(n, adj, vec)`` where ``adj`` is the dense adjacency
        matrix and ``vec`` stacks an all-zero feature row for node 0 on top
        of the atom features from ``feature.mol_feature``.
    """
    n = mol.GetNumAtoms() + 1

    graph = DGLGraph()
    graph.add_nodes(n)
    # Self-loop on every node, then atom -> node 0 edges (one direction only).
    graph.add_edges(graph.nodes(), graph.nodes())
    graph.add_edges(range(1, n), 0)

    for bond in mol.GetBonds():
        # Shift by one because node 0 is reserved for the extra node.
        src = bond.GetBeginAtomIdx() + 1
        dst = bond.GetEndAtomIdx() + 1
        graph.add_edge(src, dst)
        graph.add_edge(dst, src)

    adj = graph.adjacency_matrix(transpose=False).to_dense()

    # presumably (per-atom feature tensor, feature width) -- confirm against
    # feature.mol_feature
    atom_features, feature_dim = feature.mol_feature(mol)
    extra_node_row = torch.zeros((1, feature_dim))
    vec = torch.cat([extra_node_row, atom_features]).to(device)

    return ChebNetData(n, adj, vec)