def get_dihedral(self, mol, atom1, atom2, atom3, atom4, units="radians"):
    """
    It calculates the value of the dihedral angle in the specified units
    (default radians)

    Parameters
    ----------
    mol : an offpele.topology.Molecule
        The offpele's Molecule object
    atom1 : int
        Index of the first atom in the dihedral
    atom2 : int
        Index of the second atom in the dihedral
    atom3 : int
        Index of the third atom in the dihedral
    atom4 : int
        Index of the fourth atom in the dihedral
    units : str
        The units in which to calculate the angle (default is radians,
        can be radians or degrees)

    Returns
    -------
    angle : float
        The dihedral angle, expressed in the requested units

    Raises
    ------
    ValueError
        If units is neither "radians" nor "degrees"
    """
    # Reject unknown units up front: previously anything that was not
    # exactly "degrees" (e.g. the typo "degree") silently produced radians.
    if units not in ("radians", "degrees"):
        raise ValueError(
            "units must be 'radians' or 'degrees', got {!r}".format(units))

    from rdkit.Chem import rdMolTransforms

    conformer = mol.rdkit_molecule.GetConformer()

    if units == "degrees":
        return rdMolTransforms.GetDihedralDeg(conformer, atom1, atom2,
                                              atom3, atom4)

    return rdMolTransforms.GetDihedralRad(conformer, atom1, atom2,
                                          atom3, atom4)
def GetTorsionRad(self, torsion: list):
    """
    Measure the dihedral angle (in radians) spanned by four atoms, using the
    conformer held in ``self._conf``.

    The four atoms may be any atoms of the molecule; they do not need to be
    bonded to each other.

    Args:
        torsion (list): The four atom indexes defining the torsion.

    Returns:
        float: The dihedral angle of the torsion in rad.
    """
    conformer = self._conf
    dihedral_rad = rdMT.GetDihedralRad(conformer, *torsion)
    return dihedral_rad
def get_4body(self):
    """
    4-body parts: dihedral angles in forcefield

    Walks every bond listed in ``self.iasb`` (``self.nb`` entries), treats it
    as the central j-k bond and combines each neighbour i of j with each
    neighbour l of k into a torsion i-j-k-l. Each torsion is labelled by the
    dash-joined ``self.zs`` values of its atoms, ordered so that equivalent
    torsions share one label.

    * When ``self.imbt`` is true, only new labels are collected into
      ``self.mbs4``; no angle is evaluated.
    * Otherwise the absolute dihedral (rad) is appended to
      ``self.dic4[label]`` and, once all bonds are processed,
      ``self.gaussian`` is called per label of ``self.mbs4`` and the
      resulting ``self.ys`` values are stored in ``self.distr4``.

    Raises:
        ValueError: if an absolute dihedral larger than pi is encountered.
    """
    for ib in range(self.nb):
        j, k = self.iasb[ib]
        # Orient the central bond so that zs[j] <= zs[k].
        if self.zs[j] > self.zs[k]:
            j, k = k, j

        neibs1 = self.m.GetAtomWithIdx(j).GetNeighbors()
        neibs2 = self.m.GetAtomWithIdx(k).GetNeighbors()

        for nb_j in neibs1:
            for nb_k in neibs2:
                i = nb_j.GetIdx()
                l = nb_k.GetIdx()
                ias = [i, j, k, l]

                # A proper torsion needs four distinct atoms.
                if len(set(ias)) != 4:
                    continue

                # Central atoms tie: break the tie with the terminal atoms
                # so that the label is written in one canonical direction.
                if self.zs[j] == self.zs[k] and self.zs[i] > self.zs[l]:
                    ias = [l, k, j, i]

                type4 = '-'.join(['%d' % self.zs[ia] for ia in ias])

                if self.imbt:
                    # Type-collection pass: record the label, skip geometry.
                    if type4 not in self.mbs4:
                        self.mbs4.append(type4)
                    continue

                _tor = rdMolTransforms.GetDihedralRad(
                    self.m.GetConformer(), ias[0], ias[1], ias[2], ias[3])
                # Only the magnitude of the torsion is stored.
                tor = abs(_tor)
                if tor > np.pi:
                    # The original `raise '#ERROR:'` raised a plain string,
                    # which Python 3 turns into an unrelated TypeError.
                    raise ValueError(
                        '#ERROR: dihedral %s of type %s exceeds pi'
                        % (tor, type4))
                self.dic4[type4] += [tor]

    if not self.imbt:
        distr4 = []
        for mb4 in self.mbs4:
            # self.gaussian is expected to leave its result in self.ys
            # (that is what gets collected here) -- TODO confirm.
            self.gaussian(self.xs4, self.dic4[mb4], self.sigmas[2])
            distr4.append(self.ys)
        self.distr4 = distr4
def testGetSetDihedral(self):
    """Check that dihedral setters and getters agree, in degrees and radians.

    Loads the 3-cyclohexylpyridine test molecule, reads one dihedral, then
    sets the 8-0-19-21 dihedral to several values (including once through
    the reversed atom order 21-19-0-8) and verifies each value reads back.
    """
    mol_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                            'MolTransforms', 'test_data',
                            '3-cyclohexylpyridine.mol')
    # Positional flags: presumably sanitize=True, removeHs=False -- verify
    # against the RDKit MolFromMolFile signature.
    m = Chem.MolFromMolFile(mol_path, True, False)
    conf = m.GetConformer()

    dihedral = rdmt.GetDihedralDeg(conf, 0, 19, 21, 24)
    # assertAlmostEqual replaces the deprecated failUnlessAlmostEqual alias,
    # which no longer exists in Python 3.12+.
    self.assertAlmostEqual(dihedral, 176.05, 2)

    rdmt.SetDihedralDeg(conf, 8, 0, 19, 21, 65.0)
    dihedral = rdmt.GetDihedralDeg(conf, 8, 0, 19, 21)
    self.assertAlmostEqual(dihedral, 65.0, 1)

    rdmt.SetDihedralDeg(conf, 8, 0, 19, 21, -130.0)
    dihedral = rdmt.GetDihedralDeg(conf, 8, 0, 19, 21)
    self.assertAlmostEqual(dihedral, -130.0, 1)

    # Set through the reversed atom order, read back through the forward one.
    rdmt.SetDihedralRad(conf, 21, 19, 0, 8, -2. / 3. * math.pi)
    dihedral = rdmt.GetDihedralRad(conf, 8, 0, 19, 21)
    self.assertAlmostEqual(dihedral, -2. / 3. * math.pi, 1)
    dihedral = rdmt.GetDihedralDeg(conf, 8, 0, 19, 21)
    self.assertAlmostEqual(dihedral, -120.0, 1)
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, mc, ob_mc,
               scc_mean_and_std):
    """Assemble the graph tuple (node, edge and coupling data) of one molecule.

    Parameters
    ----------
    molecule_name : key passed to every ``get_group`` call below.
    gb_structure : grouped structure table; each group provides the columns
        'atom_index', 'atom', 'x', 'y', 'z' (presumably a pandas GroupBy).
    gb_scalar_coupling : grouped coupling table; each group provides 'id',
        'atom_index_0', 'atom_index_1', 'type', 'scalar_coupling_constant',
        'fc', 'sd', 'pso', 'dso'.
    mc : grouped table with 'atom_index' and 'mulliken_charge' columns.
    ob_mc : grouped table; every column except 'molecule_name' and
        'atom_index' is used as a per-atom feature.
    scc_mean_and_std : iterable of (mean, std) pairs, applied in order to the
        'fc', 'sd', 'pso', 'dso' columns.

    Returns
    -------
    tuple
        ``(molecule_name, smiles, [a, xyz], node_features, edge_features,
        edge_index, coupling, bond_type_detail)`` where ``node_features`` and
        ``edge_features`` are lists of per-atom / per-edge arrays. Edges are
        all ordered atom pairs (i, j) with i != j.
    """
    # https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key
    # ---- coupling targets
    df = gb_scalar_coupling.get_group(molecule_name)
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type',
    #  'scalar_coupling_constant', 'fc', 'sd', 'pso', 'dso'],

    # normalize each coupling contribution with its (mean, std)
    scc_arr = df[['fc', 'sd', 'pso', 'dso']].values
    norm_scc = np.zeros((len(df), 4))
    for i, (mean_v, std_v) in enumerate(scc_mean_and_std):
        norm_scc[:, i] = (scc_arr[:, i] - mean_v) / std_v

    # make coupling
    coupling = (
        df.id.values,
        scc_arr,
        df[['atom_index_0', 'atom_index_1']].values,
        # coupling type stored as its index in COUPLING_TYPE
        np.array([COUPLING_TYPE.index(t) for t in df.type.values], np.int32),
        df.scalar_coupling_constant.values,
        norm_scc,
    )

    # ---- structure
    df = gb_structure.get_group(molecule_name)
    df = df.sort_values(['atom_index'], ascending=True)
    # ['molecule_name', 'atom_index', 'atom', 'x', 'y', 'z']
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values
    mol = mol_from_axyz(a, xyz)

    # check that the molecule keeps the atom order of the table
    assert (
        a == [
            mol.GetAtomWithIdx(i).GetSymbol()
            for i in range(mol.GetNumAtoms())
        ])

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    ## ** node **
    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)

    # ring check
    isotope = np.zeros((num_atom, 1), np.uint8)
    isin_ring = np.zeros((num_atom, 1), np.uint8)
    ring_types = [3, 4, 5, 6, 7, 8]
    n_ring = np.zeros((num_atom, len(ring_types)), np.uint8)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        symbol[i] = one_hot_encoding(atom.GetSymbol(), SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(),
                                            HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()

        isotope[i] = atom.GetIsotope()
        isin_ring[i] = atom.IsInRing()
        for j, ring_i in enumerate(ring_types):
            n_ring[i, j] = atom.IsInRingSize(ring_i)

    # flag every atom that belongs to a Donor / Acceptor feature
    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # Build the index lists used for the edge angles: every bond in both
    # directions, then self-joins to enumerate bonded triples (a0-a1-a2)
    # and bonded quadruples (a0-a1-a2-a3).
    bond_arr_0 = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx())
                  for b in mol.GetBonds()]
    bond_arr_1 = [(end, begin) for begin, end in bond_arr_0]
    bond_df = pd.DataFrame(bond_arr_0 + bond_arr_1, columns=["a0", "a1"])
    bond_df3 = bond_df.merge(bond_df.rename(columns={
        "a1": "a2",
        "a0": "a1"
    }),
                             on="a1")
    bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2]
    bond_df4 = bond_df3.merge(bond_df.rename(columns={
        "a0": "a2",
        "a1": "a3"
    }),
                              on="a2")
    bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3)
                        & (bond_df4.a1 != bond_df4.a3)]
    bond_idx3 = bond_df3.values
    bond_idx4 = bond_df4.values

    ## ** edge **
    num_edge = num_atom * num_atom - num_atom
    # NOTE(review): uint8 indices wrap for molecules with more than 256
    # atoms; kept as-is so the output dtype does not change.
    edge_index = np.zeros((num_edge, 2), np.uint8)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint8)  # category
    distance = np.zeros((num_edge, 1), np.float32)  # real
    angle = np.zeros((num_edge, 1), np.float32)  # real
    dihedrals_min = np.zeros((num_edge, 1), np.float32)
    dihedrals_max = np.zeros((num_edge, 1), np.float32)
    conjugate = np.zeros((num_edge, 1), np.uint8)
    is_ring_edge = np.zeros((num_edge, 1), np.uint8)
    bond_type_detail = np.zeros((num_edge, 1), np.uint16)

    ij = 0
    ij_dict = {}  # (i, j) -> row of that edge in the edge arrays
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]

            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(),
                                                 BOND_TYPE)
                conjugate[ij] = bond.GetIsConjugated()
                # add ring info: both end atoms are ring members
                is_ring_edge[ij] = isin_ring[i, 0] and isin_ring[j, 0]
                # bond type detail, looked up by "<atom>_<bond type>_<atom>"
                bond_type_detail[ij] = bond_detail_dict[a[i] + "_" + str(
                    bond.GetBondType()) + "_" + a[j]]

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            ij_dict[(i, j)] = ij
            ij += 1

    conf = mol.GetConformer(0)

    for i, j, k in bond_idx3:
        # The angle Ni-Nj-Nk is stored on the i-k edge.
        # cos is shifted by +2 so that a real value (range [1, 3]) can be
        # told apart from the 0 meaning "no angle on this edge".
        angle[ij_dict[(i, k)]] = np.cos(
            rdMolTransforms.GetAngleRad(conf, int(i), int(j), int(k))) + 2

    for i, j, k, l in bond_idx4:
        # The dihedral Ni-Nj-Nk-Nl is stored on the i-l edge, with the same
        # +2 shift as the angles.
        dihedral = np.cos(
            rdMolTransforms.GetDihedralRad(conf, int(i), int(j), int(k),
                                           int(l))) + 2
        # If either bond angle of the torsion is close to linear
        # (shifted cos < 1.1, i.e. cos < -0.9) the dihedral is flagged with
        # -1. The i-j-k angle lives on edge (i, k) and the j-k-l angle on
        # edge (j, l); the original tested edge (i, k) twice and never
        # looked at the second angle.
        if angle[ij_dict[(i, k)]] < 1.1 or angle[ij_dict[(j, l)]] < 1.1:
            dihedral = -1
        # NOTE(review): min/max are taken against angle[(i, l)] rather than
        # the running dihedrals_min/max -- kept unchanged, confirm intent.
        dihedrals_min[ij_dict[(i, l)]] = min(angle[ij_dict[(i, l)]], dihedral)
        dihedrals_max[ij_dict[(i, l)]] = max(angle[ij_dict[(i, l)]], dihedral)\
            if angle[ij_dict[(i, l)]] != 0 else dihedral
    dihedrals_diff = dihedrals_max - dihedrals_min

    # add mulliken charge
    mc_df = mc.get_group(molecule_name)
    mc_df = mc_df.sort_values(['atom_index'], ascending=True)
    mc_values = mc_df["mulliken_charge"].values.reshape([-1, 1])

    ob_mc_df = ob_mc.get_group(molecule_name)
    ob_mc_df = ob_mc_df.sort_values(['atom_index'], ascending=True)
    ob_mc_values = ob_mc_df[[
        c for c in ob_mc_df.columns
        if c not in ["molecule_name", "atom_index"]
    ]].values

    ##-------------------
    graph = (
        molecule_name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        # node feature
        [
            symbol, acceptor, donor, aromatic, hybridization, num_h, isotope,
            isin_ring, n_ring, atomic, mc_values, ob_mc_values
        ],
        # edge feature
        [
            bond_type, distance, angle, dihedrals_min, dihedrals_max,
            dihedrals_diff, conjugate, is_ring_edge
        ],
        edge_index,
        coupling,
        # edge bond type detail info (encoded via bond_detail_dict)
        bond_type_detail,
    )
    return graph
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, ): #https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key #---- df = gb_scalar_coupling.get_group(molecule_name) #---- df = gb_structure.get_group(molecule_name) df = df.sort_values(['atom_index'], ascending=True) a = df.atom.values.tolist() xyz = df[['x','y','z']].values mol = mol_from_axyz(a, xyz) #--- assert( #check a == [ mol.GetAtomWithIdx(i).GetSymbol() for i in range(mol.GetNumAtoms())] ) #--- factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')) feature = factory.GetFeaturesForMol(mol) ## ** node ** num_atom = mol.GetNumAtoms() symbol = np.zeros((num_atom,len(SYMBOL)),np.uint8) #category acceptor = np.zeros((num_atom,1),np.uint8) donor = np.zeros((num_atom,1),np.uint8) aromatic = np.zeros((num_atom,1),np.uint8) hybridization = np.zeros((num_atom,len(HYBRIDIZATION)),np.uint8) num_h = np.zeros((num_atom,1),np.float32)#real atomic = np.zeros((num_atom,1),np.float32) # new isotope = np.zeros((num_atom,1),np.uint8) isin_ring = np.zeros((num_atom,1),np.uint8) ring_types = [3,4,5,6] n_ring = np.zeros((num_atom,len(ring_types)),np.uint8) for i in range(num_atom): atom = mol.GetAtomWithIdx(i) symbol[i] = one_hot_encoding(atom.GetSymbol(),SYMBOL) aromatic[i] = atom.GetIsAromatic() hybridization[i] = one_hot_encoding(atom.GetHybridization(),HYBRIDIZATION) num_h[i] = atom.GetTotalNumHs(includeNeighbors=True) atomic[i] = atom.GetAtomicNum() #new isotope[i] = atom.GetIsotope() isin_ring[i] = atom.IsInRing() for j,ring_i in enumerate(ring_types): n_ring[i,j] = atom.IsInRingSize(ring_i) #[f.GetFamily() for f in feature] for t in range(0, len(feature)): if feature[t].GetFamily() == 'Donor': for i in feature[t].GetAtomIds(): donor[i] = 1 elif feature[t].GetFamily() == 'Acceptor': for i in feature[t].GetAtomIds(): acceptor[i] = 1 #edge_angleを出すためのindex_listの作成 n_bonds = len(mol.GetBonds()) bond_arr_0 = 
[(mol.GetBonds()[i].GetBeginAtomIdx(), mol.GetBonds()[i].GetEndAtomIdx()) for i in range(n_bonds)] bond_arr_1 = [(mol.GetBonds()[i].GetEndAtomIdx(), mol.GetBonds()[i].GetBeginAtomIdx()) for i in range(n_bonds)] bond_df = pd.DataFrame(bond_arr_0+bond_arr_1,columns=["a0","a1"]) bond_df3 = bond_df.merge(bond_df.rename(columns={"a1":"a2","a0":"a1"}), on="a1") bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2] bond_df4 = bond_df3.merge(bond_df.rename(columns={"a0":"a2","a1":"a3"}), on="a2") bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3)&(bond_df4.a1 != bond_df4.a3)] bond_idx3 = bond_df3.values bond_idx4 = bond_df4.values #all_bond_df = all_bond_df.sort_values(["a0","a1","a2"]).reset_index(drop=True) ## ** edge ** num_edge = num_atom*num_atom - num_atom edge_index = np.zeros((num_edge,2), np.uint8) bond_type = np.zeros((num_edge,len(BOND_TYPE)), np.uint8)#category distance = np.zeros((num_edge,1),np.float32) #real angle = np.zeros((num_edge,1),np.float32) #real dihedrals_min = np.zeros((num_edge,1),np.float32) dihedrals_max = np.zeros((num_edge,1),np.float32) dihedrals_diff = np.zeros((num_edge,1),np.float32) conjugate = np.zeros((num_edge,1),np.uint8) #GetIsConjugated norm_xyz = preprocessing.normalize(xyz, norm='l2') ij=0 ij_dict = {} for i in range(num_atom): for j in range(num_atom): if i==j: continue edge_index[ij] = [i,j] bond = mol.GetBondBetweenAtoms(i, j) if bond is not None: bond_type[ij] = one_hot_encoding(bond.GetBondType(),BOND_TYPE) conjugate[ij] = bond.GetIsConjugated() distance[ij] = np.linalg.norm(xyz[i] - xyz[j]) #angle[ij] = (norm_xyz[i]*norm_xyz[j]).sum() ij_dict[(i,j)] = ij ij+=1 conf = mol.GetConformer(0) for i,j,k in bond_idx3: #角Ni-Nj-Nkはi-kエッジとして格納 ## なにもない0とcos = 0を区別できるように-1する angle[ij_dict[(i,k)]] = np.cos(rdMolTransforms.GetAngleRad(conf,int(i),int(j),int(k)))-1 for i,j,k,l in bond_idx4: #角Ni-Nj-Nk-Nlはi-lエッジとして格納 ## なにもない0とcos = 0を区別できるように-1する dihedral = np.cos(rdMolTransforms.GetDihedralRad(conf,int(i),int(j),int(k),int(l)))-1 
dihedrals_min[ij_dict[(i,l)]] = min(angle[ij_dict[(i,l)]], dihedral) dihedrals_max[ij_dict[(i,l)]]= max(angle[ij_dict[(i,l)]], dihedral)\ if angle[ij_dict[(i,l)]] != 0 else dihedral dihedrals_diff = dihedrals_max - dihedrals_min ##------------------- atom_ret = pd.DataFrame({ "molecule_name":molecule_name, "atom_index": df["atom_index"].values, "is_accepter": acceptor, "is_donor": donor, "is_aromatic":aromatic, "is_sp1": hybridization[:,0], "is_sp2": hybridization[:,1], "is_sp3": hybridization[:,2], "is_isotope": isotope, "isin_ring": isin_ring, "is_ring3": n_ring[:,0], "is_ring4": n_ring[:,1], "is_ring5": n_ring[:,2], "is_ring6": n_ring[:,3], }) [bond_type, distance, angle, dihedrals_min, dihedrals_max, dihedrals_diff, conjugate,], edge_index,