def _check_and_straighten_at_triple_bond(at, bond, conf):
    """Make the angle around a two-coordinate atom exactly 180 degrees.

    The angle neighbor0 - ``at`` - neighbor1 is read from ``conf``; when it
    differs from pi by more than 0.017 rad (about one degree) it is reset to
    pi in place.

    Args:
        at: atom at the apex of the angle; must have degree 2.
        bond: accepted for interface compatibility; not used here.
        conf: conformer whose coordinates are inspected and possibly modified.

    Raises:
        ValueError: if ``at`` does not have exactly two neighbors.
    """
    if at.GetDegree() != 2:
        raise ValueError("only works with degree 2")

    neighbor_ids = [nbr.GetIdx() for nbr in at.GetNeighbors()]
    first, second = neighbor_ids[0], neighbor_ids[1]
    apex = at.GetIdx()

    current = rdMolTransforms.GetAngleRad(conf, first, apex, second)
    deviation = abs(abs(current) - math.pi)
    # Tolerance of roughly one degree before the geometry is touched.
    if deviation > 0.017:
        rdMolTransforms.SetAngleRad(conf, first, apex, second, math.pi)
def _cleanup_allenes(m):
    """Straighten every acyclic ``*=C=*`` unit of a 2D depiction in place.

    Each match of the SMARTS ``*=[C;R0]=*`` has its angle measured in the
    molecule's conformer; angles that deviate from pi by more than 0.017 rad
    (about one degree) are set to pi.

    Args:
        m: molecule whose conformer is inspected and possibly modified.

    Raises:
        ValueError: if the conformer is flagged as 3D.
    """
    conf = m.GetConformer()
    if conf.Is3D():
        raise ValueError("can only operate on 2D conformers")

    allene_query = Chem.MolFromSmarts('*=[C;R0]=*')
    for hit in m.GetSubstructMatches(allene_query):
        left, center, right = hit[0], hit[1], hit[2]
        measured = rdMolTransforms.GetAngleRad(conf, left, center, right)
        deviation = abs(abs(measured) - math.pi)
        # Only touch the coordinates when the bend exceeds about one degree.
        if deviation > 0.017:
            rdMolTransforms.SetAngleRad(conf, left, center, right, math.pi)
def testGetSetAngle(self):
    """Check that angle getters/setters round-trip in degrees and radians.

    Loads the 3-cyclohexylpyridine test molecule, verifies the initial
    0-19-21 angle, then sets the angle (once in degrees, once in radians with
    the atom order reversed) and verifies the value read back in both units.
    """
    mol_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                            'MolTransforms', 'test_data',
                            '3-cyclohexylpyridine.mol')
    # Positional flags follow MolFromMolFile's (sanitize, removeHs) order.
    m = Chem.MolFromMolFile(mol_path, True, False)
    conf = m.GetConformer()

    # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which was
    # deprecated for years and no longer exists in Python 3.12+.
    angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
    self.assertAlmostEqual(angle, 109.7, 1)

    rdmt.SetAngleDeg(conf, 0, 19, 21, 125.0)
    angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
    self.assertAlmostEqual(angle, 125.0, 1)

    # Set with the end atoms swapped, read back in the original order.
    rdmt.SetAngleRad(conf, 21, 19, 0, math.pi / 2.)
    angle = rdmt.GetAngleRad(conf, 0, 19, 21)
    self.assertAlmostEqual(angle, math.pi / 2., 1)
    angle = rdmt.GetAngleDeg(conf, 0, 19, 21)
    self.assertAlmostEqual(angle, 90.0, 1)
def GetRingBondAng(mol, ringpath):
    """
    Compute the bond angle at every consecutive atom triple around a ring.

    The ring is walked cyclically: for each position i the angle is taken
    over ringpath[i], ringpath[i+1], ringpath[i+2], with indices wrapping
    around the end of the path.

    Input:

    mol: rdmol

    ringpath: list

    Return:

    bondang: list (output in radian)
    """
    ring_size = len(ringpath)

    # Collect the wrapped (i, i+1, i+2) triples before touching the conformer.
    triples = []
    for start in range(ring_size):
        middle = (start + 1) % ring_size
        end = (start + 2) % ring_size
        triples.append([ringpath[start], ringpath[middle], ringpath[end]])

    conformer = mol.GetConformer()
    bondang = []
    for first, apex, last in triples:
        bondang.append(
            rdMolTransforms.GetAngleRad(conformer, first, apex, last))
    return bondang
def get_3body(self):
    """
    3-body parts: angles spanned by 3 adjacent atoms,
    must be a valid angle in forcefield

    For every atom j with ``self.zs[j] > 1`` and at least two neighbors, each
    unordered neighbor pair (i, k) defines an angle i-j-k. The triple is
    ordered so that the end atom with the smaller ``self.zs`` value comes
    first, and its type label is the three ``zs`` values joined with '-'.

    * When ``self.imbt`` is true, only the type labels are collected into
      ``self.mbs3`` (no geometry is read).
    * Otherwise the absolute angle (radians) is appended to
      ``self.dic3[type3]`` and, after the scan, ``self.distr3`` is filled
      with one entry per label in ``self.mbs3``: ``self.gaussian`` is called
      and ``self.ys`` is read afterwards (presumably set by ``gaussian`` --
      confirm against its definition).

    Raises:
        ValueError: if a computed angle exceeds pi.
    """
    for aj in self.m.GetAtoms():
        j = aj.GetIdx()
        zj = self.zs[j]
        neibs = aj.GetNeighbors()
        nneib = len(neibs)
        if not (zj > 1 and nneib > 1):
            continue
        for i0 in range(nneib):
            for k0 in range(i0 + 1, nneib):
                i, k = neibs[i0].GetIdx(), neibs[k0].GetIdx()
                # Canonical order: end atom with the smaller zs goes first.
                ias = [k, j, i] if self.zs[i] > self.zs[k] else [i, j, k]
                type3 = '-'.join('%d' % self.zs[ia] for ia in ias)
                if self.imbt:
                    # Type-discovery mode: record the label, skip geometry.
                    if type3 not in self.mbs3:
                        self.mbs3.append(type3)
                    continue
                theta = abs(
                    rdMolTransforms.GetAngleRad(self.m.GetConformer(),
                                                ias[0], ias[1], ias[2]))
                if theta > np.pi:
                    # Raising a bare string is a TypeError in Python 3;
                    # raise a real exception carrying the diagnostic.
                    raise ValueError('#ERROR: theta > np.pi? (theta=%r)' %
                                     (theta, ))
                self.dic3[type3] += [theta]

    if not self.imbt:
        distr3 = []
        for mb3 in self.mbs3:
            self.gaussian(self.xs3, self.dic3[mb3], self.sigmas[1])
            distr3.append(self.ys)
        self.distr3 = distr3
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, mc, ob_mc,
               scc_mean_and_std):
    """Assemble node, edge and coupling features of one molecule.

    Args:
        molecule_name: key passed to ``get_group`` on every grouped frame.
        gb_structure: grouped frame; the group needs the columns
            ``atom_index``, ``atom``, ``x``, ``y``, ``z``.
        gb_scalar_coupling: grouped frame; the group needs ``id``,
            ``atom_index_0``, ``atom_index_1``, ``type``,
            ``scalar_coupling_constant``, ``fc``, ``sd``, ``pso``, ``dso``.
        mc: grouped frame with ``atom_index`` and ``mulliken_charge``.
        ob_mc: grouped frame; every column except ``molecule_name`` and
            ``atom_index`` is used as a per-atom feature.
        scc_mean_and_std: iterable of ``(mean, std)`` pairs applied, in order,
            to the ``fc``, ``sd``, ``pso``, ``dso`` columns.

    Returns:
        Tuple ``(molecule_name, smiles, [symbols, xyz], node_features,
        edge_features, edge_index, coupling, bond_type_detail)`` where
        ``node_features`` and ``edge_features`` are lists of arrays and
        edges enumerate every ordered atom pair ``(i, j)`` with ``i != j``.
    """
    # https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key
    # ---- coupling targets
    df = gb_scalar_coupling.get_group(molecule_name)
    # ['id', 'molecule_name', 'atom_index_0', 'atom_index_1', 'type',
    #  'scalar_coupling_constant', 'fc', 'sd', 'pso', 'dso']

    # Normalize each contribution column with its supplied mean/std.
    scc_arr = df[['fc', 'sd', 'pso', 'dso']].values
    norm_scc = np.zeros((len(df), 4))
    for i, (mean_v, std_v) in enumerate(scc_mean_and_std):
        norm_scc[:, i] = (scc_arr[:, i] - mean_v) / std_v

    coupling = (
        df.id.values,
        scc_arr,
        df[['atom_index_0', 'atom_index_1']].values,
        np.array([COUPLING_TYPE.index(t) for t in df.type.values], np.int32),
        df.scalar_coupling_constant.values,
        norm_scc)

    # ---- structure
    df = gb_structure.get_group(molecule_name)
    df = df.sort_values(['atom_index'], ascending=True)
    # ['molecule_name', 'atom_index', 'atom', 'x', 'y', 'z']
    a = df.atom.values.tolist()
    xyz = df[['x', 'y', 'z']].values
    mol = mol_from_axyz(a, xyz)

    # Sanity check: atom order in the built molecule matches the input table.
    assert (a == [
        mol.GetAtomWithIdx(i).GetSymbol() for i in range(mol.GetNumAtoms())
    ])

    factory = ChemicalFeatures.BuildFeatureFactory(
        os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))
    feature = factory.GetFeaturesForMol(mol)

    ## ** node **
    num_atom = mol.GetNumAtoms()
    symbol = np.zeros((num_atom, len(SYMBOL)), np.uint8)  # category
    acceptor = np.zeros((num_atom, 1), np.uint8)
    donor = np.zeros((num_atom, 1), np.uint8)
    aromatic = np.zeros((num_atom, 1), np.uint8)
    hybridization = np.zeros((num_atom, len(HYBRIDIZATION)), np.uint8)
    num_h = np.zeros((num_atom, 1), np.float32)  # real
    atomic = np.zeros((num_atom, 1), np.float32)
    # ring membership
    isotope = np.zeros((num_atom, 1), np.uint8)
    isin_ring = np.zeros((num_atom, 1), np.uint8)
    ring_types = [3, 4, 5, 6, 7, 8]
    n_ring = np.zeros((num_atom, len(ring_types)), np.uint8)

    for i in range(num_atom):
        atom = mol.GetAtomWithIdx(i)
        symbol[i] = one_hot_encoding(atom.GetSymbol(), SYMBOL)
        aromatic[i] = atom.GetIsAromatic()
        hybridization[i] = one_hot_encoding(atom.GetHybridization(),
                                            HYBRIDIZATION)
        num_h[i] = atom.GetTotalNumHs(includeNeighbors=True)
        atomic[i] = atom.GetAtomicNum()
        isotope[i] = atom.GetIsotope()
        isin_ring[i] = atom.IsInRing()
        for j, ring_i in enumerate(ring_types):
            n_ring[i, j] = atom.IsInRingSize(ring_i)

    for feat in feature:
        family = feat.GetFamily()
        if family == 'Donor':
            for i in feat.GetAtomIds():
                donor[i] = 1
        elif family == 'Acceptor':
            for i in feat.GetAtomIds():
                acceptor[i] = 1

    # Build the index lists needed for the angle/dihedral edge features:
    # every bond in both directions, then self-joins give all bonded paths
    # of three atoms (a0-a1-a2) and four atoms (a0-a1-a2-a3).
    # The bond sequence is read once instead of once per index lookup.
    bonds = list(mol.GetBonds())
    bond_arr_0 = [(b.GetBeginAtomIdx(), b.GetEndAtomIdx()) for b in bonds]
    bond_arr_1 = [(end, begin) for begin, end in bond_arr_0]
    bond_df = pd.DataFrame(bond_arr_0 + bond_arr_1, columns=["a0", "a1"])
    bond_df3 = bond_df.merge(bond_df.rename(columns={
        "a1": "a2",
        "a0": "a1"
    }),
                             on="a1")
    bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2]
    bond_df4 = bond_df3.merge(bond_df.rename(columns={
        "a0": "a2",
        "a1": "a3"
    }),
                              on="a2")
    bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3) &
                        (bond_df4.a1 != bond_df4.a3)]
    bond_idx3 = bond_df3.values
    bond_idx4 = bond_df4.values

    ## ** edge **
    num_edge = num_atom * num_atom - num_atom
    # NOTE(review): uint8 wraps silently for atom indices above 255; dtype is
    # kept as-is because downstream consumers may rely on it.
    edge_index = np.zeros((num_edge, 2), np.uint8)
    bond_type = np.zeros((num_edge, len(BOND_TYPE)), np.uint8)  # category
    distance = np.zeros((num_edge, 1), np.float32)  # real
    angle = np.zeros((num_edge, 1), np.float32)  # real
    dihedrals_min = np.zeros((num_edge, 1), np.float32)
    dihedrals_max = np.zeros((num_edge, 1), np.float32)
    dihedrals_diff = np.zeros((num_edge, 1), np.float32)
    conjugate = np.zeros((num_edge, 1), np.uint8)
    is_ring_edge = np.zeros((num_edge, 1), np.uint8)
    bond_type_detail = np.zeros((num_edge, 1), np.uint16)

    ij = 0
    ij_dict = {}  # (i, j) -> row of that ordered pair in the edge arrays
    for i in range(num_atom):
        for j in range(num_atom):
            if i == j:
                continue
            edge_index[ij] = [i, j]

            bond = mol.GetBondBetweenAtoms(i, j)
            if bond is not None:
                bond_type[ij] = one_hot_encoding(bond.GetBondType(), BOND_TYPE)
                conjugate[ij] = bond.GetIsConjugated()
                # ring info: both end atoms are ring atoms
                is_ring_edge[ij] = isin_ring[i, 0] and isin_ring[j, 0]
                # detailed bond type keyed by "<symbol>_<bondtype>_<symbol>"
                bond_type_detail[ij] = bond_detail_dict[a[i] + "_" + str(
                    bond.GetBondType()) + "_" + a[j]]

            distance[ij] = np.linalg.norm(xyz[i] - xyz[j])
            ij_dict[(i, j)] = ij
            ij += 1

    conf = mol.GetConformer(0)
    for i, j, k in bond_idx3:
        # The angle Ni-Nj-Nk is stored on the i-k edge.
        # cos is shifted by +2 (range [1, 3]) so a real value can never be
        # confused with the 0 of an edge that has no angle.
        angle[ij_dict[(i, k)]] = np.cos(
            rdMolTransforms.GetAngleRad(conf, int(i), int(j), int(k))) + 2

    for i, j, k, l in bond_idx4:
        # The dihedral Ni-Nj-Nk-Nl is stored on the i-l edge, with the same
        # +2 shift as the angles.
        dihedral = np.cos(
            rdMolTransforms.GetDihedralRad(conf, int(i), int(j), int(k),
                                           int(l))) + 2
        # A dihedral is ill-defined when either bond angle of the path is
        # (nearly) linear, i.e. shifted cos < 1.1. The i-j-k angle lives on
        # edge (i, k) and the j-k-l angle on edge (j, l); the original code
        # tested (i, k) twice and never looked at j-k-l.
        if angle[ij_dict[(i, k)]] < 1.1 or angle[ij_dict[(j, l)]] < 1.1:
            dihedral = -1
        dihedrals_min[ij_dict[(i, l)]] = min(angle[ij_dict[(i, l)]], dihedral)
        dihedrals_max[ij_dict[(i, l)]] = (
            max(angle[ij_dict[(i, l)]], dihedral)
            if angle[ij_dict[(i, l)]] != 0 else dihedral)
    dihedrals_diff = dihedrals_max - dihedrals_min

    # Mulliken charges, ordered by atom index.
    mc_df = mc.get_group(molecule_name)
    mc_df = mc_df.sort_values(['atom_index'], ascending=True)
    mc_values = mc_df["mulliken_charge"].values.reshape([-1, 1])

    ob_mc_df = ob_mc.get_group(molecule_name)
    ob_mc_df = ob_mc_df.sort_values(['atom_index'], ascending=True)
    ob_mc_values = ob_mc_df[[
        c for c in ob_mc_df.columns
        if c not in ["molecule_name", "atom_index"]
    ]].values

    ##-------------------
    graph = (
        molecule_name,
        Chem.MolToSmiles(mol),
        [a, xyz],
        # node feature
        [
            symbol, acceptor, donor, aromatic, hybridization, num_h, isotope,
            isin_ring, n_ring, atomic, mc_values, ob_mc_values
        ],
        # edge feature
        [
            bond_type, distance, angle, dihedrals_min, dihedrals_max,
            dihedrals_diff, conjugate, is_ring_edge
        ],
        edge_index,
        coupling,
        # edge bond type detail info
        bond_type_detail,
    )
    return graph
def make_graph(molecule_name, gb_structure, gb_scalar_coupling, ): #https://stackoverflow.com/questions/14734533/how-to-access-pandas-groupby-dataframe-by-key #---- df = gb_scalar_coupling.get_group(molecule_name) #---- df = gb_structure.get_group(molecule_name) df = df.sort_values(['atom_index'], ascending=True) a = df.atom.values.tolist() xyz = df[['x','y','z']].values mol = mol_from_axyz(a, xyz) #--- assert( #check a == [ mol.GetAtomWithIdx(i).GetSymbol() for i in range(mol.GetNumAtoms())] ) #--- factory = ChemicalFeatures.BuildFeatureFactory(os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef')) feature = factory.GetFeaturesForMol(mol) ## ** node ** num_atom = mol.GetNumAtoms() symbol = np.zeros((num_atom,len(SYMBOL)),np.uint8) #category acceptor = np.zeros((num_atom,1),np.uint8) donor = np.zeros((num_atom,1),np.uint8) aromatic = np.zeros((num_atom,1),np.uint8) hybridization = np.zeros((num_atom,len(HYBRIDIZATION)),np.uint8) num_h = np.zeros((num_atom,1),np.float32)#real atomic = np.zeros((num_atom,1),np.float32) # new isotope = np.zeros((num_atom,1),np.uint8) isin_ring = np.zeros((num_atom,1),np.uint8) ring_types = [3,4,5,6] n_ring = np.zeros((num_atom,len(ring_types)),np.uint8) for i in range(num_atom): atom = mol.GetAtomWithIdx(i) symbol[i] = one_hot_encoding(atom.GetSymbol(),SYMBOL) aromatic[i] = atom.GetIsAromatic() hybridization[i] = one_hot_encoding(atom.GetHybridization(),HYBRIDIZATION) num_h[i] = atom.GetTotalNumHs(includeNeighbors=True) atomic[i] = atom.GetAtomicNum() #new isotope[i] = atom.GetIsotope() isin_ring[i] = atom.IsInRing() for j,ring_i in enumerate(ring_types): n_ring[i,j] = atom.IsInRingSize(ring_i) #[f.GetFamily() for f in feature] for t in range(0, len(feature)): if feature[t].GetFamily() == 'Donor': for i in feature[t].GetAtomIds(): donor[i] = 1 elif feature[t].GetFamily() == 'Acceptor': for i in feature[t].GetAtomIds(): acceptor[i] = 1 #edge_angleを出すためのindex_listの作成 n_bonds = len(mol.GetBonds()) bond_arr_0 = 
[(mol.GetBonds()[i].GetBeginAtomIdx(), mol.GetBonds()[i].GetEndAtomIdx()) for i in range(n_bonds)] bond_arr_1 = [(mol.GetBonds()[i].GetEndAtomIdx(), mol.GetBonds()[i].GetBeginAtomIdx()) for i in range(n_bonds)] bond_df = pd.DataFrame(bond_arr_0+bond_arr_1,columns=["a0","a1"]) bond_df3 = bond_df.merge(bond_df.rename(columns={"a1":"a2","a0":"a1"}), on="a1") bond_df3 = bond_df3[bond_df3.a0 != bond_df3.a2] bond_df4 = bond_df3.merge(bond_df.rename(columns={"a0":"a2","a1":"a3"}), on="a2") bond_df4 = bond_df4[(bond_df4.a0 != bond_df4.a3)&(bond_df4.a1 != bond_df4.a3)] bond_idx3 = bond_df3.values bond_idx4 = bond_df4.values #all_bond_df = all_bond_df.sort_values(["a0","a1","a2"]).reset_index(drop=True) ## ** edge ** num_edge = num_atom*num_atom - num_atom edge_index = np.zeros((num_edge,2), np.uint8) bond_type = np.zeros((num_edge,len(BOND_TYPE)), np.uint8)#category distance = np.zeros((num_edge,1),np.float32) #real angle = np.zeros((num_edge,1),np.float32) #real dihedrals_min = np.zeros((num_edge,1),np.float32) dihedrals_max = np.zeros((num_edge,1),np.float32) dihedrals_diff = np.zeros((num_edge,1),np.float32) conjugate = np.zeros((num_edge,1),np.uint8) #GetIsConjugated norm_xyz = preprocessing.normalize(xyz, norm='l2') ij=0 ij_dict = {} for i in range(num_atom): for j in range(num_atom): if i==j: continue edge_index[ij] = [i,j] bond = mol.GetBondBetweenAtoms(i, j) if bond is not None: bond_type[ij] = one_hot_encoding(bond.GetBondType(),BOND_TYPE) conjugate[ij] = bond.GetIsConjugated() distance[ij] = np.linalg.norm(xyz[i] - xyz[j]) #angle[ij] = (norm_xyz[i]*norm_xyz[j]).sum() ij_dict[(i,j)] = ij ij+=1 conf = mol.GetConformer(0) for i,j,k in bond_idx3: #角Ni-Nj-Nkはi-kエッジとして格納 ## なにもない0とcos = 0を区別できるように-1する angle[ij_dict[(i,k)]] = np.cos(rdMolTransforms.GetAngleRad(conf,int(i),int(j),int(k)))-1 for i,j,k,l in bond_idx4: #角Ni-Nj-Nk-Nlはi-lエッジとして格納 ## なにもない0とcos = 0を区別できるように-1する dihedral = np.cos(rdMolTransforms.GetDihedralRad(conf,int(i),int(j),int(k),int(l)))-1 
dihedrals_min[ij_dict[(i,l)]] = min(angle[ij_dict[(i,l)]], dihedral) dihedrals_max[ij_dict[(i,l)]]= max(angle[ij_dict[(i,l)]], dihedral)\ if angle[ij_dict[(i,l)]] != 0 else dihedral dihedrals_diff = dihedrals_max - dihedrals_min ##------------------- atom_ret = pd.DataFrame({ "molecule_name":molecule_name, "atom_index": df["atom_index"].values, "is_accepter": acceptor, "is_donor": donor, "is_aromatic":aromatic, "is_sp1": hybridization[:,0], "is_sp2": hybridization[:,1], "is_sp3": hybridization[:,2], "is_isotope": isotope, "isin_ring": isin_ring, "is_ring3": n_ring[:,0], "is_ring4": n_ring[:,1], "is_ring5": n_ring[:,2], "is_ring6": n_ring[:,3], }) [bond_type, distance, angle, dihedrals_min, dihedrals_max, dihedrals_diff, conjugate,], edge_index,