def CisCheck(ifs):
    """
    Print the cis amide (omega) bonds found in every molecule read from ifs.

    For each molecule a header with its ordinal and title is printed. Every
    standard protein residue whose omega torsion lies strictly between
    -pi/2 and +pi/2 radians is then reported on its own line, unless the
    residue that follows it in the hierarchy view is a proline. A summary
    line with the cis bond count closes each molecule.

    :param ifs: input stream that is handed to oechem.OEReadMolecule
    """
    half_pi = math.pi / 2.0
    nrmol = 0
    # NOTE(review): nrcis is initialised once and never reset, so the number
    # printed in the summary accumulates over all molecules read so far --
    # confirm whether a per-molecule count was intended.
    nrcis = 0
    mol = oechem.OEGraphMol()
    while oechem.OEReadMolecule(ifs, mol):
        nrmol += 1
        print("===========================================================")
        print("Molecule: %s Title: %s" % (nrmol, mol.GetTitle()))

        if not oechem.OEHasResidues(mol):
            oechem.OEPerceiveResidues(mol, oechem.OEPreserveResInfo_All)

        hv = oechem.OEHierView(mol)
        resiter = hv.GetResidues()
        while resiter.IsValid():
            res = resiter.Target()
            # Advance immediately: from here on resiter points at the residue
            # that follows `res`, which is what the proline test below needs.
            resiter.Next()

            if not oechem.OEIsStandardProteinResidue(res):
                continue

            torsion = oechem.OEGetTorsion(res, oechem.OEProtTorType_Omega)
            # -100.0 is skipped; presumably the toolkit's "no such torsion"
            # sentinel -- TODO confirm against the OEBio documentation.
            if torsion == -100.0:
                continue
            # Only torsions within (-90, 90) degrees count as cis.
            if not -half_pi < torsion < half_pi:
                continue

            if resiter.IsValid():
                oenextres = resiter.Target().GetOEResidue()
                # A cis bond preceding a proline is not reported.
                if (oechem.OEGetResidueIndex(oenextres)
                        == oechem.OEResidueIndex_PRO):
                    continue

            nrcis += 1
            oeres = res.GetOEResidue()
            print("%s %s %2d omega torsion = %.2f degree" %
                  (oeres.GetName(), oeres.GetChainID(),
                   oeres.GetResidueNumber(),
                   torsion * oechem.cvar.Rad2Deg))

        print(" %d cis amide bond(s) identified\n" % nrcis)
def set_weighted_dihedral_histograms(mol, itag, work, nrbins):
    """
    Iterates over the dihedral groups and bins the torsional angles for
    each conformation, adding a per-conformer weight instead of a count.
    The histogram data is then attached to the groups with the given tag.

    The weights are ``np.exp`` of the entries of ``work`` after ``work`` has
    been shifted by its minimum and passed through ``normalize``. ``work`` is
    indexed by the enumeration index of ``mol.GetConfs()``, so it needs one
    entry per conformer.

    :type mol: oechem.OEMol
    :type itag: int
    :param work: sequence of numbers, one per conformer
    :type nrbins: int
    """
    angleinc = 360.0 / float(nrbins)
    last_bin = nrbins - 1

    # scale and normalize
    work = list(normalize(np.array(work) - min(work)))

    for group in mol.GetGroups(oechem.OEHasGroupType(itag)):
        atoms = oechem.OEAtomVector()
        for atom in group.GetAtoms():
            atoms.append(atom)

        histogram = [0] * nrbins
        for idx, conf in enumerate(mol.GetConfs()):
            rad = oechem.OEGetTorsion(conf, atoms[0], atoms[1], atoms[2],
                                      atoms[3])
            # map the angle into [0, 360) degrees
            deg = (math.degrees(rad) + 360.0) % 360.0
            # For angles just below 360 the division can round up to nrbins;
            # clamp so such a value lands in the last bin instead of raising
            # IndexError.
            binidx = min(int(math.floor(deg / angleinc)), last_bin)
            # instead of 1 add the weight
            histogram[binidx] += np.exp(work[idx])

        group.SetData(itag, histogram)
def extract_molecule_torsion_data(parent_mol, frag_mols=None):
    """
    Collect the dihedral angles of the parent molecule for each torsion
    motif found in its torsion fragments.

    Every fragment contributes one entry per (a, b, c, d) combination, where
    b and c are the central atoms reported by get_torsion_oeatom_list and a,
    d are heavy neighbours of b and c. Entries are keyed by the fragment
    InChIKey concatenated with get_modified_inchi_key(frag_mol, [a, b, c, d]).
    A fragment that raises while being processed is logged as a warning and
    skipped; entries recorded before the failure are kept.

    @param parent_mol: molecule on which the torsion angles are measured
    @type parent_mol: oechem.OEGraphMol
    @param frag_mols: torsion fragments; generated with
        get_molecule_torsion_fragments(parent_mol) when None
    @return: tuple(str, dict[str, list[tuple]]) -- the modified InChI of the
        parent and, per torsion key, tuples of
        (a_idx, b_idx, c_idx, d_idx, angle_in_degrees) in parent indexing
    """
    if frag_mols is None:
        frag_mols = get_molecule_torsion_fragments(parent_mol)

    torsion_data = collections.defaultdict(list)

    for frag_mol in frag_mols:
        inchi_key = oechem.OECreateInChIKey(frag_mol)
        atom_map = get_fragment_to_parent_atom_mapping(parent_mol, frag_mol)

        try:
            _, center_b, center_c, _ = get_torsion_oeatom_list(frag_mol)

            for end_a in center_b.GetAtoms(oechem.OEIsHeavy()):
                for end_d in center_c.GetAtoms(oechem.OEIsHeavy()):
                    # The end atoms must not be the opposite central atom.
                    if (end_a.GetIdx() == center_c.GetIdx()
                            or end_d.GetIdx() == center_b.GetIdx()):
                        continue

                    frag_atoms = [end_a, center_b, center_c, end_d]
                    parent_atoms = [
                        atom_map[end_a],
                        atom_map[center_b],
                        atom_map[center_c],
                        atom_map[end_d],
                    ]

                    # Only accept mappings that preserve the element of
                    # every torsion atom.
                    same_elements = all(
                        frag_atom.GetAtomicNum() == parent_atom.GetAtomicNum()
                        for frag_atom, parent_atom
                        in zip(frag_atoms, parent_atoms))
                    if not same_elements:
                        continue

                    ap, bp, cp, dp = parent_atoms
                    angle = (
                        oechem.OEGetTorsion(parent_mol, ap, bp, cp, dp)
                        * oechem.Rad2Deg)
                    torsion_inchi = inchi_key + get_modified_inchi_key(
                        frag_mol, frag_atoms)
                    torsion_data[torsion_inchi].append(
                        (ap.GetIdx(), bp.GetIdx(), cp.GetIdx(), dp.GetIdx(),
                         angle))
        except Exception as e:
            # Best effort: a failing fragment must not abort the others.
            logging.warning(e)

    parent_inchi = get_modified_molecule_inchi(parent_mol)
    return (parent_inchi, torsion_data)
def set_dihedral(mol, itag):
    """
    Measures the dihedral angle of every group of type ``itag`` and stores
    it on the group under the same tag.

    The angle is taken over the first four atoms of the group and stored in
    degrees, shifted into the range [0, 360).

    :type mol: oechem.OEMol
    :type itag: int
    """
    has_tag = oechem.OEHasGroupType(itag)
    for dihedral_group in mol.GetGroups(has_tag):
        group_atoms = oechem.OEAtomVector()
        for member in dihedral_group.GetAtoms():
            group_atoms.append(member)

        first, second, third, fourth = (group_atoms[0], group_atoms[1],
                                        group_atoms[2], group_atoms[3])
        torsion_rad = oechem.OEGetTorsion(mol, first, second, third, fourth)
        torsion_deg = (math.degrees(torsion_rad) + 360.0) % 360.0
        dihedral_group.SetData(itag, torsion_deg)
def get_closest_dihedral_angle(mol, dihedral, ref_dihedral, itag):
    """
    Returns the closest torsion angle difference to the reference.

    For every conformer of ``mol`` the torsion over the first four atoms of
    ``dihedral`` is measured and compared with the angle stored on
    ``ref_dihedral`` under ``itag``. The comparison is circular: the
    difference is folded into [0, 180] degrees, so for example -170 and 170
    degrees are 20 degrees apart rather than 340.

    :type mol: oechem.OEMol
    :type dihedral: oechem.OEGroupBase
    :type ref_dihedral: oechem.OEGroupBase
    :type itag: int
    :return: smallest difference in degrees over all conformers, or
        ``float("inf")`` when ``mol`` has no conformers
    """
    # Both values are the same for every conformer, so look them up once.
    atoms = list(dihedral.GetAtoms())
    ref_deg = ref_dihedral.GetData(itag)

    closest_angle = float("inf")
    for conf in mol.GetConfs():
        rad = oechem.OEGetTorsion(conf, atoms[0], atoms[1], atoms[2], atoms[3])
        deg = math.degrees(rad)
        # Reduce modulo a full turn, then take the shorter way round the
        # circle; a plain modulo would report e.g. 340 instead of 20.
        angle_diff = abs(deg - ref_deg) % 360.0
        angle_diff = min(angle_diff, 360.0 - angle_diff)
        closest_angle = min(closest_angle, angle_diff)
    return closest_angle
def generate_torsional_strain(mol):
    '''
    Calculate the strain energy of each rotatable bond that carries an
    energy profile.

    For every rotor bond with ENERGY_PROFILE_TAG data, the profile is turned
    into an interpolating function and evaluated at the current angle of
    each torsion returned by extract_torsion_atoms. Negative energies are
    raised to 0.0. The lowest value is stored on the bond under STRAIN_TAG
    and the atom indices of the torsion that produced it under
    TORSION_ATOMS_FRAGMENT_TAG. If no torsion is returned for a bond, its
    STRAIN_TAG is left at the 1e10 starting value.

    When the molecule has truthy HAS_PROFILES_TAG data, the results are
    additionally written out through save_profile_as_sd.

    @param mol: OEGraphMol
    @return: the same mol that was passed in
    '''
    for rotor in mol.GetBonds(oechem.OEIsRotor()):
        if not rotor.HasData(ENERGY_PROFILE_TAG):
            continue

        profile_str = rotor.GetData(ENERGY_PROFILE_TAG)
        angles, energies = extract_numeric_data_from_profile_str(profile_str)
        _, energy_at = get_global_min_interp1d(angles, energies)

        lowest_strain = 1e10
        for a, b, c, d in extract_torsion_atoms(mol, rotor):
            torsion_deg = oechem.OEGetTorsion(mol, a, b, c, d) * oechem.Rad2Deg
            strain = float(energy_at(torsion_deg))
            if strain < 0.0:
                strain = 0.0

            if strain < lowest_strain:
                lowest_strain = strain
                # remember which torsion gave the current minimum
                atom_indices = [a.GetIdx(), b.GetIdx(), c.GetIdx(), d.GetIdx()]
                rotor.SetData(TORSION_ATOMS_FRAGMENT_TAG,
                              ' '.join(str(index) for index in atom_indices))

        rotor.SetData(STRAIN_TAG, lowest_strain)

    if mol.HasData(HAS_PROFILES_TAG) and mol.GetData(HAS_PROFILES_TAG):
        save_profile_as_sd(mol)

    return mol
def GetThetaIJKLMatrix(mol, iAtoms, jAtom, kAtom, lAtoms, transform=True):
    '''
    Builds the matrix of torsion angles i-j-k-l around the j-k axis.

    jAtom, kAtom -> OEAtombase, middle two atoms of the torsion
    iAtoms -> list of N OEAtombase
    lAtoms -> list of M OEAtombase
    transform -> when true each angle t is stored as (pi + t) / 4,
                 otherwise the value from OEGetTorsion is stored unchanged

    return a N-by-M numpy array; row n / column m holds the entry for
    iAtoms[n] and lAtoms[m]
    '''
    def _matrix_entry(i_atom, l_atom):
        raw = oechem.OEGetTorsion(mol, i_atom, jAtom, kAtom, l_atom)
        return (math.pi + raw) / 4.0 if transform else raw

    # row-major: all l atoms for the first i atom, then the next i atom, ...
    flat_entries = [
        _matrix_entry(i_atom, l_atom)
        for i_atom in iAtoms
        for l_atom in lAtoms
    ]
    return np.array(flat_entries).reshape(len(iAtoms), len(lAtoms))
def generate_constraint_opt_input(qc_molecule, dihedrals, maximum_rotation=30, interval=5, filename=None):
    """
    Generate dihedral-constrained optimization inputs by rotating each
    dihedral around its current value.

    For every dihedral, conformers are generated with the torsion set to
    ``current - maximum_rotation`` up to (but excluding)
    ``current + maximum_rotation`` in steps of ``interval``. Each conformer
    becomes the initial molecule of one optimization job whose constraint
    fixes the dihedral at that angle.

    Parameters
    ----------
    qc_molecule : dict
        Must provide
        ``['identifiers']['canonical_isomeric_explicit_hydrogen_mapped_smiles']``
        and ``['geometry']``. The geometry is scaled by
        ``utils.BOHR_2_ANGSTROM`` before it is set on the conformer.
    dihedrals : dict
        Maps a label to the four indices of a dihedral. Index ``i`` is
        resolved to the atom with map index ``i + 1``.
    maximum_rotation : number, optional
        Largest rotation away from the current angle, in degrees.
    interval : number, optional
        Step between generated angles, in degrees.
    filename : str, optional
        When given, every generated conformer is also written to
        ``"<filename>_<i>.pdb"``, where ``i`` is the step index.

    Returns
    -------
    QCFractal optimization jobs input: dict keyed by
    ``"<indices>_<angle rounded to whole degrees>"``
    """
    from openeye import oechem

    optimization_jobs = {}
    tagged_smiles = qc_molecule['identifiers']['canonical_isomeric_explicit_hydrogen_mapped_smiles']
    mol = oechem.OEMol()
    oechem.OESmilesToMol(mol, tagged_smiles)
    atom_map = get_atom_map(mol, tagged_smiles)

    coords = chemi.from_mapped_xyz_to_mol_idx_order(qc_molecule['geometry'], atom_map)
    # convert coord to Angstrom
    coords = coords * utils.BOHR_2_ANGSTROM
    conf = mol.GetConfs().next()
    conf.SetCoords(oechem.OEFloatArray(coords))

    # new molecule for setting dihedral angles
    mol_2 = oechem.OEMol(mol)
    conf_2 = mol_2.GetConfs().next()
    coords_2 = oechem.OEFloatArray(conf_2.GetMaxAtomIdx()*3)
    conf.GetCoords(coords_2)
    mol_2.DeleteConfs()

    interval = radians(interval)
    max_rot = radians(maximum_rotation)
    for dihedral in dihedrals:
        dih_idx = dihedrals[dihedral]
        tor = []
        for i in dih_idx:
            a = mol.GetAtom(oechem.OEHasMapIdx(i+1))
            tor.append(a)
        dih_angle = oechem.OEGetTorsion(conf, tor[0], tor[1], tor[2], tor[3])
        for i, angle in enumerate(np.arange(dih_angle-max_rot, dih_angle+max_rot, interval)):
            newconf = mol.NewConf(coords_2)
            # NOTE: the result of OESetTorsion is not verified. A check that
            # compared the new torsion with dih_angle (to detect conformers
            # that were not actually rotated) used to live here but was
            # disabled.
            oechem.OESetTorsion(newconf, tor[0], tor[1], tor[2], tor[3], angle)
            if filename:
                # NOTE(review): the file name only contains the step index,
                # so files of a later dihedral overwrite those of an earlier
                # one -- confirm this is acceptable.
                pdb = oechem.oemolostream("{}_{}.pdb".format(filename, i))
                try:
                    oechem.OEWritePDBFile(pdb, newconf)
                finally:
                    # Close explicitly so the file is flushed and released
                    # instead of waiting for garbage collection.
                    pdb.close()
            symbols, geometry = chemi.to_mapped_geometry(newconf, atom_map)
            # Each job gets its own copy so the geometries do not alias.
            qc_molecule = copy.deepcopy(qc_molecule)
            qc_molecule['geometry'] = geometry
            qc_molecule['symbols'] = symbols
            degree = degrees(angle)
            optimization_jobs['{}_{}'.format(dih_idx, int(round(degree)))] = {
                'type': 'optimization_input',
                'initial_molecule': qc_molecule,
                'dihedral': dih_idx,
                'constraints': {
                    "set": [{
                        "type": "dihedral",
                        "indices": dih_idx,
                        "value": degree
                    }]
                }
            }
    return optimization_jobs
def CalculateTorsionSymmetryFunction(self, envMol, num_iter):
    '''
    Calculates the torsion symmetry function vector of envMol around the
    bond returned by get_torsion_oebond(envMol).

    Functional form is described in the DFT-NN review article by Behler,
    page 30, equations 25 and 26

    The bond's begin atom is used as j and its end atom as k. For every
    pair of elements (iElem, lElem) from self.elemList with lElem not before
    iElem, and for every (ita, rc) combination from self.itaVec and
    self.rcTorVec, one value is appended. It is 0.0 when no environment atoms
    of the element are found. Five extra descriptors of the a-b-c-d torsion
    from get_torsion_oeatom_list(envMol) are appended at the end: squared a-d
    distance, squared b-c distance, torsion angle, and the atomic number
    products a*d and b*c.

    envMol -> molecule whose coordinates and atoms are used
    num_iter -> 1 takes the i atoms from the (Bgn, End) side and the l atoms
                from the (End, Bgn) side; any other value swaps the two

    return a list of floats; an empty list when there is no torsion bond or
    the coordinates of one of its atoms cannot be read
    '''
    tsf = []
    elemList = self.elemList
    nullRet = []
    bond = get_torsion_oebond(envMol)
    if bond is None:
        return nullRet
    jAtom = bond.GetBgn()
    jcoords = OEDoubleArray(3)
    if not envMol.GetCoords(bond.GetBgn(), jcoords):
        return nullRet
    kAtom = bond.GetEnd()
    kcoords = OEDoubleArray(3)
    if not envMol.GetCoords(bond.GetEnd(), kcoords):
        return nullRet
    # tsf.append(bond.GetBgn().GetAtomicNum() * bond.GetEnd().GetAtomicNum());
    for inum, iElem in enumerate(elemList):
        # NOTE(review): when num_iter != 1 the i atoms are gathered with the
        # bond atoms swapped, while jAtom/jcoords stay on the begin atom --
        # confirm this is intended.
        if num_iter == 1:
            iAtoms, icoords = self.GetTorsionEnvAtoms(
                iElem, bond.GetBgn(), bond.GetEnd(), envMol)
        else:
            iAtoms, icoords = self.GetTorsionEnvAtoms(
                iElem, bond.GetEnd(), bond.GetBgn(), envMol)
        if len(icoords) == 0:
            # No i atoms of this element: pad with one zero per (ita, rc)
            # and per remaining l element so the vector length stays the
            # same as in the populated case.
            for ita in self.itaVec:
                for rc in self.rcTorVec:
                    for num1, elem in enumerate(elemList):
                        if num1 < inum:
                            continue
                        tsf.append(0.0)
            continue
        # only rij is used below; the other returned values are ignored
        ijX, ijY, ijZ, rij, rij2 = GetPairwiseDistanceMatrix(
            icoords, [jcoords])
        for lnum, lElem in enumerate(elemList):
            # each unordered element pair is handled once
            if lnum < inum:
                continue
            if num_iter == 1:
                lAtoms, lcoords = self.GetTorsionEnvAtoms(
                    lElem, bond.GetEnd(), bond.GetBgn(), envMol)
            else:
                lAtoms, lcoords = self.GetTorsionEnvAtoms(
                    lElem, bond.GetBgn(), bond.GetEnd(), envMol)
            if len(lcoords) == 0:
                # No l atoms of this element: one zero per (ita, rc).
                for ita in self.itaVec:
                    for rc in self.rcTorVec:
                        tsf.append(0.0)
                continue
            # only rkl and ril are used below
            klX, klY, klZ, rkl, rkl2 = GetPairwiseDistanceMatrix([kcoords],
                                                                 lcoords)
            ilX, ilY, ilZ, ril, ril2 = GetPairwiseDistanceMatrix(
                icoords, lcoords)
            theta_ijkl = GetThetaIJKLMatrix(envMol, iAtoms, jAtom, kAtom,
                                            lAtoms)
            # angular symmetry function
            for ita in self.itaVec:
                for rc in self.rcTorVec:
                    # Expand rij and rkl to (rij.size, rkl.size) matrices so
                    # they can be combined element-wise; presumably this
                    # matches the shape of ril and theta_ijkl -- TODO confirm
                    # against GetPairwiseDistanceMatrix.
                    rijMat = np.repeat(rij, rkl.size)
                    rijMat = rijMat.reshape(rij.size, rkl.size)
                    rklMat = np.repeat(rkl, rij.size)
                    rklMat = rklMat.reshape(rkl.size, rij.size)
                    rklMat = np.transpose(rklMat)
                    # cutoff function: 0.5 * (cos(pi * r / rc) + 1) for
                    # r <= rc, 0 beyond the cutoff
                    fcRij = np.select(
                        [rijMat <= rc, rijMat > rc],
                        [0.5 * (np.cos(np.pi * rijMat / rc) + 1.0), 0.0])
                    fcRkl = np.select(
                        [rklMat <= rc, rklMat > rc],
                        [0.5 * (np.cos(np.pi * rklMat / rc) + 1.0), 0.0])
                    fcRil = np.select(
                        [ril <= rc, ril > rc],
                        [0.5 * (np.cos(np.pi * ril / rc) + 1.0), 0.0])
                    exponent = ita * (np.square(rijMat) + np.square(rklMat) +
                                      np.square(ril))
                    term1 = np.power(
                        (1 + self.lambda1 * np.cos(theta_ijkl)), self.chi)
                    term2 = np.exp(-exponent)
                    term3 = (fcRij * fcRkl) * fcRil
                    # sum over all (i, l) atom pairs
                    sumIL = np.sum(term1 * term2 * term3)
                    coeff = np.power(2, 1 - self.chi) * sumIL
                    # scale by the atomic numbers of the two bond atoms
                    tsf.append(coeff * jAtom.GetAtomicNum() *
                               kAtom.GetAtomicNum())
    # trailing descriptors of the torsion itself
    a, b, c, d = get_torsion_oeatom_list(envMol)
    tsf.append(oechem.OEGetDistance2(envMol, a, d))
    tsf.append(oechem.OEGetDistance2(envMol, b, c))
    tsf.append(oechem.OEGetTorsion(envMol, a, b, c, d))
    tsf.append(a.GetAtomicNum() * d.GetAtomicNum())
    tsf.append(b.GetAtomicNum() * c.GetAtomicNum())
    return tsf
def save_profile_as_sd(mol: oechem.OEGraphMol):
    """
    Write the per-torsion strain results stored on the bonds of mol into
    SD data fields of mol.

    For every canonical torsion whose central bond carries
    ENERGY_PROFILE_TAG data, the following fields are (re)written, numbered
    by the position of the torsion in get_canonical_torsions(mol) plus one:
    torsion atoms (1-based, colon separated), energy profile, strain, angle
    in degrees, prediction confidence and profile offset. A torsion is
    marked low confidence when its offset exceeds OFFSET_THRESHOLD or the
    bond has SKIP_TORSION_TAG data.

    The molecule-level fields TOTAL_STRAIN_TAG (sum over the torsions with
    offset below OFFSET_THRESHOLD and no SKIP_TORSION_TAG),
    NUM_TORSION_PROFILES_TAG and NUM_LOW_CONFIDENCE_TORSIONS_TAG are set at
    the end, after which reorder_sd_props(mol) is called.

    @param mol: molecule whose bonds carry the strain data
    @return: the same mol that was passed in
    """
    # Create the summary fields up front with empty values; they are filled
    # in at the end of the function.
    oechem.OEDeleteSDData(mol, TOTAL_STRAIN_TAG)
    oechem.OESetSDData(mol, TOTAL_STRAIN_TAG, '')  # place holder
    oechem.OEDeleteSDData(mol, NUM_TORSION_PROFILES_TAG)
    oechem.OESetSDData(mol, NUM_TORSION_PROFILES_TAG, '')
    oechem.OEDeleteSDData(mol, NUM_LOW_CONFIDENCE_TORSIONS_TAG)
    oechem.OESetSDData(mol, NUM_LOW_CONFIDENCE_TORSIONS_TAG, '')
    # strain_arr sums all torsions but is never read afterwards; only the
    # high-confidence sum ends up in the output.
    strain_arr = np.zeros(1)
    strain_arr_high_conf_preds = np.zeros(1)
    num_torsion_profiles = 0
    num_low_confidence_torsions = 0
    can_torsions = get_canonical_torsions(mol)
    for num, can_torsion in enumerate(can_torsions):
        bond = mol.GetBond(can_torsion.b, can_torsion.c)
        if bond is not None and bond.HasData(ENERGY_PROFILE_TAG):
            num_torsion_profiles += 1
            bond_strains = bond.GetData(STRAIN_TAG)
            profile_offset = bond.GetData(PROFILE_OFFSET_TAG)
            # NOTE(review): an offset exactly equal to OFFSET_THRESHOLD is
            # neither added to the high-confidence total here nor counted as
            # low confidence below -- confirm intended.
            if profile_offset < OFFSET_THRESHOLD and (
                    not bond.HasData(SKIP_TORSION_TAG)):
                strain_arr_high_conf_preds += np.array(bond_strains)
            strain_arr += np.array(bond_strains)
            offset = bond.GetData(PROFILE_OFFSET_TAG)
            profile_str = bond.GetData(ENERGY_PROFILE_TAG)
            pred_confidence_value = HIGH_PREDICTION_CONFIDENCE_TAG
            if offset > OFFSET_THRESHOLD or bond.HasData(SKIP_TORSION_TAG):
                profile_str = 'LOW CONFIDENCE - ' + profile_str
                pred_confidence_value = LOW_PREDICTION_CONFIDENCE_TAG
                num_low_confidence_torsions += 1
            #tor_atoms_str = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
            #tor_atoms_str_list = tor_atoms_str.split(':')
            #a_idx, b_idx, c_idx, d_idx = list(map(int, tor_atoms_str_list[0].split()))
            # The tag holds four whitespace separated atom indices.
            tor_atoms_str1 = bond.GetData(TORSION_ATOMS_FRAGMENT_TAG)
            ca, cb, cc, cd = list(map(int, tor_atoms_str1.split()))
            # the SD field reports the atoms 1-based
            apStr = "{}:{}:{}:{}".format(ca + 1, cb + 1, cc + 1, cd + 1)
            atom_ca = mol.GetAtom(oechem.OEHasAtomIdx(ca))
            atom_cb = mol.GetAtom(oechem.OEHasAtomIdx(cb))
            atom_cc = mol.GetAtom(oechem.OEHasAtomIdx(cc))
            atom_cd = mol.GetAtom(oechem.OEHasAtomIdx(cd))
            angle_float = oechem.OEGetTorsion(mol, atom_ca, atom_cb, atom_cc,
                                              atom_cd) * oechem.Rad2Deg
            # Field names are numbered by the torsion's position among all
            # canonical torsions, so numbers of skipped torsions are unused.
            sd_tag1 = 'TORSION_%s_ATOMS' % (num + 1)
            sd_tag2 = 'TORSION_%d_TORSIONNET_%s' % (num + 1,
                                                    ENERGY_PROFILE_TAG)
            sd_tag3 = 'TORSION_%d_TORSIONNET_PRED_CONFIDENCE' % (num + 1)
            sd_tag4 = 'TORSION_%d_TORSIONNET_PROFILE_OFFSET' % (num + 1)
            # delete before set so that a stale value is replaced
            oechem.OEDeleteSDData(mol, sd_tag1)
            oechem.OEDeleteSDData(mol, sd_tag2)
            oechem.OEDeleteSDData(mol, sd_tag3)
            oechem.OEDeleteSDData(mol, sd_tag4)
            oechem.OESetSDData(mol, sd_tag1, apStr)
            oechem.OESetSDData(mol, sd_tag2, profile_str)
            sd_tag6 = 'TORSION_%d_%s' % ((num + 1), STRAIN_TAG)
            oechem.OEDeleteSDData(mol, sd_tag6)
            oechem.OESetSDData(mol, sd_tag6, '%.1f' % bond_strains)
            angle = '%.1f' % angle_float
            sd_tag5 = 'TORSION_%d_ANGLE' % (num + 1)
            oechem.OEDeleteSDData(mol, sd_tag5)
            oechem.OESetSDData(mol, sd_tag5, angle)
            oechem.OESetSDData(mol, sd_tag3, pred_confidence_value)
            oechem.OESetSDData(mol, sd_tag4, '%.2f' % offset)
    # fill in the summary fields created at the top
    strain_str = '%.1f' % strain_arr_high_conf_preds[0]
    oechem.OESetSDData(mol, TOTAL_STRAIN_TAG, strain_str)
    oechem.OESetSDData(mol, NUM_TORSION_PROFILES_TAG,
                       str(num_torsion_profiles))
    oechem.OESetSDData(mol, NUM_LOW_CONFIDENCE_TORSIONS_TAG,
                       str(num_low_confidence_torsions))
    reorder_sd_props(mol)
    return mol
def cal_molecule_torsion_strain(mol, profiles_map):
    """
    Annotate a molecule with torsional strain evaluated from profiles_map.

    The torsion motifs of the molecule are obtained with
    extract_molecule_torsion_data (for an OEMol, from its active conformer).
    Motifs without an entry in profiles_map are ignored. Negative energies
    are raised to 0.

    For an oechem.OEGraphMol the STRAIN_TAG data of each central bond is
    lowered to the smallest energy found, and the sum over all bonds with
    STRAIN_TAG data is stored on the molecule under STRAIN_TAG.

    For an oechem.OEMol every conformer is processed separately. Per central
    bond the torsion with the lowest energy is kept. For each rotor bond with
    a result the SD fields ``QM_TORSION_ATOMS_<n>_ATOMPROP``,
    ``QM_TORSION_<n>_<ENERGY_PROFILE_TAG>``, ``TORSION_ANGLE_<n>`` and
    ``QM_STRAIN_TORSION_<n>`` are written. The summary fields ``QM_STRAIN``,
    ``NUM_QM_TORSION_PROFILES`` (number of rotor bonds) and
    ``NUM_MISSING_QM_TORSIONS`` (rotor bonds without a result) are set, and
    the total is stored on the conformer under STRAIN_TAG.

    @type mol: oechem.OEGraphMol|oechem.OEMol
    :param mol: molecule to annotate in place
    :param profiles_map: mapping from torsion key to a callable that takes
        an angle in degrees and returns an energy
    :return: None
    """
    if type(mol) is oechem.OEMol:
        graph_mol = oechem.OEGraphMol(mol.GetActive())
        data = extract_molecule_torsion_data(graph_mol)
    else:
        data = extract_molecule_torsion_data(mol)

    if data is None:
        return

    _, tor_map = data

    if type(mol) is oechem.OEGraphMol:
        for tor_inchi, tor_data_list in tor_map.items():
            if tor_inchi not in profiles_map:
                continue
            profile = profiles_map[tor_inchi]
            for tor_data in tor_data_list:
                _, b_idx, c_idx, _, angle = tor_data
                bond = mol.GetBond(
                    mol.GetAtom(oechem.OEHasAtomIdx(b_idx)),
                    mol.GetAtom(oechem.OEHasAtomIdx(c_idx)),
                )
                if bond is None:
                    continue
                strain_energy = profile(angle)
                if strain_energy < 0:
                    strain_energy = 0
                # NOTE(review): the strain is only ever lowered on bonds
                # that already carry STRAIN_TAG data; a bond without it is
                # never initialised here -- confirm the tag is seeded
                # elsewhere, otherwise this probably wants
                # "not HasData or GetData > strain_energy".
                if (bond.HasData(STRAIN_TAG)
                        and bond.GetData(STRAIN_TAG) > strain_energy):
                    bond.SetData(STRAIN_TAG, strain_energy)

        total_strain = 0.0
        for bond in mol.GetBonds():
            if bond.HasData(STRAIN_TAG):
                total_strain += bond.GetData(STRAIN_TAG)
        mol.SetData(STRAIN_TAG, total_strain)

    elif type(mol) is oechem.OEMol:
        # The SD representation of a profile depends only on the motif, so
        # it is built once per motif rather than once per torsion and
        # conformer.
        profile_sd_cache = {}

        for conf in mol.GetConfs():
            bondIdx2energy = {}
            bondIdx2profile = {}
            bondIdx2toratoms = {}
            bondIdx2angles = {}
            for tor_inchi, tor_data_list in tor_map.items():
                if tor_inchi not in profiles_map:
                    continue
                profile = profiles_map[tor_inchi]
                for tor_data in tor_data_list:
                    a_idx, b_idx, c_idx, d_idx, _ = tor_data
                    b_atm = conf.GetAtom(oechem.OEHasAtomIdx(b_idx))
                    c_atm = conf.GetAtom(oechem.OEHasAtomIdx(c_idx))
                    bond = conf.GetBond(b_atm, c_atm)
                    if bond is None:
                        continue

                    a_atm = conf.GetAtom(oechem.OEHasAtomIdx(a_idx))
                    d_atm = conf.GetAtom(oechem.OEHasAtomIdx(d_idx))
                    # the angle is measured on this conformer, not taken
                    # from the torsion data of the active conformer
                    angle = (oechem.OEGetTorsion(
                        conf, a_atm, b_atm, c_atm, d_atm) * oechem.Rad2Deg)
                    strain_energy = float(profile(angle))
                    if strain_energy < 0:
                        strain_energy = 0

                    if tor_inchi not in profile_sd_cache:
                        # sample the profile every 15 degrees, -165 .. 180
                        x = range(-165, 181, 15)
                        y = [float(profile(a)) for a in x]
                        profile_sd_cache[tor_inchi] = (
                            generate_energy_profile_sd_data_1d(
                                list(zip(x, y))))
                    energy_profile = profile_sd_cache[tor_inchi]

                    # keep the lowest-energy torsion of each central bond
                    bondIdx = bond.GetIdx()
                    if (bondIdx not in bondIdx2energy
                            or bondIdx2energy[bondIdx] > strain_energy):
                        bondIdx2energy[bondIdx] = strain_energy
                        bondIdx2profile[bondIdx] = energy_profile
                        bondIdx2toratoms[bondIdx] = [
                            a_idx + 1,
                            b_idx + 1,
                            c_idx + 1,
                            d_idx + 1,
                        ]
                        bondIdx2angles[bondIdx] = angle

            # sd property place holder
            oechem.OESetSDData(conf, "QM_STRAIN", "0.0")
            oechem.OESetSDData(conf, "NUM_QM_TORSION_PROFILES", "0")
            oechem.OESetSDData(conf, "NUM_MISSING_QM_TORSIONS", "-1")

            total_strain = 0.0
            tor_idx = 1
            # Initialised explicitly: the loop variable would be unbound
            # (NameError) for a conformer without rotor bonds.
            num_rotors = 0
            for num_rotors, bond in enumerate(
                    conf.GetBonds(oechem.OEIsRotor()), start=1):
                bidx = bond.GetIdx()
                if bidx not in bondIdx2energy:
                    continue
                total_strain += bondIdx2energy[bidx]
                tmp = ":1%".join(list(map(str, bondIdx2toratoms[bidx])))
                tor_atomprop = "cs1:0:1;1%" + tmp
                oechem.OESetSDData(
                    conf,
                    "QM_TORSION_ATOMS_%d_" % tor_idx + "ATOMPROP",
                    tor_atomprop,
                )
                oechem.OESetSDData(
                    conf,
                    "QM_TORSION_%d_" % tor_idx + ENERGY_PROFILE_TAG,
                    bondIdx2profile[bidx],
                )
                oechem.OESetSDData(
                    conf,
                    "TORSION_ANGLE_%d" % tor_idx,
                    "%.1f" % bondIdx2angles[bidx],
                )
                oechem.OESetSDData(
                    conf,
                    "QM_STRAIN_TORSION_%d" % tor_idx,
                    "%.1f" % bondIdx2energy[bidx],
                )
                tor_idx += 1

            oechem.OESetSDData(conf, "QM_STRAIN", "%.1f" % total_strain)
            oechem.OESetSDData(conf, "NUM_QM_TORSION_PROFILES",
                               "%d" % num_rotors)
            # rotor bonds for which no profile result was recorded
            num_missing_torsions = num_rotors - (tor_idx - 1)
            oechem.OESetSDData(conf, "NUM_MISSING_QM_TORSIONS",
                               "%d" % num_missing_torsions)
            conf.SetData(STRAIN_TAG, total_strain)