def __searchSubStructure(self, oeQueryMol, idxList=None, reverseFlag=False, matchOpts="graph-relaxed"):
    """Perform a graph match for the input query molecule on the binary database of molecules.

    The search is optionally restricted to the input index list, and the
    sense of the search may optionally be reversed.

    Args:
        oeQueryMol (object): query molecule (OEGraphMol or OEQMol)
        idxList (list, optional): database indices to search. When None or
            empty, every index below ``GetMaxMolIdx()`` is searched.
        reverseFlag (bool, optional): when True, collect the molecules that do
            NOT match the query. Defaults to False.
        matchOpts (str, optional): graph match criteria type
            (graph-strict|graph-relaxed|graph-relaxed-stereo). Defaults to "graph-relaxed".

    Returns:
        (bool, list): status flag (False if the search could not be set up or
            an exception was raised) and the list of MatchResults collected.
            ``fpScore`` is the ratio of query atoms to target atoms.
    """
    hL = []
    retStatus = True
    try:
        atomexpr, bondexpr = OeCommonUtils.getAtomBondExprOpts(matchOpts)
        ss = oechem.OESubSearch(oeQueryMol, atomexpr, bondexpr)
        if not ss.IsValid():
            logger.error("Unable to initialize substructure search!")
            return False, hL
        #
        if idxList:
            searchType = "prefilterd-substructure"
            idxIt = idxList
        else:
            searchType = "exhaustive-substructure"
            idxIt = range(self.__oeMolDb.GetMaxMolIdx())
        numQueryAtoms = float(oeQueryMol.NumAtoms())
        for idx in idxIt:
            mol = oechem.OEGraphMol()
            ccId = self.__oeMolDb.GetTitle(idx)
            if not self.__oeMolDb.GetMolecule(mol, idx):
                logger.error("Unable to read molecule %r at index %r", ccId, idx)
                continue
            oechem.OEPrepareSearch(mol, ss)
            if ss.SingleMatch(mol) != reverseFlag:
                # Score against the target size (the original divided the query
                # atom count by itself, which is always 1.0).  A reversed search
                # can report an empty molecule, so guard the division.
                numMolAtoms = mol.NumAtoms()
                score = numQueryAtoms / float(numMolAtoms) if numMolAtoms else 0.0
                hL.append(MatchResults(ccId=ccId, searchType=searchType, matchOpts=matchOpts, fpScore=score))
    except Exception as e:
        retStatus = False
        logger.exception("Failing with %s", str(e))
    return retStatus, hL
def keep_molecule(mol, remove_smirks=()):
    """
    Determines if the molecule will be stored.

    Parameters
    ----------
    mol - OEMol
    remove_smirks - iterable of SMIRKS/SMARTS strings you don't want in your
        molecules. Patterns that fail to parse are skipped.

    Returns
    -------
    boolean - True (molecule meets the requirements below)
        - has no metal atoms
        - no more than 200 heavy atoms
        - has none of the SMIRKS in remove_smirks list
        - molecule has appropriate valency (as decided by check_valence)
    """
    # Check number of metal atoms
    if oechem.OECount(mol, oechem.OEIsMetal()) > 0:
        return False
    # Check number of heavy atoms
    if oechem.OECount(mol, oechem.OEIsHeavy()) > 200:
        return False
    # Check for patterns in remove smirks list
    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            continue
        # Only existence matters, so stop at the first hit instead of
        # collecting every match.
        if oechem.OESubSearch(qmol).SingleMatch(mol):
            return False
    # check valency
    return check_valence(mol)
def get_covalent_warhead_atom(molecule, covalent_warhead_type):
    """
    Get tagged atom index in provided tagged SMARTS string, or None if no match found.

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        The molecule to search
    covalent_warhead_type : str
        Covalent warhead name; used as the key into ``covalent_warhead_smarts``

    Returns
    -------
    index : int or None
        The atom index in molecule of the atom matched by the pattern atom
        with map index 1, or None if SMARTS does not match

    Raises
    ------
    ValueError
        If the SMARTS registered for the warhead type cannot be parsed.
    """
    smarts = covalent_warhead_smarts[covalent_warhead_type]
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        raise ValueError(f"Error parsing SMARTS '{smarts}'")
    substructure_search = oechem.OESubSearch(qmol)
    substructure_search.SetMaxMatches(1)
    for match in substructure_search.Match(molecule):
        # Return the target atom paired with the tagged (:1) pattern atom
        for matched_atom in match.GetAtoms():
            if matched_atom.pattern.GetMapIdx() == 1:
                return matched_atom.target.GetIdx()
    return None
def _find_torsions_from_smarts(molecule, smarts):
    """
    Do a substructure search on provided SMARTS to find torsions that match the SMARTS

    Parameters
    ----------
    molecule: OEMol
        molecule to search on
    smarts: str
        SMARTS pattern to search for

    Returns
    -------
    tors: list
        list of torsions that match the SMARTS string; each torsion is the
        list of matched target atoms. Empty when the SMARTS cannot be parsed.
    """
    from openeye import oechem

    # TODO: use MDL aromaticity model
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        utils.logger().warning('OEParseSmarts failed')
        # Do not search with a query that failed to parse.
        return []
    ss = oechem.OESubSearch(qmol)
    oechem.OEPrepareSearch(molecule, ss)
    tors = []
    # second argument True: unique matches only
    for match in ss.Match(molecule, True):
        tors.append([ma.target for ma in match.GetAtoms()])
    return tors
def GetCommonFragments(mollist, frags, atomexpr=oechem.OEExprOpts_DefaultAtoms, bondexpr=oechem.OEExprOpts_DefaultBonds):
    """Return the fragments that occur as a substructure of every molecule.

    Parameters
    ----------
    mollist : iterable of molecules to test each fragment against
    frags : iterable of fragment molecules used as substructure queries
    atomexpr, bondexpr : expression options passed to ``OESubSearch``

    Returns
    -------
    list
        The fragments (in input order) that match all molecules in ``mollist``.
        Fragments that yield an invalid search are reported and skipped.
    """
    corefrags = []
    for frag in frags:
        ss = oechem.OESubSearch(frag, atomexpr, bondexpr)
        if not ss.IsValid():
            print('Is not valid')
            continue
        for mol in mollist:
            oechem.OEPrepareSearch(mol, ss)
            if not ss.SingleMatch(mol):
                break
        else:
            # no molecule rejected the fragment (also true for an empty mollist)
            corefrags.append(frag)
    return corefrags
def __setupSubStructure(self, refmol):
    """Build and store the substructure search object for the reference molecule.

    The atom and bond expression options are looked up from the configured
    search type.
    """
    atomOpts, bondOpts = OeCommonUtils.getAtomBondExprOpts(self.__searchType)
    self.__ss = oechem.OESubSearch(refmol, atomOpts, bondOpts)
    if not self.__verbose:
        return
    logger.info("Initialize SS (%r)", self.__searchType)
def keep_molecule(mol, max_heavy_atoms=100, remove_smirks=(), max_metals=0, elements=[], check_type=None):
    """Decide whether a molecule passes the configured filters.

    Parameters
    ----------
    mol : OEMol to test
    max_heavy_atoms : reject molecules with more heavy atoms than this
    remove_smirks : iterable of SMIRKS/SMARTS strings; a molecule matching any
        of them is rejected. Patterns that fail to parse are skipped.
    max_metals : reject molecules with more metal atoms than this
    elements : passed to ``read_Elements`` unless None; the molecule is
        rejected when ``check_element`` returns a false value
    check_type : comma-separated string passed (split) to ``check_atomtype``
        unless None

    Returns
    -------
    bool
        False as soon as one filter rejects the molecule, otherwise the result
        of ``check_valence(mol)``.
    """
    # NOTE(review): `elements` keeps its list default because it is forwarded
    # to read_Elements, whose accepted types are not visible here.
    if oechem.OECount(mol, oechem.OEIsMetal()) > max_metals:
        return False
    heavy_atoms = oechem.OECount(mol, oechem.OEIsHeavy())
    if heavy_atoms > max_heavy_atoms:
        return False
    # Remove very small molecules that are not interesting
    if heavy_atoms < 5:
        return False
    for smirks in remove_smirks:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, smirks):
            continue
        # Only existence matters, so stop at the first hit.
        if oechem.OESubSearch(qmol).SingleMatch(mol):
            return False
    if elements is not None:
        elements_list = read_Elements(elements)
        if not check_element(mol, elements_list):
            return False
    if check_type is not None:
        types = check_type.split(",")
        if not check_atomtype(mol, types):
            return False
    return check_valence(mol)
def OEAddLabel_OEMatch(disp):
    """Highlight and label every unique six-membered aromatic ring match in the display."""
    ring_search = oechem.OESubSearch("a1aaaaa1")
    ball_and_stick = oedepict.OEHighlightByBallAndStick(oechem.OELightGreen)
    # second argument True: unique matches only
    for ring_match in ring_search.Match(disp.GetMolecule(), True):
        oedepict.OEAddHighlighting(disp, ball_and_stick, ring_match)
        ring_label = oedepict.OEHighlightLabel("aromatic", oechem.OELightGreen)
        oedepict.OEAddLabel(disp, ring_label, ring_match)
def OEAddHighlighting_OEMatch(adisp):
    """Lasso-highlight each unique six-membered aromatic ring of the displayed ligand.

    Matches are paired with the vivid color sequence; pairing stops when
    either the matches or the colors run out.
    """
    displayed_ligand = adisp.GetDisplayedLigand()
    ring_search = oechem.OESubSearch("a1aaaaa1")
    palette = oechem.OEGetVividColors()
    # second argument True: unique matches only
    ring_matches = ring_search.Match(displayed_ligand, True)
    for ring_match, ring_color in zip(ring_matches, palette):
        lasso = oedepict.OEHighlightByLasso(ring_color)
        lasso.SetConsiderAtomLabelBoundingBox(True)
        oegrapheme.OEAddLigandHighlighting(adisp, lasso, ring_match)
def substructure_search(smi, target_smiles='[!C;!c]-[F,Cl,Br]'):
    """Report whether the molecule built from ``smi`` contains the given pattern.

    Parameters
    ----------
    smi : str
        SMILES of the molecule to search; explicit hydrogens are added before
        the search.
    target_smiles : str
        Pattern string handed to ``OESubSearch``.

    Returns
    -------
    The result of ``OESubSearch.SingleMatch`` on the prepared molecule.
    """
    from openeye import oechem

    pattern_search = oechem.OESubSearch(target_smiles)

    target = oechem.OEGraphMol()
    oechem.OESmilesToMol(target, smi)
    oechem.OEAddExplicitHydrogens(target)

    oechem.OEPrepareSearch(target, pattern_search)
    found = pattern_search.SingleMatch(target)
    return found
def match_subset(pattern: oechem.OEMol, target: oechem.OEMol):
    """Check whether ``pattern`` occurs as a substructure of ``target``.

    Atoms are compared by atomic number only (so explicit hydrogens take part
    in the match), and single and double bonds are treated as equivalent.
    """
    search = oechem.OESubSearch(
        pattern,
        oechem.OEExprOpts_AtomicNumber,   # atoms equal when atomic numbers agree
        oechem.OEExprOpts_EqSingleDouble,  # single/double bonds interchangeable
    )
    oechem.OEPrepareSearch(target, search)
    return search.SingleMatch(target)
def find_ortho_substituents(frag_smi):
    """Report whether the fragment matches the ortho-substituent SMARTS.

    The pattern looks for two non-hydrogen atoms, each attached through a
    non-ring bond to one of two ring atoms that share a ring bond. The
    molecule is built from ``frag_smi`` with explicit hydrogens added.
    """
    from openeye import oechem

    ortho_pattern = '[!#1]~!@[*;r]~;@[*;r]~!@[!#1]'

    fragment = oechem.OEGraphMol()
    oechem.OESmilesToMol(fragment, frag_smi)
    oechem.OEAddExplicitHydrogens(fragment)

    ortho_search = oechem.OESubSearch(ortho_pattern)
    oechem.OEPrepareSearch(fragment, ortho_search)
    return ortho_search.SingleMatch(fragment)
def main(argv=[__name__]):
    """Command-line driver: dispatch to the SMARTS-based or atom-based path length routine.

    Exactly one of the option pairs (-smarts1, -smarts2) or (-atom1, -atom2)
    must be given. The output stream is opened only when -o is supplied.
    """
    itf = oechem.OEInterface(Interface, argv)

    by_smarts = itf.HasString("-smarts1") and itf.HasString("-smarts2")
    by_atoms = itf.HasString("-atom1") and itf.HasString("-atom2")
    if not (by_smarts ^ by_atoms):
        oechem.OEThrow.Fatal(
            "-smarts1 and -smarts2 or -atom1 and -atom2 must be set")

    ifs = oechem.oemolistream()
    if not ifs.open(itf.GetString("-i")):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % itf.GetString("-i").rstrip())

    ofs = oechem.oemolostream()
    if itf.HasString("-o") and not ofs.open(itf.GetString("-o")):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % itf.GetString("-o"))

    if not by_smarts:
        atom1 = itf.GetString("-atom1")
        atom2 = itf.GetString("-atom2")
        AtomPathLength(ifs, ofs, itf, atom1, atom2)
        return

    # Parse both SMARTS options the same way, failing fatally on a bad pattern.
    searches = []
    for option, failure in (("-smarts1", "Unable to parse SMARTS1: %s"),
                            ("-smarts2", "Unable to parse SMARTS2: %s")):
        search = oechem.OESubSearch()
        pattern = itf.GetString(option)
        if not search.Init(pattern):
            oechem.OEThrow.Fatal(failure % pattern.rstrip())
        searches.append(search)
    SmartsPathLength(ifs, ofs, itf, searches[0], searches[1])
def __init__(self, molComplex, fprintDef=False):
    """Store the complex and build the substructure searches used for fingerprinting.

    Parameters
    ----------
    molComplex : the molecular complex to fingerprint (stored as given)
    fprintDef : fingerprint definition; when None the string "11111111111"
        is used, any other value is stored unchanged
    """
    self.molComplex = molComplex
    self.fprint = []
    self.fprintConsensus = []
    # NOTE(review): the parameter default is False, not None, so omitting
    # fprintDef stores False rather than the all-ones definition below.
    # Confirm whether the default was meant to be None.
    if fprintDef is None:
        self.fprintDef = "11111111111"
    else:
        self.fprintDef = fprintDef

    #---------------#
    # OESubSearches #
    #---------------#

    # Hydrophobe: aliphatic carbon, sulfur, fluorine, chlorine, bromine
    # or iodine
    self.ssHydrophobe = oechem.OESubSearch('[C,S,F,Cl,Br,I]')

    # H-bond donor: oxygen, nitrogen, sulfur or fluorine bonded to a hydrogen
    self.ssDonor = oechem.OESubSearch('[O,N,S,F][H]')

    # H-bond acceptor: (oxygen, nitrogen or any negatively charged atom)
    # AND not a positively charged atom
    self.ssAcceptor = oechem.OESubSearch('[O,N,*-;!+]')

    # Weak H-bond acceptor: two atoms (aliphatic or aromatic) joined by a
    # double, triple or aromatic bond
    self.ssWkAcceptor = oechem.OESubSearch('[A,a]=,#,:[A,a]')

    # Weak H-bond donor: aromatic carbon, or aliphatic carbon with three or
    # two connections, bonded to a hydrogen
    self.ssWkDonor = oechem.OESubSearch('[c,CX3,CX2][H]')

    # Cation: any positively charged atom
    self.ssCation = oechem.OESubSearch('[*+]')

    # Anion: any negatively charged atom
    self.ssAnion = oechem.OESubSearch('[*-]')

    # Metal atom
    self.ssMetal = oechem.OESubSearch('[Ca,Cd,Co,Cu,Fe,Mg,Mn,Ni,Zn]')
def main(argv=[__name__]):
    """Command-line driver: align each fit molecule onto the reference via a SMARTS match.

    Expects ``<refmol> <fitmol> <outfile> <smarts>``. All three files must be
    in a 3D format. The reference molecule is written first; fit molecules
    that lack 3D coordinates or do not match the SMARTS are skipped with a
    warning.
    """
    fatal = oechem.OEThrow.Fatal
    warn = oechem.OEThrow.Warning

    if len(argv) != 5:
        oechem.OEThrow.Usage("%s <refmol> <fitmol> <outfile> <smarts>" % argv[0])

    # --- reference molecule -------------------------------------------------
    reffs = oechem.oemolistream()
    if not reffs.open(argv[1]):
        fatal("Unable to open %s for reading" % argv[1])
    if not oechem.OEIs3DFormat(reffs.GetFormat()):
        fatal("Invalid input format: need 3D coordinates")
    refmol = oechem.OEGraphMol()
    if not oechem.OEReadMolecule(reffs, refmol):
        fatal("Unable to read molecule in %s" % argv[1])
    if refmol.GetDimension() != 3:
        fatal("%s doesn't have 3D coordinates" % refmol.GetTitle())

    # --- fit molecules stream -----------------------------------------------
    fitfs = oechem.oemolistream()
    if not fitfs.open(argv[2]):
        fatal("Unable to open %s for reading" % argv[2])
    if not oechem.OEIs3DFormat(fitfs.GetFormat()):
        fatal("Invalid input format: need 3D coordinates")

    # --- output stream ------------------------------------------------------
    ofs = oechem.oemolostream()
    if not ofs.open(argv[3]):
        fatal("Unable to open %s for writing" % argv[3])
    if not oechem.OEIs3DFormat(ofs.GetFormat()):
        fatal("Invalid output format: need 3D coordinates")

    oechem.OEWriteConstMolecule(ofs, refmol)

    # --- SMARTS pattern; it must match the reference ------------------------
    ss = oechem.OESubSearch()
    if not ss.Init(argv[4]):
        fatal("Unable to parse SMARTS: %s" % argv[4])

    oechem.OEPrepareSearch(refmol, ss)
    if not ss.SingleMatch(refmol):
        fatal("SMARTS fails to match refmol")

    for fitmol in fitfs.GetOEGraphMols():
        if fitmol.GetDimension() != 3:
            warn("%s doesn't have 3D coordinates" % fitmol.GetTitle())
            continue
        oechem.OEPrepareSearch(fitmol, ss)
        if ss.SingleMatch(fitmol):
            SmartsAlign(refmol, fitmol, ss, ofs)
        else:
            warn("SMARTS fails to match fitmol %s" % fitmol.GetTitle())
def _tag_fgroups(mol, fgroups_smarts=None):
    """
    This function tags atoms and bonds of functional groups defined in fgroups_smarts.
    fgroups_smarts is a dictionary that maps functional groups to their smarts pattern.
    It can be user generated or from yaml file.

    Parameters
    ----------
    mol: Openeye OEMolGraph
    fgroups_smarts: dictionary of functional groups mapped to their smarts pattern.
        Default is None. It uses 'fgroup_smarts.yml' from the fragmenter
        package data. Patterns that fail to parse are reported and skipped.

    Returns
    -------
    fgroup_tagged: dict
        a dictionary that maps indexed functional groups ('<name>_<i>') to a
        tuple (set of atom indices, set of bond indices) in mol

    """
    if not fgroups_smarts:
        # Load yaml file
        fn = resource_filename('fragmenter', os.path.join('data', 'fgroup_smarts.yml'))
        with open(fn, 'r') as f:
            fgroups_smarts = yaml.safe_load(f)

    tag = oechem.OEGetTag('fgroup')
    fgroup_tagged = {}
    for f_group in fgroups_smarts:
        qmol = oechem.OEQMol()
        if not oechem.OEParseSmarts(qmol, fgroups_smarts[f_group]):
            print('OEParseSmarts failed')
            # Do not search with a query that failed to parse.
            continue
        ss = oechem.OESubSearch(qmol)
        oechem.OEPrepareSearch(mol, ss)

        # second argument True: unique matches only
        for i, match in enumerate(ss.Match(mol, True)):
            label = '{}_{}'.format(f_group, str(i))
            fgroup_atoms = set()
            for ma in match.GetAtoms():
                fgroup_atoms.add(ma.target.GetIdx())
                ma.target.SetData(tag, label)
            fgroup_bonds = set()
            for ma in match.GetBonds():
                fgroup_bonds.add(ma.target.GetIdx())
                ma.target.SetData(tag, label)

            fgroup_tagged[label] = (fgroup_atoms, fgroup_bonds)
    return fgroup_tagged
def get_atom_map(tagged_smiles, molecule=None):
    """
    Returns a dictionary that maps tag on SMILES to atom index in molecule.

    Parameters
    ----------
    tagged_smiles: str
        index-tagged explicit hydrogen SMILES string
    molecule: OEMol
        molecule to generate map for. If None, a new OEMol will be generated
        from the tagged SMILES and the map will refer to that molecule.

    Returns
    -------
    atom_map: dict or False
        a dictionary that maps tag to atom index {tag:idx}, or False when the
        substructure search finds no match.

    Notes
    -----
    NOTE(review): when ``molecule`` is None the generated molecule is NOT
    returned. The original code had a trailing ``if molecule is None`` branch
    intended to return it, but that branch could never run because
    ``molecule`` had already been reassigned. The return shape is kept as it
    actually behaved so existing callers are unaffected; callers that need
    the molecule should build it themselves and pass it in.
    """
    if molecule is None:
        molecule = openeye.smiles_to_oemol(tagged_smiles)

    ss = oechem.OESubSearch(tagged_smiles)
    oechem.OEPrepareSearch(molecule, ss)
    ss.SetMaxMatches(1)

    atom_map = {}
    t1 = time.time()
    matches = [m for m in ss.Match(molecule)]
    t2 = time.time()
    seconds = t2 - t1
    logger().info("Substructure search took {} seconds".format(seconds))
    if not matches:
        logger().info("MCSS failed for {}, smiles: {}".format(
            molecule.GetTitle(), tagged_smiles))
        return False
    for match in matches:
        for ma in match.GetAtoms():
            atom_map[ma.pattern.GetMapIdx()] = ma.target.GetIdx()

    # sanity check: log the SMILES of the matched subset (last match)
    mol = oechem.OEGraphMol()
    oechem.OESubsetMol(mol, match, True)
    logger().info("Match SMILES: {}".format(oechem.OEMolToSmiles(mol)))

    return atom_map
def __subStructureSearch(self, oeQueryMol, idxList, reverseFlag=False):
    """Perform a graph match for the input query molecule on the binary database of molecules.

    The search is restricted to the input index list. The sense of the search
    may be optionally reversed. Atom and bond expression options are taken
    from the instance.

    Args:
        oeQueryMol (object): query molecule (OEGraphMol or OEQMol)
        idxList (list): database indices to search
        reverseFlag (bool, optional): when True, collect the molecules that do
            NOT match the query. Defaults to False.

    Returns:
        (bool, list, list): status flag (False if the search could not be set
            up or an exception was raised), list of hit indices, and list of
            (index, score) tuples where score is the ratio of query atoms to
            target atoms.
    """
    hL = []
    sL = []
    try:
        ss = oechem.OESubSearch(oeQueryMol, self.__atomexpr, self.__bondexpr)
        if not ss.IsValid():
            logger.error("Unable to initialize substructure search!")
            return False, hL, sL
        #
        numQueryAtoms = float(oeQueryMol.NumAtoms())
        for idx in idxList:
            mol = oechem.OEGraphMol()
            ccId = self.__oeMolDb.GetTitle(idx)
            if not self.__oeMolDb.GetMolecule(mol, idx):
                logger.error("Unable to read molecule %r at index %r", ccId, idx)
                continue
            oechem.OEPrepareSearch(mol, ss)
            if ss.SingleMatch(mol) != reverseFlag:
                # A reversed search can report a molecule with no atoms; do
                # not let that abort the whole search with a division error.
                numMolAtoms = mol.NumAtoms()
                score = numQueryAtoms / float(numMolAtoms) if numMolAtoms else 0.0
                hL.append(idx)
                sL.append((idx, score))
        retStatus = True
    except Exception as e:
        retStatus = False
        logger.exception("Failing with %s", str(e))
    #
    return retStatus, hL, sL
def match_smirks(smirks: str, oe_molecule: OEMol, unique: bool = False) -> List[Dict[int, int]]:
    """Find the atoms of a molecule which match a SMIRKS pattern.

    Parameters
    ----------
    smirks
        The SMIRKS pattern to match.
    oe_molecule
        The molecule to match against.
    unique
        Whether to return back only unique matches.

    Returns
    -------
        One dictionary per match, mapping each tagged pattern atom
        (map index minus one) to the index of the molecule atom it matched.
        Untagged pattern atoms (map index 0) are left out.
    """
    from openeye import oechem

    query = oechem.OEQMol()
    call_openeye(
        oechem.OEParseSmarts,
        query,
        smirks,
        exception_type=InvalidSmirksError,
        exception_kwargs={"smirks": smirks},
    )

    searcher = oechem.OESubSearch(query)
    # 0 lifts the cap on the number of matches returned
    searcher.SetMaxMatches(0)

    results = []
    for hit in searcher.Match(oe_molecule, unique):
        index_map = {}
        for pair in hit.GetAtoms():
            map_index = pair.pattern.GetMapIdx()
            if map_index == 0:
                continue
            index_map[map_index - 1] = pair.target.GetIdx()
        results.append(index_map)

    return results
def __init__(self, infileName, tagname):
    """Load SMARTS typing rules from a file.

    Each useful line holds exactly two whitespace-separated tokens,
    ``<smarts> <type>``; anything from a '%' to the end of the line is treated
    as a comment. Lines with any other token count are ignored. SMARTS that
    fail to parse are reported with a warning and skipped.

    Parameters
    ----------
    infileName : path of the rules file
    tagname : name of the generic-data tag, resolved with ``OEGetTag``
    """
    self.pattyTag = oechem.OEGetTag(tagname)
    self.smartsList = []
    with open(infileName) as ifs:
        for line in ifs:
            # Strip trailing comments
            line = line.partition('%')[0]
            # Split into tokens (str.split works on Python 2 and 3, unlike
            # the removed string.split function).
            toks = line.split()
            if len(toks) != 2:
                continue
            smarts, atomType = toks
            pat = oechem.OESubSearch()
            if not pat.Init(smarts):
                oechem.OEThrow.Warning("Unable to parse SMARTS: %s" % smarts)
                continue
            # 0 lifts the cap on the number of matches returned
            pat.SetMaxMatches(0)
            self.smartsList.append([pat, atomType, smarts])
def smirks_search(self, smirks):
    """
    Performs a substructure search on the molecule with the provided SMIRKS pattern.

    Parameters
    ----------
    smirks: str
        SMIRKS pattern with indexed atoms (:n)

    Returns
    -------
    matches: list of dictionaries
        one dictionary per (non-unique) match, mapping each non-zero SMIRKS
        map index to the object returned by ``get_atom_by_index`` for the
        matched atom

    Raises
    ------
    ValueError
        If the SMIRKS pattern cannot be parsed.
    """
    matches = list()

    ss = oechem.OESubSearch()
    if not ss.Init(smirks):
        # TODO: write custom exceptions?
        raise ValueError("Error parsing SMIRKS %s" % smirks)

    # set maximum matches in substructure search to infinite (0 in API) so
    # that results are not silently truncated at the toolkit default
    ss.SetMaxMatches(0)

    for match in ss.Match(self.mol, False):
        d = dict()
        for ma in match.GetAtoms():
            smirks_idx = ma.pattern.GetMapIdx()
            # if the map index is 0 then it isn't a "tagged" atom in the SMIRKS
            if smirks_idx != 0:
                d[smirks_idx] = self.get_atom_by_index(ma.target.GetIdx())

        matches.append(d)

    return matches
def fixAcid(self, mol):
    """
    Dirty fix that puts a negative formal charge on the neutral oxygen of
    every carboxylate group found in the molecule.

    As the charge is attributed to single atoms, some ionic interactions
    would be missed if proximity is verified with the neutral O atom.

    The molecule is modified in place; nothing is returned.
    """
    # SMARTS definition for a carboxylate (one oxygen already charged)
    carbox = oechem.OESubSearch("[CX3](=O)[O-]")

    # NOTE(review): no residue filter is applied here; every match in `mol`
    # is processed, not only ASP/GLU side chains.
    for match in carbox.Match(mol):
        for atom in match.GetTargetAtoms():
            # Only neutral atoms whose name contains "O" are charged.
            if atom.GetFormalCharge() == 0 and "O" in atom.GetName():
                atom.SetFormalCharge(-1)
def main(argv=[__name__]):
    """Command-line driver: depict each input molecule aligned to a SMARTS pattern.

    The pattern gets 2D coordinates and each molecule is laid out to match
    it. When a molecule does not contain the pattern a warning is issued and
    a plain depiction is prepared instead. Returns 0.
    """
    fatal = oechem.OEThrow.Fatal

    itf = oechem.OEInterface(InterfaceData)
    if not oechem.OEParseCommandLine(itf, argv):
        fatal("Unable to interpret command line!")

    iname = itf.GetString("-in")
    oname = itf.GetString("-out")
    smarts = itf.GetString("-smarts")

    # Build the query and give it the 2D layout the targets are aligned to.
    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, smarts):
        fatal("Invalid SMARTS: %s" % smarts)
    oechem.OEGenerate2DCoordinates(qmol)

    ss = oechem.OESubSearch(qmol)
    if not ss.IsValid():
        fatal("Unable to initialize substructure search!")

    ifs = oechem.oemolistream()
    if not ifs.open(iname):
        fatal("Cannot open input molecule file!")

    ofs = oechem.oemolostream()
    if not ofs.open(oname):
        fatal("Cannot open output file!")
    if not oechem.OEIs2DFormat(ofs.GetFormat()):
        fatal("Invalid output format for 2D coordinates")

    for mol in ifs.GetOEGraphMols():
        oechem.OEPrepareSearch(mol, ss)
        aligned = oedepict.OEPrepareAlignedDepiction(mol, ss).IsValid()
        if not aligned:
            oechem.OEThrow.Warning(
                "Substructure is not found in input molecule!")
            oedepict.OEPrepareDepiction(mol)
        oechem.OEWriteMolecule(ofs, mol)

    return 0
def _check_nitro(molecule):
    """
    Filter out nitro that is in ([NX3](=O)=O) form. OEGetReasonableTautomers generates this form.

    Parameters
    ----------
    molecule :
        OpenEye molecule to inspect; it is passed through OEPrepareSearch
        before matching.

    Returns
    -------
    bool
        True if the molecule contains the ([NX3](=O)=O) pattern.
    """
    from openeye import oechem

    qmol = oechem.OEQMol()
    if not oechem.OEParseSmarts(qmol, '([NX3](=O)=O)'):
        print('OEParseSmarts failed')
    ss = oechem.OESubSearch(qmol)
    oechem.OEPrepareSearch(molecule, ss)
    # Only existence matters, so stop at the first hit.
    return bool(ss.SingleMatch(molecule))
def smirks_search(self, smirks):
    """Search a copy of the molecule for all (non-unique) matches of a SMIRKS pattern.

    Returns a list with one dictionary per match, mapping each non-zero
    SMIRKS map index to the object returned by ``get_atom_by_index`` for the
    matched atom. Raises ValueError when the SMIRKS cannot be parsed.
    """
    mol_copy = oechem.OEMol(self.mol)

    searcher = oechem.OESubSearch()
    if not searcher.Init(smirks):
        raise ValueError("Error parsing SMIRKS %s" % smirks)
    # 0 means "no limit" on the number of matches
    searcher.SetMaxMatches(0)

    found = []
    for hit in searcher.Match(mol_copy, False):
        tagged = {}
        for pair in hit.GetAtoms():
            tag_idx = pair.pattern.GetMapIdx()
            # map index 0 marks an untagged pattern atom
            if tag_idx == 0:
                continue
            tagged[tag_idx] = self.get_atom_by_index(pair.target.GetIdx())
        found.append(tagged)
    return found
def align2d(file1, file2):
    """Write 'output.pdf' showing molecules of two files side by side.

    Molecules are paired in file order. For each pair, hydrogens are
    suppressed, the second molecule gets 2D coordinates and is used as the
    substructure query (atomic number and ring membership on atoms, ring
    membership on bonds) for an aligned depiction of the first. A failed
    alignment is reported as an error and the pair is still rendered.
    """
    atom_opts = oechem.OEExprOpts_AtomicNumber | oechem.OEExprOpts_RingMember
    bond_opts = oechem.OEExprOpts_RingMember

    stream1 = oechem.oemolistream(file1)
    stream2 = oechem.oemolistream(file2)
    for stream in (stream1, stream2):
        stream.SetConfTest(oechem.OEAbsCanonicalConfTest())

    popts, dopts, report = prep_pdf_writer()

    for fit_mol, ref_mol in zip(stream1.GetOEMols(), stream2.GetOEMols()):
        oechem.OESuppressHydrogens(fit_mol)
        oechem.OESuppressHydrogens(ref_mol)
        oechem.OEGenerate2DCoordinates(ref_mol)

        search = oechem.OESubSearch(ref_mol, atom_opts, bond_opts)
        oechem.OEPrepareSearch(fit_mol, search)
        if not oedepict.OEPrepareAlignedDepiction(fit_mol, search).IsValid():
            oechem.OEThrow.Error(
                "Substructure is not found in input molecule!")

        fit_cell = report.NewCell()
        ref_cell = report.NewCell()
        oedepict.OEPrepareDepiction(fit_mol, popts)
        oedepict.OEPrepareDepiction(ref_mol, popts)
        fit_disp = oedepict.OE2DMolDisplay(fit_mol, dopts)
        ref_disp = oedepict.OE2DMolDisplay(ref_mol, dopts)
        oedepict.OERenderMolecule(fit_cell, fit_disp)
        oedepict.OERenderMolecule(ref_cell, ref_disp)

    ofs = oechem.oeofstream()
    if not ofs.open('output.pdf'):
        oechem.OEThrow.Fatal("Cannot open output file!")
    oedepict.OEWriteReport(ofs, "pdf", report)
def main(argv=[__name__]):
    """Command-line driver for a SMARTS substructure search over a molecule file.

    Exactly one of counting (-c) or output (-o) must be requested. The input
    file comes from -i and the SMARTS pattern from -p; the work itself is
    done by ``SubSearch``.
    """
    itf = oechem.OEInterface(InterfaceData, argv)

    count_only = itf.GetBool("-c")
    if not (count_only ^ itf.HasString("-o")):
        # Adjacent literals instead of a backslash continuation inside the
        # string, which embedded the source indentation in the message.
        oechem.OEThrow.Fatal("Counting (-c) or output (-o) must be "
                             "specified and are mutually exclusive.")

    ifs = oechem.oemolistream()
    filename = itf.GetString("-i")
    if not ifs.open(filename):
        oechem.OEThrow.Fatal("Unable to open %s for reading" % filename)

    ofs = oechem.oemolostream()
    if not count_only:
        filename = itf.GetString("-o")
        if not ofs.open(filename):
            oechem.OEThrow.Fatal("Unable to open %s for writing" % filename)

    smarts = itf.GetString("-p")
    ss = oechem.OESubSearch()
    if not ss.Init(smarts):
        oechem.OEThrow.Fatal("Unable to parse SMARTS: %s" % smarts)

    SubSearch(itf, ss, ifs, ofs)
def smirks_search(self, smirks):
    """
    Run a substructure search for a SMIRKS pattern on a copy of the molecule.

    The pattern is expected to carry atom indices (``:n``) on at least some
    atoms; atoms without an index are not reported.

    Parameters
    ----------
    smirks: str
        SMIRKS pattern with indexed atoms (:n)

    Returns
    -------
    matches: list of dictionaries
        one dictionary per match, of the form {smirks index: atom}, where
        atom is what ``get_atom_by_index`` returns for the matched atom index

    Raises
    ------
    ValueError
        If the SMIRKS pattern cannot be parsed.
    """
    working_mol = oechem.OEMol(self.mol)
    results = []

    search = oechem.OESubSearch()
    parsed = search.Init(smirks)
    if not parsed:
        raise ValueError("Error parsing SMIRKS %s" % smirks)

    # 0 is the API's value for an unlimited number of matches
    search.SetMaxMatches(0)

    # False: report every match, not only unique ones
    for match in search.Match(working_mol, False):
        indexed_atoms = {}
        for atom_pair in match.GetAtoms():
            label = atom_pair.pattern.GetMapIdx()
            if label == 0:
                # untagged atom in the SMIRKS
                continue
            indexed_atoms[label] = self.get_atom_by_index(atom_pair.target.GetIdx())
        results.append(indexed_atoms)

    return results
def get_atom_map(molecule, mapped_smiles, strict=True):
    """
    Map the indices of a mapped SMILES onto atom indices of a molecule.

    A molecule is built from the mapped SMILES, put into canonical atom
    order, and used as the pattern of a substructure search (default atom and
    bond expressions, at most one match) against ``molecule``. Canonical
    ordering is meant to keep symmetric atoms from flipping between calls,
    although that is not guaranteed.

    Parameters
    ----------
    molecule: oechem.OEMol
        Must have explicit hydrogen
    mapped_smiles: str
        explicit hydrogen SMILES with map indices on every atom
    strict: bool
        When True (default) a molecule without explicit hydrogens is rejected.

    Returns
    -------
    atom_map: dict
        {map_idx:atom_idx}

    Raises
    ------
    ValueError
        If the mapped SMILES lacks map indices, or (with ``strict``) the
        molecule lacks explicit hydrogens.
    RuntimeError
        If no match is found, or the matched subset, the input molecule and
        the pattern do not give the same SMILES.
    """
    pattern_mol = oechem.OEMol()
    oechem.OESmilesToMol(pattern_mol, mapped_smiles)
    if not has_atom_map(pattern_mol):
        raise ValueError(
            "Mapped SMILES must have map indices for all atoms and hydrogens")
    if not has_explicit_hydrogen(molecule) and strict:
        raise ValueError("Molecule must have explicit hydrogens")

    # canonical order so the map is generated the same way every time
    canonical_order_atoms(pattern_mol)

    search = oechem.OESubSearch(
        pattern_mol,
        oechem.OEExprOpts_DefaultAtoms,
        oechem.OEExprOpts_DefaultBonds,
    )
    oechem.OEPrepareSearch(molecule, search)
    search.SetMaxMatches(1)

    matches = [m for m in search.Match(molecule)]
    if not matches:
        raise RuntimeError("MCSS failed for {}, smiles: {}".format(
            oechem.OEMolToSmiles(molecule), mapped_smiles))

    atom_map = {}
    for match in matches:
        for pair in match.GetAtoms():
            atom_map[pair.pattern.GetMapIdx()] = pair.target.GetIdx()

    # sanity check: matched subset, input molecule and pattern must agree
    subset = oechem.OEGraphMol()
    oechem.OESubsetMol(subset, match, True)
    plain = dict(isomeric=False, explicit_hydrogen=False, mapped=False)
    matched_smiles = mol_to_smiles(subset, **plain)
    smiles = mol_to_smiles(oechem.OEMol(molecule), **plain)
    pattern_smiles = mol_to_smiles(pattern_mol, **plain)
    if not (matched_smiles == smiles and smiles == pattern_smiles):
        raise RuntimeError(
            "Matched molecule, input molecule and mapped SMILES are not the same "
        )
    return atom_map
# TERMS FOR USE OF SAMPLE CODE The software below ("Sample Code") is
# provided to current licensees or subscribers of OpenEye products or
# SaaS offerings (each a "Customer").
# Customer is hereby permitted to use, copy, and modify the Sample Code,
# subject to these terms. OpenEye claims no rights to Customer's
# modifications. Modification of Sample Code is at Customer's sole and
# exclusive risk. Sample Code may require Customer to have a then
# current license or subscription to the applicable OpenEye offering.
# THE SAMPLE CODE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED. OPENEYE DISCLAIMS ALL WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
# PARTICULAR PURPOSE AND NONINFRINGEMENT. In no event shall OpenEye be
# liable for any damages or liability in connection with the Sample Code
# or its use.

# @ <SNIPPET>
from __future__ import print_function
from openeye import oechem

# target molecule: toluene
mol = oechem.OEGraphMol()
oechem.OESmilesToMol(mol, "c1ccccc1C")

# substructure query: benzene ring
ss = oechem.OESubSearch("c1ccccc1")
oechem.OEPrepareSearch(mol, ss)

# report whether the query occurs in the target
found = ss.SingleMatch(mol)
print("benzene matches toluene" if found else "benzene does not match toluene")
# @ </SNIPPET>