def _standardize_smiles(smiles):
    """Return a canonical form of a SMILES pattern using the `cmiles` library.

    An isomeric SMILES is requested first. If `cmiles` raises a
    ``ValueError`` for the isomeric request, a non-isomeric SMILES is
    produced instead, so the result is canonical but not necessarily
    isomeric.

    Parameters
    ----------
    smiles: str
        The SMILES pattern to standardize.

    Returns
    -------
    The standardized SMILES pattern.
    """
    from cmiles.utils import load_molecule, mol_to_smiles

    parsed = load_molecule(smiles, toolkit="rdkit")

    # Options common to the isomeric attempt and the non-isomeric fall-back.
    shared_options = {"explicit_hydrogen": False, "mapped": False}

    try:
        # Preferred: isomeric SMILES.
        return mol_to_smiles(parsed, isomeric=True, **shared_options)
    except ValueError:
        # Fall back to non-isomeric.
        return mol_to_smiles(parsed, isomeric=False, **shared_options)
def test_add_substituent():
    """Check that adding the next substituent to the (3, 5) fragment of hexane regrows hexane."""
    parent = chemi.smiles_to_oemol('CCCCCC')
    f = fragmenter.fragment.WBOFragmenter(parent)
    f.fragment()

    target = (3, 5)
    assert mol_to_smiles(f.fragments[target], mapped=False, explicit_hydrogen=False) == 'CCCCC'

    frag_mol = f.fragments[target]

    # Collect map indices of heavy atoms only.
    heavy_atoms = {atom.GetMapIdx() for atom in frag_mol.GetAtoms() if not atom.IsHydrogen()}

    # Collect bonds (as map-index pairs) in which neither end is a hydrogen.
    heavy_bonds = set()
    for bond in frag_mol.GetBonds():
        begin = bond.GetBgn()
        end = bond.GetEnd()
        if not (begin.IsHydrogen() or end.IsHydrogen()):
            heavy_bonds.add((begin.GetMapIdx(), end.GetMapIdx()))

    grown = f._add_next_substituent(heavy_atoms, heavy_bonds, target_bond=target)
    assert mol_to_smiles(grown, mapped=False, explicit_hydrogen=False) == 'CCCCCC'
def test_expand_protonation_states(self):
    """Test that expanding states yields exactly the expected protonation-state SMILES."""
    parent_smiles = 'C5=C(C1=CN=CC=C1)N=C(NC2=C(C=CC(=C2)NC(C3=CC=C(C=C3)CN4CCN(CC4)C)=O)C)N=C5'
    parent = chemi.smiles_to_oemol(parent_smiles)
    expanded = fragmenter.fragment._expand_states(parent)

    expected = {
        'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
        'Cc1ccc(cc1Nc2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3ccc[nH+]c3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CCN(CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)CN5CC[NH+](CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CCN(CC5)C',
        'Cc1ccc(cc1[N-]c2nccc(n2)c3cccnc3)NC(=O)c4ccc(cc4)C[NH+]5CC[NH+](CC5)C',
    }

    observed = {
        mol_to_smiles(state, mapped=False, explicit_hydrogen=False, isomeric=True)
        for state in expanded
    }

    # The sets match exactly when the overlap is as large as each of them.
    overlap = expected & observed
    self.assertEqual(len(overlap), len(expected))
    self.assertEqual(len(overlap), len(observed))
def test_map_order_geometry(permute, toolkit, toolkit_name):
    """Test that the map-ordered geometry agrees with the input geometry via the atom map."""
    hooh = {
        'symbols': ['H', 'O', 'O', 'H'],
        'geometry': [
            1.84719633, 1.47046223, 0.80987166,
            1.3126021, -0.13023157, -0.0513322,
            -1.31320906, 0.13130216, -0.05020593,
            -1.83756335, -1.48745318, 0.80161212
        ],
        'name': 'HOOH',
        'connectivity': [[0, 1, 1], [1, 2, 1], [2, 3, 1]],
    }
    tolerance = 0.0000001

    mol = utils.load_molecule(hooh, toolkit=toolkit_name, permute_xyz=permute)
    mapped_smiles = utils.mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=True)
    atom_map = utils.get_atom_map(mol, mapped_smiles)
    symbols, geometry = toolkit.get_map_ordered_geometry(mol, atom_map)

    # Both flat coordinate lists are viewed as (n_atoms, 3) arrays.
    n_atoms = int(len(geometry) / 3)
    input_xyz = np.asarray(hooh['geometry']).reshape(n_atoms, 3)
    ordered_xyz = np.asarray(geometry).reshape(n_atoms, 3)

    for map_idx in atom_map:
        # Map indices are 1-based; the ordered geometry rows are 0-based.
        input_row = input_xyz[atom_map[map_idx]]
        ordered_row = ordered_xyz[map_idx - 1]
        for input_value, ordered_value in zip(input_row, ordered_row):
            assert input_value == pytest.approx(ordered_value, tolerance)

    if not permute:
        assert hooh['geometry'] == pytest.approx(geometry, tolerance)
def request_callback(request, context):
    """Fake server response: check that the submitted SMILES is methane and return a fixed payload."""
    context.status_code = 200

    # Pull the SMILES out of the "smiData" part of the request body.
    found = re.search(r'"smiData"\r\n\r\n(.*?)\r\n', request.text)
    submitted = found.group(1)

    parsed = load_molecule(submitted, toolkit="rdkit")
    canonical = mol_to_smiles(
        parsed, isomeric=False, explicit_hydrogen=False, mapped=False
    )
    assert canonical == "C"

    return 'value="/tmp/0000.xml"'
def enumerate_states(self, molecule, title='', json_filename=None):
    """
    Enumerate states for a molecule using the options of this workflow.

    The molecule is standardized, its states are expanded with the
    'enumerate_states' options taken from ``self.off_workflow``, and the
    parent SMILES and title are recorded in the provenance.

    Parameters
    ----------
    molecule:
        Molecule input handed to ``chemi.standardize_molecule``.
    title: str, optional, default empty string
        Title passed to ``chemi.standardize_molecule``.
    json_filename: str, optional, default None
        json filename for states generated. If None will not write json file

    Returns
    -------
    json_dict: dict
        Dictionary with keys 'provenance' and 'states'. When `json_filename`
        is given, 'states' is converted to a list before being written and
        returned.
    """
    routine = 'enumerate_states'
    options = self.off_workflow.get_options('enumerate_states')['options']
    provenance = _get_provenance(workflow_id=self.workflow_id, routine=routine)

    molecule = chemi.standardize_molecule(molecule, title=title)
    parent_smiles = mol_to_smiles(molecule, isomeric=True, mapped=False, explicit_hydrogen=False)
    states = fragment.expand_states(molecule, **options)

    # Record which molecule the states were generated from.
    routine_record = provenance['routine']['enumerate_states']
    routine_record['parent_molecule'] = parent_smiles
    routine_record['parent_molecule_name'] = molecule.GetTitle()

    json_dict = {'provenance': provenance, 'states': states}
    if not json_filename:
        return json_dict

    # Converted to a list before dumping to JSON.
    json_dict['states'] = list(json_dict['states'])
    with open(json_filename, 'w') as f:
        json.dump(json_dict, f, indent=2, sort_keys=True)
    return json_dict
def test_tagged_smiles(self):
    """Test index-tagged SMILES generated from a mol2 file."""
    from openeye import oechem

    mol2_path = get_fn('ethylmethylidyneamonium.mol2')
    stream = oechem.oemolistream(mol2_path)
    loaded = oechem.OEMol()
    oechem.OEReadMolecule(stream, loaded)

    tagged = mol_to_smiles(loaded, isomeric=True, mapped=True, explicit_hydrogen=True)

    # Tags should always be the same as mol2 molecule ordering
    expected = '[H:5][C:1]#[N+:4][C:3]([H:9])([H:10])[C:2]([H:6])([H:7])[H:8]'
    self.assertEqual(tagged, expected)
def frag_to_smiles(frags, mol):
    """
    Convert fragments (AtomBondSet) to isomeric SMILES strings with explicit hydrogens.

    Parameters
    ----------
    frags: list
        Fragments; each must provide ``GetAtoms()`` and ``GetBonds()``.
    mol: OEMol
        Molecule the fragments are subsets of.

    Returns
    -------
    smiles: dict
        Maps each fragment SMILES to the list of fragments that produced it.

    Raises
    ------
    Warning
        If a fragment molecule contains an atom without any bonds.
    """
    grouped = {}
    adjust_h_count = True

    for frag in frags:
        atom_pred = oechem.OEIsAtomMember(frag.GetAtoms())
        bond_pred = oechem.OEIsBondMember(frag.GetBonds())

        # Build the fragment as its own molecule.
        sub_mol = oechem.OEMol()
        oechem.OESubsetMol(sub_mol, mol, atom_pred, bond_pred, adjust_h_count)
        oechem.OEPerceiveChiral(sub_mol)

        # Sanity check that all atoms are bonded.
        if any(not list(atom.GetBonds()) for atom in sub_mol.GetAtoms()):
            raise Warning("Yikes!!! An atom that is not bonded to any other atom in the fragment. "
                          "You probably ran into a bug. Please report the input molecule to the issue tracker")

        key = mol_to_smiles(sub_mol, mapped=False, explicit_hydrogen=True, isomeric=True)
        grouped.setdefault(key, []).append(frag)

    return grouped
def test_expand_enantiomers(self):
    """Test that stereoisomer expansion yields exactly the four expected R/S and cis/trans SMILES."""
    parent_smiles = 'CN(C)C/C=C/C(=O)NC1=C(C=C2C(=C1)C(=NC=N2)NC3=CC(=C(C=C3)F)Cl)O[C@H]4CCOC4'
    parent = chemi.smiles_to_oemol(parent_smiles)
    expanded = fragmenter.fragment._expand_states(
        parent, enumerate='stereoisomers')

    expected = {
        'CN(C)C/C=C/C(=O)Nc1cc2c(cc1O[C@@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
        'CN(C)C/C=C/C(=O)Nc1cc2c(cc1O[C@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
        'CN(C)C/C=C\\C(=O)Nc1cc2c(cc1O[C@@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
        'CN(C)C/C=C\\C(=O)Nc1cc2c(cc1O[C@H]3CCOC3)ncnc2Nc4ccc(c(c4)Cl)F',
    }

    observed = {
        mol_to_smiles(state, mapped=False, explicit_hydrogen=False, isomeric=True)
        for state in expanded
    }

    overlap = expected & observed
    self.assertEqual(len(overlap), len(expected))
    self.assertEqual(len(overlap), len(observed))
    self.assertEqual(len(expected), len(observed))
def find_torsions(molecule, restricted=True, terminal=True):
    # ToDo: Get rid of equivalent torsions. Ex H-C-C-C and C-C-C-H.
    """
    Find the torsions of a map-indexed OEMol, grouped as internal, terminal and restricted.

    If the molecule has no atom map, a new mapped SMILES is generated, the
    molecule is rebuilt from it and warnings are logged so the new ordering
    can be compared with any existing tagged SMARTS.

    Parameters
    ----------
    molecule : OEMol
        The atoms in the molecule need to be tagged with map indices
    restricted: bool, optional, default True
        If True, will find restricted torsions (matched by a SMARTS for double bonds).
    terminal: bool, optional, default True
        If True, will find terminal torsions

    Returns
    -------
    needed_torsion_scans: dict
        Maps 'internal', 'terminal' and 'restricted' to dictionaries of
        'torsion_<i>' -> tuple of four zero-based map indices.

    Raises
    ------
    Warning
        If the same torsion appears in both the internal and terminal groups.
    """
    from openeye import oechem

    # Check if molecule has map
    if not has_atom_map(molecule):
        utils.logger().warning('Molecule does not have atom map. A new map will be generated. You might need a new tagged SMARTS if the ordering was changed')
        tagged_smiles = mol_to_smiles(molecule, isomeric=True, mapped=True, explicit_hydrogen=True)
        # Generate new molecule with tags
        molecule = chemi.smiles_to_oemol(tagged_smiles)
        utils.logger().warning('If you already have a tagged SMARTS, compare it with the new one to ensure the ordering did not change')
        utils.logger().warning('The new tagged SMARTS is: {}'.format(tagged_smiles))
        # ToDo: save the new tagged SMILES somewhere. Maybe return it?

    needed_torsion_scans = {'internal': {}, 'terminal': {}, 'restricted': {}}
    mol = oechem.OEMol(molecule)

    def _zero_based_indices(torsion):
        # Map indices are stored shifted down by one.
        return tuple(torsion[position].GetMapIdx() - 1 for position in range(4))

    def _store(category, torsions):
        # Keep one torsion per rotatable bond and file it under `category`.
        reduced = one_torsion_per_rotatable_bond(torsions)
        for number, torsion in enumerate(reduced):
            needed_torsion_scans[category]['torsion_{}'.format(str(number))] = _zero_based_indices(torsion)

    if restricted:
        # This should capture double bonds (not capturing rings because OpenEye does not
        # generate skewed conformations. ToDo: use scan in geometric or something else to get this done.
        restricted_smarts = '[*]~[C,c]=,@[C,c]~[*]'
        restricted_tors = _find_torsions_from_smarts(molecule=mol, smarts=restricted_smarts)
        if len(restricted_tors) > 0:
            _store('restricted', restricted_tors)

    if terminal:
        # This smarts should match terminal torsions such as -CH3, -NH2, -NH3+, -OH, and -SH
        terminal_smarts = '[*]~[*]-[X2H1,X3H2,X4H3]-[#1]'
        h_tors = _find_torsions_from_smarts(molecule=mol, smarts=terminal_smarts)
        if len(h_tors) > 0:
            _store('terminal', h_tors)

    mid_tors = [[tor.a, tor.b, tor.c, tor.d] for tor in oechem.OEGetTorsions(mol)]
    if mid_tors:
        _store('internal', mid_tors)

    # Check that there are no duplicate torsions in mid and h_torsions
    combined = list(needed_torsion_scans['internal'].values()) + list(needed_torsion_scans['terminal'].values())
    if len(set(combined)) != len(combined):
        raise Warning("There is a torsion defined in both mid and terminal torsions. This should not happen. Check "
                      "your molecule and the atom mapping")

    return needed_torsion_scans
def generate_fragments(molecule, generate_visualization=False, strict_stereo=False, combinatorial=True, MAX_ROTORS=2,
                       remove_map=True, json_filename=None):
    """
    Generate fragments from one molecule or an iterable of molecules.

    The output is a dictionary that maps the SMILES of each parent molecule
    to the SMILES of its fragments. Parent SMILES are produced by
    ``mol_to_smiles`` with ``isomeric=True, explicit_hydrogen=False,
    mapped=False``.

    Parameters
    ----------
    molecule: OEMol or iterable of OEMol
        Molecule(s) to fragment. If the argument is not iterable it is
        treated as a single molecule.
    generate_visualization: bool
        If true, visualization of the fragments will be written to pdf files. The pdf will be written in the directory
        where this function is run from.
    strict_stereo: bool
        Passed through to ``_generate_fragments``.
    combinatorial: bool
        If true, fragments are combined with ``smiles_with_combined`` using MAX_ROTORS;
        otherwise ``frag_to_smiles`` is used.
    MAX_ROTORS: int
        rotor threshold for combinatorial
    remove_map: bool
        If True, the map index of every atom is reset to 0. This is done to make it easier to find duplicate
        fragments. Default True
    json_filename: str
        filename for JSON. If provided, will save the returned dictionary to a JSON file. Default is None

    Returns
    -------
    fragments: dict
        mapping of SMILES from the parent molecule to the SMILES of the fragments. A parent for which no
        fragment SMILES were produced maps to a one-element list holding its own explicit-hydrogen SMILES.
    """
    fragments = dict()

    # Accept either a single molecule or an iterable of molecules.
    try:
        molecules = list(molecule)
    except TypeError:
        molecules = [molecule]

    for mol in molecules:
        # normalize molecule
        mol = normalize_molecule(mol, mol.GetTitle())
        if remove_map:
            # Remove tags from smiles. This is done to make it easier to find duplicate fragments
            for atom in mol.GetAtoms():
                atom.SetMapIdx(0)

        frags = _generate_fragments(mol, strict_stereo=strict_stereo)
        if not frags:
            logger().warning('Skipping {}, SMILES: {}'.format(mol.GetTitle(), oechem.OECreateSmiString(mol)))
            continue

        # The first item is the molecule handed on to the SMILES helpers and ToPdf,
        # the last item is the mapping of fragments.
        charged = frags[0]
        frags = frags[-1]
        frag_list = list(frags.values())

        if combinatorial:
            smiles = smiles_with_combined(frag_list, charged, MAX_ROTORS)
        else:
            smiles = frag_to_smiles(frag_list, charged)

        parent_smiles = mol_to_smiles(mol, isomeric=True, explicit_hydrogen=False, mapped=False)
        if smiles:
            fragments[parent_smiles] = list(smiles.keys())
        else:
            # Add molecule where no fragments were found for terminal torsions and / or rings and non rotatable bonds
            fragments[parent_smiles] = [mol_to_smiles(mol, isomeric=True, explicit_hydrogen=True, mapped=False)]

        if generate_visualization:
            iupac_name = oeiupac.OECreateIUPACName(mol)
            name = mol.GetTitle()
            if iupac_name == name:
                # Title equals the IUPAC name: derive the file name from the SMILES instead.
                name = make_python_identifier(oechem.OEMolToSmiles(mol))[0]
            oname = '{}.pdf'.format(name)
            ToPdf(charged, oname, frags)

        # Drop references to the per-molecule objects before the next iteration.
        del charged, frags

    if json_filename:
        # Context manager guarantees the file is closed even if json.dump raises.
        with open(json_filename, 'w') as f:
            json.dump(fragments, f, indent=2, sort_keys=True)

    return fragments
def expand_states(molecule, protonation=True, tautomers=False, stereoisomers=True, max_states=200, level=0,
                  reasonable=True, carbon_hybridization=True, suppress_hydrogen=True, verbose=True, filename=None,
                  return_smiles_list=False, return_molecules=False):
    """
    Expand molecule states (choice of protonation, tautomers and/or stereoisomers).

    Protonation states expands molecules to protonation of protonation sites (Some states might only be reasonable
    in very high or low pH. ToDo: Only keep reasonable protonation states)
    Tautomers: Should expand to tautomer states but most of the results are some resonance structures. Default is
    False for this reason.
    Stereoisomers expands enantiomers and geometric isomers (cis/trans).

    Parameters
    ----------
    molecule: OEMol
        Molecule to expand
    protonation: Bool, optional, default=True
        If True will enumerate protonation states.
    tautomers: Bool, optional, default=False
        If True, will enumerate tautomers. (Note: Default is False because results usually give resonance
        structures which isn't needed for torsion scans)
    stereoisomers: Bool, optional, default=True
        If True will enumerate stereoisomers (cis/trans and R/S).
    max_states: int, optional, default=200
        maximum states enumeration should find
    level: int, optional, Default=0
        The level for enumerating tautomers. It can go up until 7. The higher the level, the more tautomers will be
        generated but they will also be less reasonable.
    reasonable: bool, optional, default=True
        Will rank tautomers enumerated energetically
        (https://docs.eyesopen.com/toolkits/python/quacpactk/tautomerstheory.html#reasonable-ranking)
    carbon_hybridization: bool, optional, default=True
        If True will allow carbons to change hybridization
    suppress_hydrogen: bool, optional, default=True
        If true, will suppress explicit hydrogen. It's considered best practice to set this to True when
        enumerating tautomers.
    verbose: Bool, optional, default=True
    filename: str, optional, default=None
        Filename to save SMILES to. If None, SMILES will not be saved to file.
    return_smiles_list: bool, optional, default=False
        If True, will return a list of SMILES with numbered name of molecule. Use this if you want to write out an
        smi file of all molecules processed with a unique numbered name for each state.
    return_molecules: bool, optional, default=False
        If true, will return list of OEMolecules instead of SMILES

    Returns
    -------
    states: set of SMILES for enumerated states.
        If `return_smiles_list` is True, a list of '<SMILES> <title>_<n>' strings is returned instead;
        otherwise, if `return_molecules` is True, the list of molecules is returned.
    """
    title = molecule.GetTitle()
    parent = molecule  # kept so the summary log reports the input molecule
    states = set()
    molecules = [molecule]

    if verbose:
        logger().info("Enumerating states for {}".format(title))
    if protonation:
        logger().info("Enumerating protonation states for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='protonation', max_states=max_states, verbose=verbose,
                                        level=level, suppress_hydrogen=suppress_hydrogen))
    if tautomers:
        logger().info("Enumerating tautomers for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='tautomers', max_states=max_states,
                                        reasonable=reasonable, carbon_hybridization=carbon_hybridization,
                                        verbose=verbose, level=level, suppress_hydrogen=suppress_hydrogen))
    if stereoisomers:
        logger().info("Enumerating stereoisomers for {}".format(title))
        molecules.extend(_expand_states(molecules, enumerate='stereoisomers', max_states=max_states,
                                        verbose=verbose))

    for mol in molecules:
        try:
            states.add(mol_to_smiles(mol, isomeric=True, mapped=False, explicit_hydrogen=False))
        except ValueError:
            logger().warning("Tautomer or protonation state has a chiral center. Expanding stereoisomers")
            # Convert each expanded stereoisomer (not the molecule that just failed).
            stereo_states = _expand_states(mol, enumerate='stereoisomers')
            for state in stereo_states:
                states.add(mol_to_smiles(state, isomeric=True, mapped=False, explicit_hydrogen=False))

    logger().info("{} states were generated for {}".format(len(states), oechem.OEMolToSmiles(parent)))

    # The numbered list is needed both for writing the file and for returning it,
    # so build it whenever either is requested.
    smiles_list = None
    if filename or return_smiles_list:
        smiles_list = ['{} {}_{}'.format(state_smiles, title, count)
                       for count, state_smiles in enumerate(states)]

    if filename:
        to_smi(smiles_list, filename)

    if return_smiles_list:
        return smiles_list

    if return_molecules:
        return molecules

    return states
def enumerate_states(molecule, tautomers=True, stereoisomers=True, verbose=False, return_mols=False,
                     explicit_h=True, return_names=False, max_stereo_returns=1, filter_nitro=True, **kwargs):
    """
    Expand tautomeric state and stereoisomers for molecule.

    Parameters
    ----------
    molecule : OEMol
        Molecule to enumerate states
    tautomers : bool, optional, default True
        If False, will not generate tautomers
    stereoisomers : bool, optional, default True
        If False, will not generate all stereoisomers.
    verbose : bool, optional, default False
        If True, output will be verbose
    return_mols : bool, optional, default False
        If True, will return oemols instead of SMILES. Some molecules might be duplicate states
    explicit_h : bool, optional, default True
        If True, SMILES of states will have explicit hydrogen
    return_names : bool, optional, default False
        If True, will return names of molecules with SMILES
    max_stereo_returns : int, optional, default 1
        When a state is missing stereo information, stereoisomers are generated for it;
        max_stereo_returns controls how many of those will be kept
    filter_nitro : bool, optional, default True
        If True, tautomers for which ``_check_nitro`` is true are dropped. Ignored (set to False)
        when ``_check_nitro`` is already true for the incoming molecule.
    **kwargs
        Passed to `_enumerate_stereoisomers` and `_enumerate_tautomers`. Per the original notes these
        include ``max_states``, ``pka_norm``, ``warts``, ``force_flip`` and ``enum_nitrogen``.

    Returns
    -------
    states: list
        list of oemols (if `return_mols`) or SMILES of states generated for molecule.
        If `return_names` is True (and `return_mols` is False), a tuple ``(states, names)`` is returned.
    """
    from openeye import oechem

    # If incoming molecule has nitro in form ([NX3](=O)=O), do not filter out later
    if _check_nitro(molecule):
        filter_nitro = False

    title = molecule.GetTitle()
    states = []
    names = []

    def _to_smiles(mol):
        # Single place for the SMILES options used throughout this function.
        return mol_to_smiles(mol, isomeric=True, mapped=False, explicit_hydrogen=explicit_h)

    def _record(smiles, source):
        # Append a new SMILES (and the title of the molecule it came from) once.
        if smiles not in states:
            states.append(smiles)
            if return_names:
                names.append(source.GetTitle())

    def _limit(mols):
        # Keep at most max_stereo_returns entries.
        if len(mols) > max_stereo_returns:
            return mols[:max_stereo_returns]
        return mols

    if verbose:
        logger().info("Enumerating states for {}".format(title))

    if stereoisomers:
        if verbose:
            logger().info("Enumerating stereoisomers for {}".format(title))
        stereo_mols = (_enumerate_stereoisomers(molecule, **kwargs))
        if verbose:
            logger().info('Enumerated {} stereoisomers'.format(len(stereo_mols)))

    if tautomers:
        if not stereoisomers:
            stereo_mols = [molecule]
        tau_mols = []
        if verbose:
            logger().info("Enumerating tautomers states for {}".format(title))
        for mol in stereo_mols:
            tau_mols.extend(_enumerate_tautomers(mol, **kwargs))
        if verbose:
            logger().info('Enumerated {} tautomers'.format(len(tau_mols)))

        # check for nitro in ([NX3](=O)=O) form
        if filter_nitro:
            tau_mols[:] = [mol for mol in tau_mols if not _check_nitro(mol)]

    if stereoisomers and tautomers:
        all_mols = stereo_mols + tau_mols
    elif stereoisomers and not tautomers:
        all_mols = stereo_mols
    elif not stereoisomers and tautomers:
        all_mols = tau_mols
        all_mols.append(molecule)
    else:
        all_mols = [molecule]

    if return_mols:
        return all_mols

    for mol in all_mols:
        try:
            smiles = _to_smiles(mol)
        except ValueError:
            smiles = None

        if smiles is not None:
            _record(smiles, mol)
            continue

        # Stereo is not fully defined. Use flipper with force_flip set to False
        stereo_states = _limit(_enumerate_stereoisomers(mol, force_flip=False, enum_nitrogen=True, warts=True))
        for state in stereo_states:
            try:
                smiles = _to_smiles(state)
            except ValueError:
                # Still undefined: force-flip all stereocenters of the original molecule.
                stereo_states_forced = _limit(
                    _enumerate_stereoisomers(mol, force_flip=True, enum_nitrogen=True, warts=True))
                for state_forced in stereo_states_forced:
                    # The name recorded must belong to the molecule the SMILES came from.
                    _record(_to_smiles(state_forced), state_forced)
                # Nothing valid was produced for `state` itself, so do not fall
                # through with a stale (or unset) SMILES.
                continue
            _record(smiles, state)

    if verbose:
        logger().info("{} states were generated for {}".format(len(states), oechem.OEMolToSmiles(molecule)))

    if return_names:
        return states, names

    return states
def enumerate_fragments(self, molecule, title='', mol_provenance=None, json_filename=None, generate_vis=False):
    """
    Fragment a molecule using the options of this workflow and attach identifiers and provenance.

    Parameters
    ----------
    molecule:
        Molecule input handed to ``chemi.standardize_molecule``.
    title: str, optional. Default empty str
        The title or name of the molecule, passed to ``chemi.standardize_molecule``.
    mol_provenance: dict, optional. Default is None
        provenance for molecule. If the molecule is a state from enumerate_states, the provenance from
        enumerate_states should be used
    json_filename: str, optional. Default None
        If a filename is provided, will write output to json file.
    generate_vis: bool, optional, default False
        Passed to ``fragment.generate_fragments`` as its visualization flag.

    Returns
    -------
    json_dict: dict
        Maps each fragment's canonical isomeric SMILES to a dictionary with 'identifiers' and 'provenance'.
        NOTE: every entry refers to the same provenance dictionary object.
    """
    routine = 'enumerate_fragments'
    provenance = _get_provenance(workflow_id=self.workflow_id, routine=routine)
    options = self.off_workflow.get_options('enumerate_fragments')['options']

    parent_molecule = chemi.standardize_molecule(molecule, title)
    parent_molecule_smiles = mol_to_smiles(parent_molecule, isomeric=True, explicit_hydrogen=False, mapped=False)

    # Record which molecule the fragments come from.
    routine_record = provenance['routine']['enumerate_fragments']
    routine_record['parent_molecule_name'] = parent_molecule.GetTitle()
    routine_record['parent_molecule'] = parent_molecule_smiles

    fragments = fragment.generate_fragments(parent_molecule, generate_vis, **options)

    if self.states:
        # Check if current state exists
        if parent_molecule_smiles in self.states['states']:
            provenance['routine']['enumerate_states'] = self.states['provenance']['routine']['enumerate_states']
        elif mol_provenance:
            provenance['routine']['enumerate_states'] = mol_provenance['routine']['enumerate_states']

    # Generate identifiers for fragments
    fragments_json_dict = {}
    for fragment_smiles in fragments.values():
        for frag in fragment_smiles:
            identifiers = to_molecule_id(frag, canonicalization='openeye')
            key = identifiers['canonical_isomeric_smiles']
            entry = {'identifiers': identifiers}
            fragments_json_dict[key] = entry
            entry['provenance'] = provenance
            # Move the canonicalization provenance out of the identifiers.
            entry['provenance']['canonicalization'] = identifiers.pop('provenance')

    if json_filename:
        with open(json_filename, 'w') as f:
            json.dump(fragments_json_dict, f, indent=2, sort_keys=True)

    return fragments_json_dict