def test_generate_conformers():
    """Check ``GetMaxConfIdx`` of generated conformers with and without ``max_confs``."""
    heptane = chemi.smiles_to_oemol('CCCCCCC')

    # (keyword arguments for generate_conformers, expected GetMaxConfIdx value)
    cases = (
        ({'max_confs': 1}, 1),
        ({}, 3),
    )
    for kwargs, expected_max_idx in cases:
        confs = chemi.generate_conformers(heptane, **kwargs)
        assert confs.GetMaxConfIdx() == expected_max_idx
def test_2D_conformation(self):
    """Check ``has_conformer`` with ``check_two_dimension=True``.

    Every molecule returned by ``enumerate_states`` must be reported as
    having no conformer, while a molecule returned by
    ``generate_conformers`` must be reported as having one.
    """
    from fragmenter import chemi, states

    butane = chemi.smiles_to_oemol('CCCC')

    # Use a distinct name for the result so the imported ``states`` module
    # is not rebound.
    enumerated = states.enumerate_states(butane, return_mols=True)
    for candidate in enumerated:
        found = chemi.has_conformer(candidate, check_two_dimension=True)
        self.assertFalse(found)

    with_coords = chemi.generate_conformers(butane, max_confs=1)
    found = chemi.has_conformer(with_coords, check_two_dimension=True)
    self.assertTrue(found)
def test_to_mapped_xyz():
    """Check that ``to_mapped_xyz`` agrees for an explicit atom map and a mapped molecule.

    The same molecule is loaded once from a plain SMILES and once from a
    mapped SMILES.  The xyz text produced with an explicit atom map must
    contain the same atom lines as the xyz text produced from the molecule
    that was loaded from the mapped SMILES.
    """
    from openeye import oechem

    mapped_smiles = '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]'
    plain_mol = cmiles.utils.load_molecule('HC(H)(C(H)(H)OH)OH')
    tagged_mol = cmiles.utils.load_molecule(mapped_smiles)

    # Before any conformer has been generated the call must raise.
    with pytest.raises(ValueError):
        chemi.to_mapped_xyz(tagged_mol)

    plain_mol = chemi.generate_conformers(plain_mol, max_confs=1)
    tagged_mol = chemi.generate_conformers(tagged_mol, max_confs=1)
    atom_map = cmiles.utils.get_atom_map(plain_mol, mapped_smiles)

    # The molecule from the plain SMILES must raise when no atom map is passed.
    with pytest.raises(ValueError):
        chemi.to_mapped_xyz(plain_mol)

    def _sorted_atom_lines(xyz_text):
        # Drop the first two lines and the last element of the split
        # (presumably the xyz header and a trailing empty string -- confirm
        # against to_mapped_xyz), then sort so atom order does not matter.
        return sorted(xyz_text.split('\n')[2:-1])

    xyz_from_map = chemi.to_mapped_xyz(plain_mol, atom_map)
    xyz_from_tags = chemi.to_mapped_xyz(tagged_mol)

    assert _sorted_atom_lines(xyz_from_map) == _sorted_atom_lines(xyz_from_tags)
def canonical_order_conformer(smiles):
    """Check that map-ordered geometry round-trips through a molecule built from ``smiles``.

    A reference conformer is generated from a fixed mapped SMILES with
    ``can_order=True`` and its map-ordered symbols and geometry are read.
    Those coordinates are then copied, through the atom map, onto a molecule
    loaded from ``smiles``.  Reading the map-ordered geometry back from that
    molecule must reproduce the reference symbols and geometry exactly.

    Parameters
    ----------
    smiles : str
        SMILES used to load the second molecule.  Presumably it must describe
        the same molecule as the reference mapped SMILES so that an atom map
        can be found -- confirm against the caller.
    """
    import cmiles

    mapped_smiles = '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]'
    mol_id_oe = cmiles.to_molecule_id(mapped_smiles, canonicalization='openeye')
    reference_mol = cmiles.utils.load_molecule(mapped_smiles, toolkit='openeye')

    # Reference geometry from a canonically ordered single conformer.
    reference_conf = chemi.generate_conformers(reference_mol, can_order=True, max_confs=1)
    mapped_symbols, mapped_geometry = cmiles._cmiles_oe.get_map_ordered_geometry(
        reference_conf, mapped_smiles)

    mol = cmiles.utils.load_molecule(smiles, toolkit='openeye')
    atom_map = cmiles.utils.get_atom_map(mol, mapped_smiles=mapped_smiles)

    # Use the atom map to put the reference coordinates on ``mol``: the flat
    # map-ordered geometry becomes one row per atom, and each row is moved to
    # the atom index that carries the corresponding map index.
    n_atoms = len(mapped_geometry) // 3
    reference_xyz = np.array(mapped_geometry, dtype=float).reshape(n_atoms, 3)
    reordered_xyz = np.zeros(reference_xyz.shape)
    for map_idx, atom_idx in atom_map.items():
        reordered_xyz[atom_idx] = reference_xyz[map_idx - 1]

    # Flatten and convert to Angstroms before storing on the molecule.
    flat_coords = reordered_xyz.flatten() * cmiles.utils.BOHR_2_ANGSTROM
    mol.SetCoords(flat_coords)
    mol.SetDimension(3)

    # Get new atom map
    atom_map = cmiles.utils.get_atom_map(mol, mapped_smiles)
    symbols, geometry = cmiles._cmiles_oe.get_map_ordered_geometry(
        mol, mapped_smiles)

    assert geometry == mapped_geometry
    assert symbols == mapped_symbols
def test_mapped_xyz(self):
    """Compare mapped xyz output for toluene built from SMILES and read from a mol2 file.

    Matching atoms in the two molecules are given the same atomic number
    (1, 2, 3, ... in atom-map iteration order).  The non-xyz-format output of
    ``to_mapped_xyz`` for both molecules must then agree, line by line, in
    its third space-separated field.
    """
    from openeye import oechem, oeomega

    tagged_smiles = '[H:10][c:4]1[c:3]([c:2]([c:1]([c:6]([c:5]1[H:11])[H:12])[C:7]([H:13])([H:14])[H:15])[H:8])[H:9]'

    smiles_mol = chemi.smiles_to_oemol('Cc1ccccc1')

    mol2_path = get_fn('toluene.mol2')
    mol2_stream = oechem.oemolistream(mol2_path)
    mol2_mol = oechem.OEMol()
    oechem.OEReadMolecule(mol2_stream, mol2_mol)

    smiles_mol = chemi.generate_conformers(smiles_mol, max_confs=1)
    atom_map = get_atom_map(smiles_mol, tagged_smiles)

    # Relabel corresponding atoms with the same atomic number.  In the
    # SMILES molecule the atom index comes from the atom map; in the mol2
    # molecule it is taken to be map index minus one.
    for label, map_idx in enumerate(atom_map, start=1):
        smiles_atom = smiles_mol.GetAtom(oechem.OEHasAtomIdx(atom_map[map_idx]))
        smiles_atom.SetAtomicNum(label)
        mol2_atom = mol2_mol.GetAtom(oechem.OEHasAtomIdx(map_idx - 1))
        mol2_atom.SetAtomicNum(label)

    smiles_xyz = chemi.to_mapped_xyz(smiles_mol, atom_map, xyz_format=False)

    # molecule generated from mol2 should be in the right order, so its
    # atom map is simply map index n -> atom index n - 1 for the 15 atoms.
    atom_map_mol2 = {n: n - 1 for n in range(1, 16)}
    mol2_xyz = chemi.to_mapped_xyz(mol2_mol, atom_map_mol2, xyz_format=False)

    smiles_lines = smiles_xyz.split('\n')[:-1]
    mol2_lines = mol2_xyz.split('\n')[:-1]
    for smiles_line, mol2_line in zip(smiles_lines, mol2_lines):
        self.assertEqual(smiles_line.split(' ')[2], mol2_line.split(' ')[2])
}) if job_label in torsiondrive_jobs: continue # Map dihedral onto new atom map # Add explicit hydrogen oechem.OEAddExplicitHydrogens(map_to_parent_mol) cmiles._cmiles_oe.canonical_order_atoms(map_to_parent_mol) map_to_parent = cmiles.utils.get_atom_map(map_to_parent_mol, map_to_parent) dih = [map_to_parent[d + 1] for d in dihedral_in_parent] # Generate starting conformations. First add hydrogens oechem.OEAddExplicitHydrogens(map_to_parent_mol) conformers = chemi.generate_conformers(map_to_parent_mol, strict_types=False, max_confs=10) # Genereate QCSchema molecules qcschema_mols = [ cmiles.utils.mol_to_map_ordered_qcschema(conf, can_smiles) for conf in conformers.GetConfs() ] torsiondrive_jobs[job_label] = { 'initial_molecule': qcschema_mols, 'dihedral': [dih], 'grid': [15], 'identifiers': filtered_frags[frag]['identifiers'], 'provenance': { 'canonicalization': filtered_frags[frag]['provenance']['canonicalization']
# Drop duplicates canonical_smiles = cmiles_ids['canonical_smiles'] if canonical_smiles in processed_canonical_smiles: logging.info('Found duplicate canonical SMILES {}'.format(canonical_smiles)) duplicates.append(canonical_smiles) continue else: processed_canonical_smiles.append(canonical_smiles) # Generate molecule using mapped SMILES mapped_smiles = cmiles_ids['canonical_isomeric_explicit_hydrogen_mapped_smiles'] m = cmiles.utils.load_molecule(s) try: # Omega fails for some molecules. conformers = chemi.generate_conformers(m) except RuntimeError: logging.info('Omega failed to generate conformers for {}'.format(cmiles_ids['canonical_isomeric_smiles'])) # Omega failed omega_failures.append(cmiles_ids['canonical_isomeric_smiles']) continue qcschema_molecules = [cmiles.utils.mol_to_map_ordered_qcschema(conf, mapped_smiles) for conf in conformers.GetConfs()] optimization_input.append({'initial_molecules': qcschema_molecules, 'cmiles_identifiers': cmiles_ids}) # Write to SDF oechem.OEWriteMolecule(ofs, conformers) import gzip with gzip.open('optimization_inputs.json.gz', 'w') as f: f.write(json.dumps(optimization_input, indent=2, sort_keys=True).encode('utf-8'))
def generate_torsiondrive_input(self, frag, json_filename=None):
    """
    Generate input for torsiondrive QCFractal portal

    Options are read from the 'torsiondrive_input' section of
    ``self.off_workflow`` and provenance is generated for
    ``self.workflow_id``.

    Parameters
    ----------
    frag: dict
        Fragment entry with 'identifiers' and 'provenance' keys.
        ``frag['identifiers']`` must provide
        'canonical_isomeric_explicit_hydrogen_mapped_smiles' and
        'canonical_isomeric_smiles'. ``frag['provenance']['routine']`` is
        updated in place with the 'torsiondrive_input' routine record.
    json_filename: str, optional, default None
        If given will write jobs to json file

    Returns
    -------
    torsiondrive_inputs: dict or bool
        Dictionary keyed by the mapped SMILES. Its value holds
        'torsiondrive_input' (jobs keyed by a name built from the job's
        dihedrals), 'optimization_input' (restricted optimization jobs) and
        'provenance'.
        ``False`` is returned when conformer generation raises
        ``RuntimeError``; a warning is logged in that case.

    """
    options = self.off_workflow.get_options('torsiondrive_input')
    routine_provenance = _get_provenance(workflow_id=self.workflow_id,
                                         routine='torsiondrive_input')
    frag['provenance']['routine']['torsiondrive_input'] = routine_provenance[
        'routine']['torsiondrive_input']
    provenance = frag['provenance']

    mol_id = frag['identifiers']
    mapped_smiles = mol_id[
        'canonical_isomeric_explicit_hydrogen_mapped_smiles']
    mapped_mol = chemi.smiles_to_oemol(mapped_smiles)
    needed_torsions = torsions.find_torsions(mapped_mol, options['restricted'])

    def _warn_no_coordinates():
        # Single source for the warning emitted by both failure paths below.
        name = mol_id['canonical_isomeric_smiles']
        utils.logger().warning(
            "{} does not have coordinates. This can happen for several reasons related to Omega. "
            "{} will not be included in fragments dictionary".format(name, name))

    if options['multiple_confs']:
        # Generate grid of multiple conformers over every torsion found
        # (collected before the 'restricted' entry is popped further down).
        dihedrals = [
            needed_torsions[torsion_type][tor]
            for torsion_type in needed_torsions
            for tor in needed_torsions[torsion_type]
        ]
        intervals = options['initial_conf_grid_resolution']
        if not isinstance(intervals, list):
            # A scalar resolution applies to every dihedral.
            intervals = [intervals] * len(dihedrals)
        try:
            conformers = chemi.generate_grid_conformers(
                mapped_mol, dihedrals=dihedrals, intervals=intervals)
        except RuntimeError:
            _warn_no_coordinates()
            return False
        chemi.resolve_clashes(conformers)
        qcschema_molecules = [
            mol_to_map_ordered_qcschema(conf, mol_id)
            for conf in conformers.GetConfs()
        ]

    # A single conformer is always generated: it is the input for the
    # restricted optimization jobs even when multiple_confs is set.
    try:
        conformer = chemi.generate_conformers(mapped_mol, max_confs=1)
        qcschema_molecule = mol_to_map_ordered_qcschema(conformer, mol_id)
    except RuntimeError:
        _warn_no_coordinates()
        return False

    identifier = mol_id[
        'canonical_isomeric_explicit_hydrogen_mapped_smiles']
    torsiondrive_inputs = {
        identifier: {
            'torsiondrive_input': {},
            'provenance': provenance
        }
    }

    # Restricted torsions go to constrained optimizations; whatever remains
    # in needed_torsions defines the torsiondrive jobs.
    restricted_torsions = needed_torsions.pop('restricted')
    optimization_jobs = torsions.generate_constraint_opt_input(
        qcschema_molecule, restricted_torsions,
        **options['restricted_optimization_options'])
    torsiondrive_inputs[identifier][
        'optimization_input'] = optimization_jobs

    torsiondrive_jobs = torsions.define_torsiondrive_jobs(
        needed_torsions, **options['torsiondrive_options'])

    if options['multiple_confs']:
        # Torsiondrive jobs start from the whole grid of conformers.
        qcschema_molecule = qcschema_molecules

    # Currently, all jobs are started from same initial conformation
    # ToDo Start later job from optimized conformers from last job
    for job in torsiondrive_jobs:
        job_dihedrals = torsiondrive_jobs[job]['dihedrals']
        torsiondrive_input = {
            'type': 'torsiondrive_input',
            'initial_molecule': qcschema_molecule,
            'dihedrals': job_dihedrals,
            'grid_spacing': torsiondrive_jobs[job]['grid_spacing'],
        }
        # Job name is the job's dihedrals joined with underscores.
        job_name = '_'.join('{}'.format(torsion) for torsion in job_dihedrals)
        torsiondrive_inputs[identifier]['torsiondrive_input'][
            job_name] = torsiondrive_input

    if json_filename:
        with open(json_filename, 'w') as f:
            json.dump(torsiondrive_inputs, f, indent=2, sort_keys=True)

    return torsiondrive_inputs