def test_from_smiles(self):
    """Check the counts of a Topology built from the SMILES-derived ethane.

    The same set of checks runs twice: once right after construction and
    once after the identical molecule has been added a second time. The
    per-topology counts double while the number of unique reference
    molecules stays at one.
    """

    def check_counts(n_copies):
        # Ethane contributes 8 atoms and 7 bonds per copy.
        assert topology.n_reference_molecules == 1
        assert topology.n_topology_molecules == n_copies
        assert topology.n_topology_atoms == 8 * n_copies
        assert topology.n_topology_bonds == 7 * n_copies
        assert topology.n_topology_particles == 8 * n_copies
        assert topology.n_topology_virtual_sites == 0
        assert topology.box_vectors is None
        assert len(topology.constrained_atom_pairs.items()) == 0

    topology = Topology.from_molecules(self.ethane_from_smiles)
    check_counts(1)

    topology.add_molecule(self.ethane_from_smiles)
    check_counts(2)
def smirnoff_analyze_torsions(forcefield, off_mol):
    """
    Group the proper torsions of a molecule by the SMIRKS that types them.

    Parameters
    ----------
    forcefield: openforcefield.typing.engines.smirnoff.ForceField
        The forcefield object used to label the molecule
    off_mol: openforcefield.topology.Molecule
        The molecule object whose torsions are labelled

    Returns
    -------
    torsions_coverage: dict
        Key is the SMIRKS of a matched torsion parameter, value is the list
        of torsion atom-index tuples it was assigned to
        {SMIRKs: [(0,1,2,3), (2,4,6,7), ..] }
    """
    topology = Off_Topology.from_molecules(off_mol)
    # A single molecule goes in, so only the first label set is read.
    proper_torsions = forcefield.label_molecules(topology)[0]['ProperTorsions']

    torsions_coverage = defaultdict(list)
    for indices, parameter in proper_torsions.items():
        torsions_coverage[parameter.smirks].append(indices)
    return torsions_coverage
def check_molecule(molecule, test_ff=None):
    """
    Run a few checks for a QCElemental Molecule.

    The molecule is converted through cmiles, written to ``test.mol2`` in a
    temporary directory, and then checked in this order:

    1. ``check_connectivity`` accepts the file against the bond pairs taken
       from ``molecule.connectivity``;
    2. if ``test_ff`` is given and step 1 passed, an OpenMM system can be
       created from the file with that force field;
    3. ``check_hbond`` accepts the file.

    Parameters
    ----------
    molecule
        Object providing ``dict(encoding='json')`` and ``connectivity``, an
        iterable of 3-tuples whose first two items are used as a bond pair.
    test_ff : optional
        Object providing ``create_openmm_system(topology)``. Step 2 is
        skipped when this is None.

    Returns
    -------
    success : bool
        False if any check failed.
    err_msg : str
        Empty on success, otherwise the message of the last failed check.
    """
    import tempfile

    qcjson_mol = molecule.dict(encoding='json')
    oemol = cmiles.utils.load_molecule(qcjson_mol)
    success = True
    err_msg = ""
    cwd = os.getcwd()
    # write a test.mol2 file in a temp dir for checking
    with tempfile.TemporaryDirectory() as tmpdirname:
        os.chdir(tmpdirname)
        try:
            # The stream has to be created here; the block never defined it.
            ofs = oechem.oemolostream()
            ofs.open('test.mol2')
            oechem.OEWriteMolecule(ofs, oemol)
            ofs.close()

            # test if bonds changed
            bond_set = {(a, b) for a, b, _ in molecule.connectivity}
            if not check_connectivity(bond_set, 'test.mol2'):
                success = False
                err_msg = "Bonds changed after rebuild"

            # test if can be created by the test_ff
            if success and test_ff is not None:
                from openforcefield.topology import Molecule as Off_Molecule
                from openforcefield.topology import Topology as Off_Topology
                try:
                    off_molecule = Off_Molecule.from_file('test.mol2')
                    off_topology = Off_Topology.from_molecules(off_molecule)
                    test_ff.create_openmm_system(off_topology)
                except Exception as e:
                    success = False
                    err_msg = str(e)

            # test if this molecule has hydrogen bonds; as before, a failure
            # here replaces any earlier message
            if not check_hbond('test.mol2'):
                success = False
                err_msg = 'One or more hydrogen bond found'
        finally:
            # Always leave the temp dir, even if a check raised, so the
            # process is not left inside a directory about to be deleted.
            os.chdir(cwd)
    return success, err_msg
def _openeye_parameteriser(cls, mol, **kwargs):
    """
    Creates a parameterised system from openeye molecule

    The molecule is charged with AM1-BCC through the OpenEye toolkit,
    parameterised with ``test_forcefields/smirnoff99Frosst.offxml`` and
    loaded into ParmEd. A PDB copy of ``mol`` is written to a fresh
    temporary directory.

    Parameters
    ----------
    mol : oechem.OEMol
    **kwargs
        Accepted for interface compatibility; not used here.

    Returns
    -------
    pdb_filename : str
        Path of the PDB file written for ``mol``.
    ligand_pmd
        The object returned by ``parmed.openmm.topsystem.load_topology``,
        with every residue renamed to ``LIG``.

    Raises
    ------
    ValueError
        If any step of the parameterisation fails; the original exception
        is chained as the cause.
    """
    try:
        forcefield = ForceField('test_forcefields/smirnoff99Frosst.offxml')
        molecule = Molecule.from_openeye(
            mol, allow_undefined_stereo=cls.allow_undefined_stereo)
        from openforcefield.utils.toolkits import OpenEyeToolkitWrapper
        molecule.compute_partial_charges_am1bcc(
            toolkit_registry=OpenEyeToolkitWrapper())
        topology = Topology.from_molecules(molecule)
        openmm_system = forcefield.create_openmm_system(
            topology, charge_from_molecules=[molecule])
        ligand_pmd = parmed.openmm.topsystem.load_topology(
            topology.to_openmm(), openmm_system, molecule._conformers[0])
    except Exception as e:
        raise ValueError("Parameterisation Failed : {}".format(e)) from e

    # TODO: ligand_pmd.title = cls.smiles
    for residue in ligand_pmd.residues:
        residue.name = 'LIG'

    tmp_dir = tempfile.mkdtemp()
    # We need all molecules as both pdb files (as packmol input)
    # and mdtraj.Trajectory for restoring bonds later.
    # NamedTemporaryFile reserves the name atomically, unlike the deprecated
    # tempfile.mktemp; the handle is closed at once so OpenEye can reopen it.
    with tempfile.NamedTemporaryFile(
            suffix=".pdb", dir=tmp_dir, delete=False) as handle:
        pdb_filename = handle.name

    from openeye import oechem  # OpenEye Python toolkits
    ofs = oechem.oemolostream(pdb_filename)
    try:
        oechem.OEWriteMolecule(ofs, mol)
    finally:
        # Close explicitly so the file is flushed before the caller reads it.
        ofs.close()
    return pdb_filename, ligand_pmd
def min_ffxml(mol, ofs, ffxml):
    """
    Minimize a copy of ``mol`` with the given force field and write it out.

    Parameters
    ----------
    mol
        Input OpenEye molecule; it is copied, not modified.
    ofs
        Open OpenEye output stream that receives the minimized molecule.
    ffxml
        Force field specification passed to ``ForceField``.

    Returns
    -------
    None
        If the OpenMM system cannot be created, a message is printed and
        nothing is written.
    """
    # work on a copy of the input mol
    oe_mol = oechem.OEGraphMol(mol)

    try:
        # both the molecule conversion and the system creation are prone to
        # triggering an Exception
        off_mol = Molecule.from_openeye(oe_mol)
        force_field = ForceField(ffxml)
        topology = Topology.from_molecules(molecules=[off_mol])
        system = force_field.create_openmm_system(topology)
    except Exception:
        smilabel = oechem.OEGetSDData(oe_mol, "SMILES QCArchive")
        print(' >>> openforcefield failed to create OpenMM system: '
              f'{oe_mol.GetTitle()} {smilabel}')
        return

    # minimize structure with ffxml
    start_positions = structure.extractPositionsFromOEMol(oe_mol)
    newpos, energy = run_openmm(topology, system, start_positions)

    # save geometry, save energy as tag, write mol to file
    oe_mol.SetCoords(oechem.OEFloatArray(newpos))
    oechem.OESetSDData(oe_mol, "Energy FFXML", str(energy))
    oechem.OEWriteConstMolecule(ofs, oe_mol)
    return
def get_parameters_from_forcefield(self, forcefield, molecule):
    """
    Label an offpele molecule with a force field and wrap the result.

    Parameters
    ----------
    forcefield : str or an
                 openforcefield.typing.engines.smirnoff.ForceField object
        The forcefield from which the parameters will be obtained; a
        string is passed to the ForceField constructor
    molecule : an offpele.topology.Molecule
        The offpele's Molecule object; its ``off_molecule`` is labelled

    Returns
    -------
    openforcefield_parameters : an OpenForceFieldParameters object
        Built from the labels of the single molecule
    """
    from openforcefield.typing.engines.smirnoff import ForceField
    from openforcefield.topology import Topology

    topology = Topology.from_molecules([molecule.off_molecule])

    if isinstance(forcefield, str):
        forcefield = ForceField(forcefield)
    elif not isinstance(forcefield, ForceField):
        raise Exception('Invalid forcefield type')

    labels_per_molecule = forcefield.label_molecules(topology)
    assert len(labels_per_molecule) == 1, 'A single molecule is expected'

    return self.OpenForceFieldParameters(labels_per_molecule[0])
def test_from_smiles_unique_mols(self):
    """Ethane plus propane give two topology and two reference molecules."""
    distinct_molecules = [self.ethane_from_smiles, self.propane_from_smiles]
    topology = Topology.from_molecules(distinct_molecules)

    assert topology.n_topology_molecules == 2
    assert topology.n_reference_molecules == 2
def _load_openff_molecules(target_path, file_names):
    """Read each named file under target_path as an OffMolecule, allowing undefined stereo."""
    return [
        OffMolecule.from_file(os.path.join(target_path, fnm),
                              allow_undefined_stereo=True)
        for fnm in file_names
    ]


def smirnoff_analyze_parameter_coverage(forcefield, targets):
    """
    Report which SMIRNOFF parameters are exercised by the given targets.

    For every target an OpenFF topology is obtained (from the target's mol2
    files, or from an already initialised SMIRNOFF engine), labelled with
    the force field, and every assigned parameter is recorded. The
    per-target assignments are written to
    ``smirnoff_parameter_assignments.json`` under ``forcefield.root`` and a
    per-parameter count table is logged.

    Parameters
    ----------
    forcefield
        ForceBalance force field object; must have an ``offxml`` attribute
        and provide ``openff_forcefield``, ``root`` and ``plist``.
    targets
        Iterable of ForceBalance target objects.

    Returns
    -------
    None
    """
    printcool("SMIRNOFF Parameter Coverage Analysis")
    assert hasattr(forcefield,
                   'offxml'), "Only SMIRNOFF Force Field is supported"
    parameter_assignment_data = defaultdict(list)
    parameter_counter = Counter()
    # The openforcefield.typing.engines.smirnoff.ForceField object should now
    # be contained in forcebalance.forcefield.FF
    ff = forcefield.openff_forcefield
    # analyze each target
    for target in targets:
        off_topology = None
        if isinstance(target, forcebalance.target.RemoteTarget):
            # remote targets are not initialized yet, we do a manual setup here
            target_type = target.r_tgt_opts['type']
            if target_type.endswith('SMIRNOFF'):
                target_path = os.path.join(target.root, target.tgtdir)
                if target_type == 'OPTGEOTARGET_SMIRNOFF':
                    # parse optgeo_options_txt and get the names of the mol2 files
                    optgeo_options_txt = os.path.join(
                        target_path, target.r_tgt_opts['optgeo_options_txt'])
                    sys_opts = forcebalance.opt_geo_target.OptGeoTarget.parse_optgeo_options(
                        optgeo_options_txt)
                    mol2_names = [
                        fnm for sysopt in sys_opts.values()
                        for fnm in sysopt['mol2']
                    ]
                else:
                    mol2_names = target.r_tgt_opts.get('mol2', [])
                off_topology = OffTopology.from_molecules(
                    _load_openff_molecules(target_path, mol2_names))
        elif isinstance(target, forcebalance.opt_geo_target.OptGeoTarget):
            if target.engine_.__name__ == 'SMIRNOFF':
                target_path = os.path.join(target.root, target.tgtdir)
                mol2_names = [
                    fnm for sysopt in target.sys_opts.values()
                    for fnm in sysopt['mol2']
                ]
                off_topology = OffTopology.from_molecules(
                    _load_openff_molecules(target_path, mol2_names))
        elif (hasattr(target, 'engine')
              and isinstance(target.engine, SMIRNOFF)
              and hasattr(target.engine, 'off_topology')):
            off_topology = target.engine.off_topology

        if off_topology is None:
            logger.warning(
                "No smirnoff topology or molecule found for target %s\n" %
                target.name)
            continue

        for mol_forces in ff.label_molecules(off_topology):
            for force_tag, force_dict in mol_forces.items():
                # e.g. force_tag = 'Bonds'
                for atom_indices, parameter in force_dict.items():
                    parameter_assignment_data[target.name].append({
                        'id': parameter.id,
                        'smirks': parameter.smirks,
                        'type': force_tag,
                        'atoms': list(atom_indices),
                    })
                    parameter_counter[parameter.smirks] += 1

    # write out parameter assignment data
    out_json_path = os.path.join(forcefield.root,
                                 'smirnoff_parameter_assignments.json')
    with open(out_json_path, 'w') as jsonfile:
        json.dump(parameter_assignment_data, jsonfile, indent=2)
    logger.info("Force field assignment data written to %s\n" % out_json_path)

    # print parameter coverages
    logger.info("%4s %-100s %10s\n" % ("idx", "Parameter", "Count"))
    logger.info("-" * 118 + '\n')
    n_covered = 0
    for i, p in enumerate(forcefield.plist):
        # the SMIRKS is the last '/'-separated field of the parameter name
        smirks = p.split('/')[-1]
        logger.info('%4i %-100s : %10d\n' % (i, p, parameter_counter[smirks]))
        if parameter_counter[smirks] > 0:
            n_covered += 1
    logger.info(
        "SMIRNOFF Parameter Coverage Analysis result: %d/%d parameters are covered.\n"
        % (n_covered, len(forcefield.plist)))
    logger.info("-" * 118 + '\n')
def find_smirks_parameters(smiles_list, molecule_paths):
    """Finds the force field parameters which would be assigned to a list of
    molecules, and the atoms each parameter is applied to.

    Parameters
    ----------
    smiles_list: list of str
        The SMILES patterns of the target molecules. Only its length is
        used: entry ``i`` is paired with ``molecule_paths[i]``.
    molecule_paths: list of Path
        The molecule files that correspond to the SMILES strings. The first
        molecule of each file is labelled.

    Returns
    -------
    dict of str and dict
        ``{force_name: {parameter_id: entry}}`` where ``entry`` is a dict
        with the keys ``"atom_indices"`` (list of atom-index tuples),
        ``"atom_names"`` (list of lists of atom names, parallel to
        ``"atom_indices"``) and ``"smirks"`` (the parameter's SMIRKS).
        Hits from all molecules are accumulated in the same lists.
    """
    force_field = smirnoff.ForceField('smirnoff99Frosst-1.0.9.offxml')

    smiles_by_smirks = {
        "Bonds": {},
        "Angles": {},
        "ProperTorsions": {},
        "vdW": {},
        "ImproperTorsions": {},
        "Electrostatics": {},
    }

    # Populate the dictionary using the open force field toolkit.
    for index, _smiles in enumerate(smiles_list):
        molecule_path = molecule_paths[index]

        ifs = oechem.oemolistream()
        if not ifs.open(str(molecule_path)):
            logging.error(
                f'Unable to open {molecule_paths[index]} for reading...')
            # Nothing can be read from this file; move on to the next one.
            continue
        oe_mols = [oechem.OEMol(mol) for mol in ifs.GetOEMols()]
        ifs.close()

        if not oe_mols:
            logging.error(f'No molecules could be read from {molecule_path}')
            continue
        oe_mol = oe_mols[0]

        oechem.OE3DToAtomStereo(oe_mol)
        molecule = Molecule.from_openeye(oe_mol)
        topology = Topology.from_molecules([molecule])
        molecule_force_list = force_field.label_molecules(topology)

        for molecule_index, molecule_forces in enumerate(molecule_force_list):
            print(f'Forces for molecule {molecule_index}')
            for force_name, force_dict in molecule_forces.items():
                print(f"\n{force_name}:")
                # Tolerate force tags beyond the six seeded above.
                by_parameter = smiles_by_smirks.setdefault(force_name, {})
                for atom_indices, parameter in force_dict.items():
                    atom_names = [
                        oe_mol.GetAtom(oechem.OEHasAtomIdx(i)).GetName()
                        for i in atom_indices
                    ]
                    print("atoms: %s parameter_id: %s smirks %s" %
                          (atom_names, parameter.id, parameter.smirks))

                    # Create the entry once per parameter id, so later hits
                    # append to the lists instead of resetting them.
                    entry = by_parameter.setdefault(
                        parameter.id, {"atom_indices": [], "atom_names": []})
                    entry["atom_indices"].append(atom_indices)
                    entry["atom_names"].append(atom_names)
                    entry["smirks"] = parameter.smirks

    return smiles_by_smirks
def smirnoff_analyze_parameter_coverage(forcefield, tgt_opts):
    """
    Report which SMIRNOFF parameters are exercised by the configured targets.

    The mol2 files of every ``*_SMIRNOFF`` target are labelled one file at
    a time with the force field. The assignments are written, keyed by mol2
    path, to ``smirnoff_parameter_assignments.json`` under
    ``forcefield.root`` and a per-parameter count table is logged.

    Parameters
    ----------
    forcefield
        ForceBalance force field object; must have an ``offxml`` attribute
        and provide ``openff_forcefield``, ``root`` and ``plist``.
    tgt_opts
        Iterable of target option dicts. ``'name'`` and ``'type'`` are
        always read; ``'optgeo_options_txt'`` or ``'mol2'`` are read
        depending on the type. Targets of other types contribute nothing.

    Returns
    -------
    None
    """
    printcool("SMIRNOFF Parameter Coverage Analysis")
    assert hasattr(forcefield,
                   'offxml'), "Only SMIRNOFF Force Field is supported"
    parameter_assignment_data = defaultdict(list)
    parameter_counter = Counter()
    # The openforcefield.typing.engines.smirnoff.ForceField object should now
    # be contained in forcebalance.forcefield.FF
    ff = forcefield.openff_forcefield
    # analyze each target
    for tgt_option in tgt_opts:
        target_path = os.path.join('targets', tgt_option['name'])
        # aggregate mol2 file paths from all targets
        mol2_paths = []
        if tgt_option['type'] == 'OPTGEOTARGET_SMIRNOFF':
            # parse optgeo_options_txt and get the names of the mol2 files
            optgeo_options_txt = os.path.join(
                target_path, tgt_option['optgeo_options_txt'])
            sys_opts = forcebalance.opt_geo_target.OptGeoTarget.parse_optgeo_options(
                optgeo_options_txt)
            mol2_paths = [
                os.path.join(target_path, fnm)
                for sysopt in sys_opts.values() for fnm in sysopt['mol2']
            ]
        elif tgt_option['type'].endswith('_SMIRNOFF'):
            mol2_paths = [
                os.path.join(target_path, fnm) for fnm in tgt_option['mol2']
            ]

        # analyze SMIRKS terms
        for mol_fnm in mol2_paths:
            # we work with one file at a time to avoid the topology silently
            # combining "same" molecules
            openff_mol = OffMolecule.from_file(mol_fnm)
            off_topology = OffTopology.from_molecules([openff_mol])
            for mol_forces in ff.label_molecules(off_topology):
                for force_tag, force_dict in mol_forces.items():
                    # e.g. force_tag = 'Bonds'
                    for atom_indices, parameter in force_dict.items():
                        parameter_assignment_data[mol_fnm].append({
                            'id': parameter.id,
                            'smirks': parameter.smirks,
                            'type': force_tag,
                            'atoms': list(atom_indices),
                        })
                        parameter_counter[parameter.smirks] += 1

    # write out parameter assignment data
    out_json_path = os.path.join(forcefield.root,
                                 'smirnoff_parameter_assignments.json')
    with open(out_json_path, 'w') as jsonfile:
        json.dump(parameter_assignment_data, jsonfile, indent=2)
    logger.info("Force field assignment data written to %s\n" % out_json_path)

    # print parameter coverages
    logger.info("%4s %-100s %10s\n" % ("idx", "Parameter", "Count"))
    logger.info("-" * 118 + '\n')
    n_covered = 0
    for i, p in enumerate(forcefield.plist):
        # the SMIRKS is the last '/'-separated field of the parameter name
        smirks = p.split('/')[-1]
        logger.info('%4i %-100s : %10d\n' % (i, p, parameter_counter[smirks]))
        if parameter_counter[smirks] > 0:
            n_covered += 1
    logger.info(
        "SMIRNOFF Parameter Coverage Analysis result: %d/%d parameters are covered.\n"
        % (n_covered, len(forcefield.plist)))
    logger.info("-" * 118 + '\n')
def data_generator():
    """
    Yield per-molecule tensors built from the OpenEye molecules in ``mols``.

    Each molecule is converted to an openforcefield Molecule and
    parameterised with ``FF``; the parameters are then read back from the
    OpenMM system. Molecules for which any step fails are skipped.

    Yields
    ------
    tuple
        ``(atoms, adjacency_map, angles, bonds, torsions, particle_params)``

        * ``atoms``: ``TRANSLATION`` of each atomic number, float32.
        * ``adjacency_map``: ``(n_atoms, n_atoms)`` float32 matrix holding
          the bond order at ``[atom1_index, atom2_index]``.
        * ``angles`` / ``bonds`` / ``particle_params``: index columns
          followed by parameter values shifted and scaled by the
          hard-coded constants below.
        * ``torsions``: index columns, periodicity and the two raw
          parameter values.
    """
    for mol in mols:
        # Catch Exception rather than everything: a bare except around a
        # yield would also swallow GeneratorExit (raised when the consumer
        # closes the generator) and KeyboardInterrupt.
        try:
            mol = Molecule.from_openeye(mol)
            topology = Topology.from_molecules(mol)
            # Built once; the original created the topology and system twice.
            system = FF.create_openmm_system(topology)
            n_atoms = topology.n_topology_atoms

            atoms = tf.convert_to_tensor(
                [TRANSLATION[atom._atomic_number] for atom in mol.atoms],
                dtype=tf.float32)

            adjacency_map = np.zeros((n_atoms, n_atoms), dtype=np.float32)
            for bond in mol.bonds:
                # Only the upper triangle is filled, so the ordering matters.
                if not bond.atom1_index < bond.atom2_index:
                    raise ValueError("bond atom indices are not ordered")
                adjacency_map[bond.atom1_index, bond.atom2_index] = \
                    bond.bond_order
            adjacency_map = tf.convert_to_tensor(
                adjacency_map, dtype=tf.float32)

            # The code relies on this force ordering in the created system:
            # 0 = angles, 1 = bonds, 2 = per-particle, 3 = torsions.
            forces = system.getForces()
            angle_force = forces[0]
            bond_force = forces[1]
            particle_force = forces[2]
            torsion_force = forces[3]

            angles = tf.convert_to_tensor(
                [[x[0], x[1], x[2],
                  (x[3]._value - 1.965) / 0.237,
                  (x[4]._value - 507.28) / 396.80]
                 for x in (angle_force.getAngleParameters(idx)
                           for idx in range(angle_force.getNumAngles()))],
                dtype=tf.float32)

            bonds = tf.convert_to_tensor(
                [[x[0], x[1],
                  (x[2]._value - 0.126) / 0.0212,
                  (x[3]._value - 274856) / 12213.203]
                 for x in (bond_force.getBondParameters(idx)
                           for idx in range(bond_force.getNumBonds()))],
                dtype=tf.float32)

            torsions = tf.convert_to_tensor(
                [[x[0], x[1], x[2], x[3], x[4], x[5]._value, x[6]._value]
                 for x in (torsion_force.getTorsionParameters(idx)
                           for idx in range(torsion_force.getNumTorsions()))],
                dtype=tf.float32)

            particle_params = tf.convert_to_tensor(
                [[(x[0]._value - 0.00195) / 0.269,
                  (x[1]._value - 0.276) / 0.0654,
                  (x[2]._value - 0.280) / 0.284]
                 for x in (particle_force.getParticleParameters(idx)
                           for idx in range(particle_force.getNumParticles()))])
        except Exception:
            # Best effort: skip molecules that cannot be processed.
            continue

        yield atoms, adjacency_map, angles, bonds, torsions, particle_params
def list_matching_torsions(smi_file, forcefield):
    """
    List, for every proper-torsion parameter, the molecules that match it.

    Parameters
    ----------
    smi_file
        File passed to ``fragmenter.chemi.file_to_oemols``.
    forcefield
        Force field providing a ``ProperTorsions`` parameter handler and
        ``label_molecules``.

    Returns
    -------
    tid_molecules_list : dict
        ``tid_molecules_list[tid] = [{'mol_index': mol_index,
        'indices': indices, 'covered_tids': covered_tids}, ...]`` with one
        key per torsion parameter id in the force field.
    failed_smi : list of str
        SMILES of the molecules that could not be processed.
    """
    from fragmenter import chemi  # chemi.file_to_oemols

    # generate oemols from smi file
    oemols = chemi.file_to_oemols(smi_file)

    # list of torsion parameters
    ff_torsion_param_list = forcefield.get_parameter_handler(
        'ProperTorsions').parameters

    tid_molecules_list = {
        torsion_param.id: [] for torsion_param in ff_torsion_param_list
    }
    failed_smi = []

    for oemol in tqdm(oemols):
        try:
            off_mol, mol_index, center_bond = gen_canonical_isomeric_smiles(
                oemol)
            oemol = Molecule.to_openeye(off_mol)
        except Exception:
            # Was a bare except, which also hid KeyboardInterrupt.
            failed_smi.append(oechem.OEMolToSmiles(oemol))
            continue

        torsions_coverage = defaultdict(list)
        off_top = Topology.from_molecules(off_mol)
        center_tids = defaultdict(set)
        dihedrals = []
        # NOTE(review): the nesting of this loop was reconstructed from
        # whitespace-collapsed source; all three updates are assumed to
        # apply only to torsions around the central bond -- confirm.
        for torsion_indices, torsion_param in forcefield.label_molecules(
                off_top)[0]['ProperTorsions'].items():
            _, j, k, _ = torsion_indices
            if {j, k} == center_bond:
                center_tids[tuple(sorted([j, k]))].add(torsion_param.id)
                torsions_coverage[torsion_param].append(torsion_indices)
                dihedrals.append(torsion_indices)

        # check connectivity
        if not check_connectivity(dihedrals, oemol):
            print(f'## {mol_index} has diff bond info in oemol and offmol...')
            continue

        filtered_torsions_coverage = filter_torsions_coverage(
            torsions_coverage, oemol)

        for tid, indices_list in filtered_torsions_coverage.items():
            if not indices_list:
                continue
            # count each molecule once per parameter: only the first match
            indices = indices_list[0]
            _, j, k, _ = indices
            # center_tids values are sets, so the ids are already unique
            covered_tids = list(center_tids[tuple(sorted([j, k]))])
            tid_molecules_list[tid].append({
                'mol_index': mol_index,
                'indices': indices,
                'covered_tids': covered_tids
            })

    print("\n## Torsion parameter: matched molecules ##\n" + '-' * 90)
    print(
        f"{'idx':<7} {'ID':7s} {'SMIRKS Pattern':70s} {'Number of molecules matched'}"
    )
    for idx, (tid, molecules_list) in enumerate(tid_molecules_list.items()):
        torsion_param = get_torsion_definition(ff_torsion_param_list, tid)
        print(
            f'{idx:<7} {torsion_param.id:7s} {torsion_param.smirks:70s} {len(molecules_list)}'
        )
    print('-' * 90)
    return tid_molecules_list, failed_smi
def find_parameter_smirks_matches(parameter_tag="vdW", *smiles_patterns):
    """Finds those force field parameters with a given tag which would be
    assigned to a specified set of molecules defined by their smiles
    patterns.

    Results are memoised per (smiles, tag) in the module-level
    ``cached_smirks_parameters`` dictionary.

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns to assign the force field parameters to.
        Patterns with undefined stereochemistry are skipped.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and values of
        sets of smiles patterns which would utilize those parameters. Every
        smirks of the handler is present, possibly with an empty set.
    """
    force_field = _get_default_force_field()
    parameter_handler = force_field.get_parameter_handler(parameter_tag)

    # Initialize the array with all possible smirks pattern
    # to make it easier to identify which are missing.
    smiles_by_parameter_smirks = {
        parameter.smirks: set() for parameter in parameter_handler.parameters
    }

    # Populate the dictionary using the open force field toolkit.
    for smiles in smiles_patterns:
        if parameter_tag not in cached_smirks_parameters.get(smiles, {}):
            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Skip molecules with undefined stereochemistry.
                continue

            topology = Topology.from_molecules([molecule])
            labels = force_field.label_molecules(topology)[0][parameter_tag]
            matched_smirks = [
                parameter.smirks for parameter in labels.values()
            ]
            # Store only after labelling succeeded, so a failure cannot
            # leave an empty placeholder that later calls would trust.
            cached_smirks_parameters.setdefault(
                smiles, {})[parameter_tag] = matched_smirks

        for smirks in cached_smirks_parameters[smiles][parameter_tag]:
            smiles_by_parameter_smirks[smirks].add(smiles)

    return smiles_by_parameter_smirks
# Make the SystemGenerator system_generator = SystemGenerator( forcefields=[protein_forcefield, solvation_forcefield], barostat=barostat, periodic_forcefield_kwargs={"nonbondedMethod": app.PME}, small_molecule_forcefield=small_molecule_forcefield, molecules=ligand_dict[chosen_ligand], ) # Read in the PDB and create an OpenMM topology pdbfile = app.PDBFile(input_pdb) protein_topology, protein_positions = pdbfile.topology, pdbfile.positions # Add ligand to topology - credit to @hannahbrucemacdonald for help here print("--> Combining protein and ligand topologies") off_ligand_topology = Topology.from_molecules(ligand_dict[chosen_ligand]) ligand_topology = off_ligand_topology.to_openmm() ligand_positions = ligand_dict[chosen_ligand].conformers[0] md_protein_topology = md.Topology.from_openmm( protein_topology ) # using mdtraj for protein top md_ligand_topology = md.Topology.from_openmm( ligand_topology ) # using mdtraj for ligand top md_complex_topology = md_protein_topology.join(md_ligand_topology) # add them together complex_topology = md_complex_topology.to_openmm() # now back to openmm total_atoms = len(protein_positions) + len(ligand_positions) complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers) complex_positions[0 : len(protein_positions)] = protein_positions
def _find_relevant_gradient_keys(substance, force_field_path,
                                 parameter_gradient_keys):
    """Keep only the gradient keys whose parameter is applied to the substance.

    Parameters
    ----------
    substance: Substance
        The substance whose components are labelled.
    force_field_path: str
        The path to the JSON-serialized force field source.
    parameter_gradient_keys: list of ParameterGradientKey
        The candidate parameter gradient keys.

    Returns
    -------
    list of ParameterGradientKey
        The keys whose tag and smirks match a parameter assigned to at
        least one component, without duplicates. Empty if no keys were
        given or the force field is not a SMIRNOFF source.
    """
    from openforcefield.topology import Molecule, Topology

    # noinspection PyTypeChecker
    if parameter_gradient_keys == UNDEFINED or len(
            parameter_gradient_keys) == 0:
        return []

    with open(force_field_path) as file:
        force_field_source = ForceFieldSource.parse_json(file.read())

    if not isinstance(force_field_source, SmirnoffForceFieldSource):
        return []

    force_field = force_field_source.to_force_field()

    component_molecules = [
        Molecule.from_smiles(component.smiles)
        for component in substance.components
    ]
    topology = Topology.from_molecules(component_molecules)

    relevant_keys = []
    for molecule_labels in force_field.label_molecules(topology):
        for gradient_key in parameter_gradient_keys:
            if (gradient_key.tag not in molecule_labels
                    or gradient_key in relevant_keys):
                continue

            assigned = molecule_labels[gradient_key.tag].store.values()
            if any(parameter.smirks == gradient_key.smirks
                   for parameter in assigned):
                relevant_keys.append(gradient_key)

    return relevant_keys
import os

from openforcefield.topology import Molecule as Off_Molecule
from openforcefield.topology import Topology as Off_Topology
from openforcefield.typing.engines.smirnoff import ForceField

# Smoke test: every mol2 file in the current directory must be
# parameterisable with the valence force field; any failure raises.
test_ff = ForceField("../../forcefield/param_valence.offxml",
                     allow_cosmetic_attributes=True)

for file_name in os.listdir('.'):
    if not file_name.endswith('mol2'):
        continue
    print(file_name)
    topology = Off_Topology.from_molecules(Off_Molecule.from_file(file_name))
    test_ff.create_openmm_system(topology)
def topology_batched_md(num=-1, batch_size=16, step_size=100, ani_path='.'):
    """
    Run a short MD simulation for the lowest-energy conformer of ANI entries.

    Every ``.h5`` file in ``ani_path`` is walked two group levels deep. For
    each entry the conformer with the lowest energy is turned into a
    molecule with ``get_ani_mol``, written to ``ds_md/<idx>.sdf``,
    parameterised with ``FF``, minimised and simulated for 10000 steps with
    a DCD reporter writing ``ds_md/<idx>.dcd``. Entries that fail are
    reported and skipped.

    Parameters
    ----------
    num : int
        Processing stops once the count of successful entries exceeds this
        value; -1 disables the limit.
    batch_size, step_size
        Not used by this function.
    ani_path : str
        Directory that is searched for ``.h5`` files.

    Returns
    -------
    None
    """

    def limit_reached():
        return num != -1 and idx > num

    ofs = oechem.oemolostream()
    idx = 0
    for file_name in os.listdir(ani_path):
        if limit_reached():
            break
        if not file_name.endswith('.h5'):
            continue

        # os.listdir returns bare names, so join with the directory;
        # the with-block closes the file handle when done.
        with h5py.File(os.path.join(ani_path, file_name), 'r') as f:
            for d0 in list(f.keys()):
                if limit_reached():
                    break
                for d1 in list(f[d0].keys()):
                    if limit_reached():
                        break
                    print(idx)
                    try:
                        entry = f[d0][d1]
                        # dataset[()] reads the whole dataset; the older
                        # Dataset.value attribute no longer exists in h5py 3.
                        smiles = ''.join([
                            x.decode('utf-8')
                            for x in entry['smiles'][()].tolist()
                        ])
                        coordinates = entry['coordinates'][()]
                        energies = entry['energies'][()]
                        species = [
                            x.decode('utf-8') for x in entry['species'][()]
                        ]

                        low_energy_idx = np.argsort(energies)[0]
                        g, mol = get_ani_mol(coordinates[low_energy_idx],
                                             species, smiles)

                        ofs.open('ds_md/' + str(idx) + '.sdf')
                        oechem.OEWriteMolecule(ofs, mol)
                        ofs.close()

                        mol = Molecule.from_openeye(mol)
                        topology = Topology.from_molecules(mol)
                        mol_sys = FF.create_openmm_system(topology)
                        integrator = LangevinIntegrator(
                            500 * kelvin, 1 / picosecond, 0.002 * picoseconds)
                        simulation = Simulation(topology.to_openmm(), mol_sys,
                                                integrator)
                        # the graph coordinates are scaled by 0.1 before use
                        simulation.context.setPositions(
                            0.1 * g.ndata['xyz'].numpy())
                        simulation.reporters.append(
                            DCDReporter('ds_md/' + str(idx) + '.dcd', 10))
                        simulation.minimizeEnergy()
                        simulation.step(10000)
                        idx += 1
                    except Exception as exc:
                        # Best effort, but no longer silent: a bare except
                        # here would hide systematic failures as well.
                        print(f'skipping {file_name}/{d0}/{d1}: {exc!r}')
                        continue
def find_smirks_parameters(parameter_tag='vdW', *smiles_patterns):
    """Finds those force field parameters with a given tag which would be
    assigned to a specified set of molecules defined by their smiles
    patterns.

    The force field ``smirnoff99Frosst-1.1.0.offxml`` is loaded with stdout
    and stderr captured. Results are memoised per (smiles, tag) in the
    module-level ``cached_smirks_parameters`` dictionary.

    Parameters
    ----------
    parameter_tag: str
        The tag of the force field parameters to find.
    smiles_patterns: str
        The smiles patterns to assign the force field parameters to.
        Patterns with undefined stereochemistry are skipped.

    Returns
    -------
    dict of str and set of str
        A dictionary with keys of parameter smirks patterns, and values of
        sets of smiles patterns which would utilize those parameters. Every
        smirks of the handler is present, possibly with an empty set.
    """
    import contextlib

    # Silence whatever loading the force field prints. The context managers
    # restore both streams even if the constructor raises.
    stream = StringIO()
    with contextlib.redirect_stdout(stream), \
            contextlib.redirect_stderr(stream):
        force_field = ForceField('smirnoff99Frosst-1.1.0.offxml')

    parameter_handler = force_field.get_parameter_handler(parameter_tag)

    # Initialize the array with all possible smirks pattern
    # to make it easier to identify which are missing.
    smiles_by_parameter_smirks = {
        parameter.smirks: set() for parameter in parameter_handler.parameters
    }

    # Populate the dictionary using the open force field toolkit.
    for smiles in smiles_patterns:
        if parameter_tag not in cached_smirks_parameters.get(smiles, {}):
            try:
                molecule = Molecule.from_smiles(smiles)
            except UndefinedStereochemistryError:
                # Skip molecules with undefined stereochemistry.
                continue

            topology = Topology.from_molecules([molecule])
            labels = force_field.label_molecules(topology)[0][parameter_tag]
            matched_smirks = [
                parameter.smirks for parameter in labels.values()
            ]
            # Store only after labelling succeeded, so a failure cannot
            # leave an empty placeholder that later calls would trust.
            cached_smirks_parameters.setdefault(
                smiles, {})[parameter_tag] = matched_smirks

        for smirks in cached_smirks_parameters[smiles][parameter_tag]:
            smiles_by_parameter_smirks[smirks].add(smiles)

    return smiles_by_parameter_smirks
def probe_by_parameter(probe_param, ffxml, subdir, all_probe_mols, inpickle):
    """
    For a single force field parameter, (1) find all molecules that use this
    parameter, and (2) save them to a mol2 file labeled with the parameter ID.

    Parameters
    ----------
    probe_param : string
        Name of the parameter to investigate. Its first character selects
        the force tag (a, b, i, n, t).
    ffxml : string
        Name of the FFXML force field file
    subdir : string
        Name of subdirectory in which to save mol2 files for each parameter
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list that is appended to in place
    inpickle : string
        Name of the pickle file from output of tailed_parameters.py

    Returns
    -------
    all_probe_mols : dict
        key is string of a parameter id to be probed;
        value is a list of oegraphmols with this parameter id

    Raises
    ------
    ValueError
        If labelling the first molecule does not assign ``probe_param``.
    """
    prefix_dict = {
        'a': 'Angles',
        'b': 'Bonds',
        'i': 'ImproperTorsions',
        'n': 'vdW',
        't': 'ProperTorsions',
    }

    # load parameter dictionaries from pickle
    # NOTE: pickle.load can execute arbitrary code; only use pickle files
    # from a trusted source here.
    with open(inpickle, 'rb') as f:
        _data_all, data_out = pickle.load(f)
    params_id_out = data_out['params_id']

    # find the first mol in outlier set with given param
    mols_with_probe = list(params_id_out[probe_param])
    probe_mol = Molecule.from_smiles(mols_with_probe[0],
                                     allow_undefined_stereo=True)
    topology = Topology.from_molecules([probe_mol])

    # load in force field
    ff = ForceField(ffxml)

    # run molecule labeling
    molecule_force_list = ff.label_molecules(topology)

    # get the smirks pattern associated with param
    force_dict = molecule_force_list[0][prefix_dict[probe_param[0]]]
    probe_smirks = next(
        (param.smirks for param in force_dict.values()
         if param.id == probe_param),
        None)
    if probe_smirks is None:
        # Previously this fell through to an unbound local at the print.
        raise ValueError(
            f"Parameter {probe_param} was not assigned to "
            f"{mols_with_probe[0]}")
    print(f"\n=====\n{probe_param}: {probe_smirks}\n=====")

    # find all molecules with this parameter and save to file.
    # conformers are not considered here so these smiles refer to
    # an arbitrary conformer assigned in dict after zip
    # (since duplicate keys are removed in dict)
    outfile = f'{subdir}/param_{probe_param}.mol2'
    ofs = oechem.oemolostream()
    if not ofs.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    try:
        for m in mols_with_probe:
            key = data_out['smi_dict'][m]
            print(f"writing out {key}")
            mymol = data_out['mols_dict'][key]['structure']
            oechem.OEWriteConstMolecule(ofs, mymol)

            # save to write full pdf later on
            all_probe_mols[probe_param].append(oechem.OEGraphMol(mymol))
    finally:
        # Close explicitly so the mol2 file is complete on return.
        ofs.close()

    return all_probe_mols
def data_generator():
    """
    Yield tensors for the trajectory snapshots of 10 randomly chosen records.

    Records are sampled from ``ds_qc.data.records``. Every snapshot of a
    record's trajectory is parameterised with ``FF`` and its parameters are
    read back from the OpenMM system. If anything fails while a record is
    processed, the rest of that record is skipped.

    Yields
    ------
    tuple
        ``(atoms, adjacency_map, energy, xyz, jacobian, angles, bonds,
        torsions, particle_params, system)``

        * ``atoms``: ``TRANSLATION`` of each atomic number, int64.
        * ``adjacency_map``: zero matrix with column 2 of
          ``mol.connectivity`` scattered at the index pairs in columns 0-1.
        * ``energy``: ``scf_total_energy * HARTREE_TO_KJ_PER_MOL``.
        * ``xyz``: ``mol.geometry * BOHR_TO_NM``.
        * ``jacobian``: ``snapshot.return_result *
          HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM``.
        * ``angles`` / ``bonds`` / ``torsions`` / ``particle_params``:
          index columns followed by raw parameter values.
        * ``system``: the OpenMM system the parameters were read from.
    """
    for record_name in random.sample(list(ds_qc.data.records), 10):
        # Catch Exception rather than everything: a bare except around a
        # yield would also swallow GeneratorExit (raised when the consumer
        # closes the generator) and KeyboardInterrupt.
        try:
            print(record_name, flush=True)
            record = ds_qc.get_record(record_name, specification='default')
            if record is None:
                continue
            traj = record.get_trajectory()
            if traj is None:
                continue

            for snapshot in traj:
                energy = tf.convert_to_tensor(
                    snapshot.properties.scf_total_energy
                    * HARTREE_TO_KJ_PER_MOL,
                    dtype=tf.float32)

                mol = snapshot.get_molecule()

                atoms = tf.convert_to_tensor(
                    [TRANSLATION[atomic_number]
                     for atomic_number in mol.atomic_numbers],
                    dtype=tf.int64)

                n_atoms = tf.shape(atoms, tf.int64)[0]
                connectivity = np.array(mol.connectivity)
                adjacency_map = tf.tensor_scatter_nd_update(
                    tf.zeros((n_atoms, n_atoms), dtype=tf.float32),
                    tf.convert_to_tensor(connectivity[:, :2], dtype=tf.int64),
                    tf.convert_to_tensor(connectivity[:, 2],
                                         dtype=tf.float32))

                xyz = tf.convert_to_tensor(mol.geometry * BOHR_TO_NM,
                                           dtype=tf.float32)

                jacobian = tf.convert_to_tensor(
                    snapshot.return_result
                    * HARTREE_PER_BOHR_TO_KJ_PER_MOL_PER_NM,
                    dtype=tf.float32)

                oe_mol = cmiles.utils.load_molecule(mol.dict(encoding='json'))
                top = Topology.from_molecules(Molecule.from_openeye(oe_mol))
                system = FF.create_openmm_system(top)

                # The code relies on this force ordering in the created
                # system: 0 = angles, 1 = bonds, 2 = per-particle,
                # 3 = torsions.
                forces = system.getForces()
                angle_force = forces[0]
                bond_force = forces[1]
                particle_force = forces[2]
                torsion_force = forces[3]

                angles = tf.convert_to_tensor(
                    [[x[0], x[1], x[2], x[3]._value, x[4]._value]
                     for x in (angle_force.getAngleParameters(idx)
                               for idx in range(angle_force.getNumAngles()))],
                    dtype=tf.float32)

                bonds = tf.convert_to_tensor(
                    [[x[0], x[1], x[2]._value, x[3]._value]
                     for x in (bond_force.getBondParameters(idx)
                               for idx in range(bond_force.getNumBonds()))],
                    dtype=tf.float32)

                torsions = tf.convert_to_tensor(
                    [[x[0], x[1], x[2], x[3], x[4], x[5]._value, x[6]._value]
                     for x in (torsion_force.getTorsionParameters(idx)
                               for idx in range(
                                   torsion_force.getNumTorsions()))],
                    dtype=tf.float32)

                particle_params = tf.convert_to_tensor(
                    [[x[0]._value, x[1]._value, x[2]._value]
                     for x in (particle_force.getParticleParameters(idx)
                               for idx in range(
                                   particle_force.getNumParticles()))])

                yield (atoms, adjacency_map, energy, xyz, jacobian, angles,
                       bonds, torsions, particle_params, system)
        except Exception:
            # Best effort: give up on this record and move to the next one.
            continue
# Create <output_directory>/equilibration, including missing parents.
print("Making output directory ...")
output_directory = pathlib.Path(output_directory)
(output_directory / "equilibration").mkdir(parents=True, exist_ok=True)

# Save the output of `conda list` next to the results.
print("Writing packages of conda environment ...")
with open(output_directory / "conda_environment.txt", "w") as wf:
    subprocess.run(["conda", "list"], stdout=wf)

print("Reading PDB file ...")
pdb = app.PDBFile(pdb_path)
topology, positions = pdb.topology, pdb.positions

# A ligand is merged in only when a ligand path was given.
if len(ligand_path) > 0:
    print("Combining topologies ...")  # credit to @hannahbrucemacdonald
    molecule = Molecule.from_file(ligand_path)
    off_ligand_topology = Topology.from_molecules(molecule)
    ligand_topology = off_ligand_topology.to_openmm()
    # The first conformer of the ligand supplies its coordinates.
    ligand_positions = molecule.conformers[0]

    # Both OpenMM topologies are converted to mdtraj, joined there, and the
    # joined topology is converted back to OpenMM.
    md_protein_topology = md.Topology.from_openmm(
        topology
    )  # using mdtraj for protein top
    md_ligand_topology = md.Topology.from_openmm(
        ligand_topology
    )  # using mdtraj for ligand top
    md_complex_topology = md_protein_topology.join(
        md_ligand_topology
    )  # add them together
    complex_topology = md_complex_topology.to_openmm()  # now back to openmm

    # Allocate a zero-filled (total_atoms, 3) position array in nanometers
    # and copy the protein coordinates into the leading rows. The ligand
    # rows are not filled within this fragment.
    total_atoms = len(positions) + len(ligand_positions)
    complex_positions = unit.Quantity(np.zeros([total_atoms, 3]), unit=unit.nanometers)
    complex_positions[0 : len(positions)] = positions