def test_pre_equilibration(self):
    """Check that up-front edge equilibration only fills the cache when enabled."""
    protein_path = os.path.join(DATA_DIR, "hif2a_nowater_min.pdb")
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(protein_path)

    # Water-only system for the solvent stage.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    model = RBFEModel(
        client=CUDAPoolClient(NUM_GPUS),
        ff=hif2a_ligand_pair.ff,
        complex_system=complex_system,
        complex_coords=complex_coords,
        complex_box=complex_box,
        complex_schedule=construct_lambda_schedule(2),
        solvent_system=solvent_system,
        solvent_coords=solvent_coords,
        solvent_box=solvent_box,
        solvent_schedule=construct_lambda_schedule(2),
        equil_steps=10,
        prod_steps=100,
    )

    edge = (hif2a_ligand_pair.mol_a, hif2a_ligand_pair.mol_b, hif2a_ligand_pair.core)

    # Nothing has been equilibrated yet.
    assert len(model._equil_cache) == 0

    with TemporaryDirectory() as scratch_dir:
        cache_file = os.path.join(scratch_dir, "equil_cache.pkl")

        # With model.pre_equilibrate left at its initial value the call is a no-op.
        model.equilibrate_edges([edge], equilibration_steps=10, cache_path=cache_file)
        assert len(model._equil_cache) == 0

        # Turn pre-equilibration on and repeat the same request.
        model.pre_equilibrate = True
        model.equilibrate_edges([edge], equilibration_steps=10, cache_path=cache_file)

        # One cached set of starting coords per stage (solvent and complex).
        assert len(model._equil_cache) == 2
def test_minimizer():
    """Minimize the host around each ligand alone and around both together."""
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )

    supplier = Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False)
    ligands = [mol for mol in supplier]
    mol_a, mol_b = ligands[1], ligands[4]

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # minimize_host_4d throws if the minimization failed, so simply completing
    # each call is the check.
    for guests in ([mol_a, mol_b], [mol_a], [mol_b]):
        minimizer.minimize_host_4d(guests, complex_system, complex_coords, ff, complex_box)
def test_predict(self):
    """Smoke-test the most basic RBFE prediction: two stage results and a float ddG."""
    # The Simple Charges forcefield is used to verify determinism of the model;
    # needed because one endpoint uses the ff definition.
    ff = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

    protein_path = os.path.join(DATA_DIR, "hif2a_nowater_min.pdb")
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(protein_path)

    # Water-only system for the solvent stage.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    model = RBFEModel(
        client=CUDAPoolClient(NUM_GPUS),
        ff=ff,
        complex_system=complex_system,
        complex_coords=complex_coords,
        complex_box=complex_box,
        complex_schedule=construct_lambda_schedule(2),
        solvent_system=solvent_system,
        solvent_coords=solvent_coords,
        solvent_box=solvent_box,
        solvent_schedule=construct_lambda_schedule(2),
        equil_steps=10,
        prod_steps=100,
    )

    params = ff.get_ordered_params()
    pair = hif2a_ligand_pair

    ddg, results = model.predict(params, pair.mol_a, pair.mol_b, pair.core)

    self.assertEqual(len(results), 2)
    self.assertIsInstance(ddg, float)
def test_relative_free_energy():
    """Build a single-topology edge and run a short, stable free energy estimate.

    Exercises three cases with the same ligand pair and core mapping:
    the vacuum edge, and the complex and solvent host edges (whose
    difference is the binding estimate). Each estimate only has to stay
    below 1000 in magnitude; this is a stability check, not an accuracy
    check.
    """
    # test that we can properly build a single topology host guest system and
    # that we can run a few steps in a stable way. This tests runs both the complex
    # and the solvent stages.
    suppl = Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False)
    all_mols = [x for x in suppl]
    mol_a = all_mols[1]
    mol_b = all_mols[4]

    # atom-index pairs (mol_a index, mol_b index) shared between the two ligands
    core = np.array(
        [
            [0, 0],
            [2, 2],
            [1, 1],
            [6, 6],
            [5, 5],
            [4, 4],
            [3, 3],
            [15, 16],
            [16, 17],
            [17, 18],
            [18, 19],
            [19, 20],
            [20, 21],
            [32, 30],
            [26, 25],
            [27, 26],
            [7, 7],
            [8, 8],
            [9, 9],
            [10, 10],
            [29, 11],
            [11, 12],
            [12, 13],
            [14, 15],
            [31, 29],
            [13, 14],
            [23, 24],
            [30, 28],
            [28, 27],
            [21, 22],
        ]
    )

    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )

    # build the water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")
    ff_params = ff.get_ordered_params()

    seed = 2021
    lambda_schedule = np.linspace(0, 1.0, 4)
    equil_steps = 1000
    prod_steps = 1000

    single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
    rfe = free_energy.RelativeFreeEnergy(single_topology)

    def estimate_edge_dG(prepared_edge, box):
        """Return the dG estimate for one prepared edge simulated in ``box``."""
        unbound_potentials, sys_params, masses, coords = prepared_edge

        x0 = coords
        v0 = np.zeros_like(coords)
        # exactly one client per estimate (the vacuum leg used to create two)
        client = CUDAPoolClient(1)

        # the first potential is taken to be the harmonic bond term; its bond
        # list defines the groups handed to the barostat
        harmonic_bond_potential = unbound_potentials[0]
        group_idxs = get_group_indices(get_bond_list(harmonic_bond_potential))

        temperature = 300.0
        pressure = 1.0

        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
        barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        dG, _ = estimator.deltaG(model, sys_params)
        return dG

    def vacuum_model(ff_params):
        """Estimate dG for the ligand pair with no host, in a large cubic box."""
        vacuum_box = np.eye(3, dtype=np.float64) * 100
        return estimate_edge_dG(rfe.prepare_vacuum_edge(ff_params), vacuum_box)

    dG = vacuum_model(ff_params)
    assert np.abs(dG) < 1000.0

    def binding_model(ff_params):
        """Return the complex-leg estimate minus the solvent-leg estimate."""
        dGs = []

        for host_system, host_coords, host_box in [
            (complex_system, complex_coords, complex_box),
            (solvent_system, solvent_coords, solvent_box),
        ]:
            # minimize the host to avoid clashes
            host_coords = minimizer.minimize_host_4d([mol_a], host_system, host_coords, ff, host_box)

            prepared_edge = rfe.prepare_host_edge(ff_params, host_system, host_coords)
            dGs.append(estimate_edge_dG(prepared_edge, host_box))

        return dGs[0] - dGs[1]

    dG = binding_model(ff_params)
    assert np.abs(dG) < 1000.0
def test_absolute_free_energy():
    """Run a short absolute free energy estimate for one ligand and check it stays bounded.

    The ligand is simulated in the solvated protein and in a water box;
    the reported value is the complex estimate minus the solvent estimate.
    """
    ligands = [entry for entry in Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False)]
    mol = ligands[1]

    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )

    # Water-only system for the solvent leg.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")
    ff_params = ff.get_ordered_params()

    seed = 2021
    lambda_schedule = np.linspace(0, 1.0, 4)
    equil_steps = 1000
    prod_steps = 1000
    temperature = 300.0
    pressure = 1.0

    afe = free_energy.AbsoluteFreeEnergy(mol, ff)

    legs = (
        (complex_system, complex_coords, complex_box),
        (solvent_system, solvent_coords, solvent_box),
    )

    def leg_dG(params, host_system, host_coords, host_box):
        """Estimate dG for the ligand in a single host environment."""
        # Relax the host around the ligand first so the run does not start from clashes.
        relaxed_coords = minimizer.minimize_host_4d([mol], host_system, host_coords, ff, host_box)

        unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
            params, host_system, relaxed_coords
        )

        # The first potential is used as the harmonic bond term to derive barostat groups.
        bond_list = get_bond_list(unbound_potentials[0])
        group_idxs = get_group_indices(bond_list)

        x0 = coords
        v0 = np.zeros_like(coords)
        client = CUDAPoolClient(1)

        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
        barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature, group_idxs, 25, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            host_box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        estimate, _ = estimator.deltaG(model, sys_params)
        return estimate

    def absolute_model(ff_params):
        """Return the complex-leg estimate minus the solvent-leg estimate."""
        complex_dG, solvent_dG = [leg_dG(ff_params, *leg) for leg in legs]
        return complex_dG - solvent_dG

    dG = absolute_model(ff_params)
    assert np.abs(dG) < 1000.0
def do_relative_docking(host_pdbfile, mol_a, mol_b, core, num_switches, transition_steps):
    """Runs non-equilibrium switching jobs:
    1. Solvates a protein, minimizes w.r.t guest_A, equilibrates & spins off switching jobs
       (deleting guest_A while inserting guest_B) every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein
    Does num_switches switching jobs per leg.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    mol_a (rdkit mol): the starting ligand to swap from
    mol_b (rdkit mol): the ending ligand to swap to
    core (np.array[[int, int], [int, int], ...]): the common core atoms between mol_a and mol_b
    num_switches (int): number of switching trajectories to run per compound pair per leg
    transition_steps (int): length of each switching trajectory

    Returns
    -------

    {str: [float]}: map of leg label to work values of switching mol_a to mol_b in that leg,
                    {'protein': [work values], 'solvent': [work_values]}.
                    An empty dict is returned as soon as a leg hits an AtomMappingError.

    Output
    ------

    stdout noting the step number, lambda value, and energy at various steps
    stdout noting the work of transition, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if any norm of force per atom exceeds 20000 kJ/(mol*nm)
       [MAX_NORM_FORCE defined in docking/report.py]
    The simulations won't run if the atom maps are not factorizable
    """

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        _,
    ) = builders.build_protein_system(host_pdbfile)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    water_system, water_coords, water_box, _ = builders.build_water_system(water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs

    # Run the procedure
    guest_name_a = mol_a.GetProp("_Name")
    guest_name_b = mol_b.GetProp("_Name")
    combined_name = guest_name_a + "-->" + guest_name_b

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    all_works = {}
    for system, coords, box, label in zip(
        [solvated_host_system, water_system],
        [solvated_host_coords, water_coords],
        [host_box, water_box],
        ["protein", "solvent"],
    ):
        # Restart the clock for every leg so the printed "leg time" covers this
        # leg only, not everything since the first leg began.
        leg_start_time = time.time()

        # minimize w.r.t. both mol_a and mol_b?
        min_coords = minimizer.minimize_host_4d([mol_a], system, coords, ff, box)

        try:
            single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            ups, sys_params, combined_masses, combined_coords = rfe.prepare_host_edge(
                ff.get_ordered_params(), system, min_coords
            )
        except topology.AtomMappingError as e:
            print(f"NON-FACTORIZABLE PAIR: {combined_name}")
            print(e)
            return {}

        # bind each unbound potential to its parameters
        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        all_works[label] = run_leg(
            combined_coords,
            combined_bps,
            combined_masses,
            box,
            combined_name,
            label,
            num_switches,
            transition_steps,
        )

        leg_end_time = time.time()
        print(
            f"{combined_name} {label} leg time:",
            "%.2f" % (leg_end_time - leg_start_time),
            "seconds",
        )
    return all_works
def estimate_dG(
    transformation: RelativeTransformation,
    num_lambdas: int,
    num_steps_per_lambda: int,
    num_equil_steps: int,
):
    """Estimate the complex-minus-solvent dG of a relative transformation.

    For each stage (complex, then solvent) the host is minimized, one
    ``rfe.host_edge`` job per lambda window is submitted to the module-level
    ``client``, and the mean du/dlambda of each window is integrated over the
    lambda schedule with the trapezoid rule.

    Parameters
    ----------
    transformation:
        Supplies ``ff``, ``mol_a``, ``mol_b`` and ``core``.
    num_lambdas:
        Passed to ``construct_lambda_schedule`` to build the lambda windows.
    num_steps_per_lambda:
        Forwarded to ``rfe.host_edge`` after ``num_equil_steps``.
    num_equil_steps:
        Forwarded to ``rfe.host_edge`` before ``num_steps_per_lambda``.

    Returns
    -------
    The complex-stage integral minus the solvent-stage integral.

    Notes
    -----
    Reads the module-level names ``path_to_protein`` and ``client``.
    """

    def _mean_du_dlambda(result):
        """summarize result of rfe.host_edge into mean du/dl

        TODO: refactor where this analysis step occurs
        """
        bonded_du_dl, nonbonded_du_dl, _ = result
        return np.mean(bonded_du_dl + nonbonded_du_dl)

    # build the protein system.
    # Index the result instead of unpacking a fixed count: the other callers of
    # build_protein_system unpack six values (with a trailing topology), while
    # this function unpacked five. System, coords and box sit at positions
    # 0, 1 and 4 in both layouts.
    protein_build = builders.build_protein_system(path_to_protein)
    complex_system, complex_coords, complex_box = protein_build[0], protein_build[1], protein_build[4]

    # build the water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    ff = transformation.ff
    mol_a, mol_b = transformation.mol_a, transformation.mol_b
    core = transformation.core

    # TODO: measure performance of complex and solvent separately

    lambda_schedule = construct_lambda_schedule(num_lambdas)

    # The topology does not depend on the stage, so build it once.
    single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
    rfe = free_energy.RelativeFreeEnergy(single_topology)
    do_work = partial(wrap_method, fxn=rfe.host_edge)

    stage_dGs = []
    for stage, host_system, host_coords, host_box in [
        ("complex", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    ]:
        print("Minimizing the host structure to remove clashes.")
        # NOTE(review): other callers of minimize_host_4d pass a list of mols
        # ([mol_a]); this call passes the bare mol -- confirm against the
        # minimizer version this script targets.
        minimized_host_coords = minimizer.minimize_host_4d(mol_a, host_system, host_coords, ff, host_box)

        # one job per lambda window
        print("submitting tasks to client!")
        futures = [
            client.submit(
                do_work,
                (lamb, host_system, minimized_host_coords, host_box, num_equil_steps, num_steps_per_lambda),
            )
            for lamb in lambda_schedule
        ]

        # block until every window has finished, preserving schedule order
        results = [fut.result() for fut in futures]

        dG_host = np.trapz([_mean_du_dlambda(x) for x in results], lambda_schedule)
        stage_dGs.append(dG_host)

    pred = stage_dGs[0] - stage_dGs[1]
    return pred
# Script setup: load the ligands and forcefield, build the lambda schedules and
# the complex/solvent systems, then locate the blocker molecule by name.
path_to_ligand = cmd_args.ligand_sdf
# removeHs=False keeps the hydrogens stored in the SDF
suppl = Chem.SDMolSupplier(path_to_ligand, removeHs=False)
forcefield = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")
mols = [x for x in suppl]
dataset = Dataset(mols)
# construct lambda schedules for complex and solvent
complex_absolute_schedule = construct_absolute_lambda_schedule_complex(cmd_args.num_complex_windows)
solvent_absolute_schedule = construct_absolute_lambda_schedule_solvent(cmd_args.num_solvent_windows)
# build the protein system.
complex_system, complex_coords, _, _, complex_box, complex_topology = builders.build_protein_system(
    cmd_args.protein_pdb)
# build the water system (4.0 is presumably the box width -- TODO confirm units)
solvent_system, solvent_coords, solvent_box, solvent_topology = builders.build_water_system(4.0)
# Find the single molecule whose "_Name" property equals cmd_args.blocker_name.
blocker_mol = None
for mol in mols:
    if mol.GetProp("_Name") == cmd_args.blocker_name:
        # we should only have one copy.
        assert blocker_mol is None
        blocker_mol = mol
# fail early if no molecule matched the requested blocker name
assert blocker_mol is not None
frame_filter = None
else: validation = dataset training = Dataset([]) with open(output_path.joinpath("training_edges.pk"), "wb") as ofs: dump(training.data, ofs) if len(validation): with open(output_path.joinpath("validation_edges.pk"), "wb") as ofs: dump(validation.data, ofs) # Build all of the different protein systems systems = {} for prot_path in protein_paths: # build the complex system # note: "complex" means "protein + solvent" complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system( prot_path) # build the water system solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system( 4.0) systems[prot_path] = RBFEModel( client=client, ff=forcefield, complex_system=complex_system, complex_coords=complex_coords, complex_box=complex_box, complex_schedule=construct_lambda_schedule( configuration.num_complex_windows), solvent_system=solvent_system, solvent_coords=solvent_coords,
def dock_and_equilibrate(
    host_pdbfile,
    guests_sdfile,
    max_lambda,
    insertion_steps,
    eq_steps,
    outdir,
    fewer_outfiles=False,
    constant_atoms=None,
):
    """Solvates a host, inserts guest(s) into solvated host, equilibrates

    Parameters
    ----------

    host_pdbfile: path to host pdb file to dock into
    guests_sdfile: path to input sdf with guests to pose/dock
    max_lambda: lambda value the guest should insert from or delete to
        (recommended: 1.0 for work calulation, 0.25 to stay close to original pose)
        (must be =1 for work calculation to be applicable)
    insertion_steps: how many steps to insert the guest over (recommended: 501)
    eq_steps: how many steps of equilibration to do after insertion (recommended: 15001)
    outdir: where to write output (will be created if it does not already exist)
    fewer_outfiles: if True, will only write frames for the equilibration, not insertion
    constant_atoms: atom numbers from the host_pdbfile to hold mostly fixed across the simulation
        (1-indexed, like PDB files). None (the default) means no atoms are held.

    Output
    ------

    A pdb & sdf file for the last step of insertion
       (outdir/<guest_name>/<guest_name>_ins_<step>_[host.pdb/guest.sdf])
    A pdb & sdf file every 1000 steps of equilibration
       (outdir/<guest_name>/<guest_name>_eq_<step>_[host.pdb/guest.sdf])
    stdout corresponding to the files written noting the lambda value and energy
    stdout for each guest noting the work of transition, if applicable
    stdout for each guest noting how long it took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """
    if constant_atoms is None:
        constant_atoms = []

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    print(
        f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}
    MAX_LAMBDA = {max_lambda}
    INSERTION_STEPS = {insertion_steps}
    EQ_STEPS = {eq_steps}
    """
    )

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # Round-trip the solvated host through a temporary PDB file to get an RDKit mol.
    fd, solvated_host_pdb = tempfile.mkstemp(suffix=".pdb", text=True)
    # mkstemp hands back an open descriptor; the writer is given the path, so
    # close the descriptor right away instead of leaking it.
    os.close(fd)
    try:
        writer = pdb_writer.PDBWriter([solvated_topology], solvated_host_pdb)
        writer.write_frame(solvated_host_coords)
        writer.close()
        solvated_host_mol = Chem.MolFromPDBFile(solvated_host_pdb, removeHs=False)
    finally:
        # remove the temp file even if writing or parsing failed
        os.remove(solvated_host_pdb)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    num_host_atoms = len(solvated_host_coords)

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        minimized_coords = minimizer.minimize_host_4d(
            [guest_mol], solvated_host_system, solvated_host_coords, ff, host_box
        )

        afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)

        ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
            ff.get_ordered_params(), solvated_host_system, minimized_coords
        )

        # bind each unbound potential to its parameters
        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        # host atoms first, guest atoms after; the frame slicing below relies on this
        x0 = np.concatenate([minimized_coords, orig_guest_coords])
        v0 = np.zeros_like(x0)
        print("SYSTEM", f"guest_name: {guest_name}", f"num_atoms: {len(x0)}")

        # make the requested atoms very heavy so they stay mostly fixed
        # (constant_atoms is 1-indexed, hence the - 1)
        for atom_num in constant_atoms:
            combined_masses[atom_num - 1] += 50000

        seed = 2021
        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, combined_masses, seed).impl()

        u_impls = [bp.bound_impl(precision=np.float32) for bp in combined_bps]

        ctxt = custom_ops.Context(x0, v0, host_box, intg, u_impls)

        # insert guest
        insertion_lambda_schedule = np.linspace(max_lambda, 0.0, insertion_steps)
        calc_work = True

        # collect a du_dl calculation on every step (interval of 1)
        subsample_interval = 1

        full_du_dls, _, _ = ctxt.multiple_steps(insertion_lambda_schedule, subsample_interval)
        step = len(insertion_lambda_schedule) - 1
        lamb = insertion_lambda_schedule[-1]
        ctxt.step(lamb)

        report.report_step(
            ctxt,
            step,
            lamb,
            host_box,
            combined_bps,
            u_impls,
            guest_name,
            insertion_steps,
            "INSERTION",
        )
        if not fewer_outfiles:
            x_t = ctxt.get_x_t()
            # * 10 undoes the / 10 unit conversion applied to the guest coords above
            host_coords = x_t[:num_host_atoms] * 10
            guest_coords = x_t[num_host_atoms:] * 10
            report.write_frame(
                host_coords,
                solvated_host_mol,
                guest_coords,
                guest_mol,
                guest_name,
                outdir,
                str(step).zfill(len(str(insertion_steps))),
                "ins",
            )
        if report.too_much_force(ctxt, lamb, host_box, combined_bps, u_impls):
            print("Not calculating work (too much force)")
            # give up on this guest entirely: no work, no equilibration
            continue

        # Note: this condition only applies for ABFE, not RBFE
        if abs(full_du_dls[0]) > 0.001 or abs(full_du_dls[-1]) > 0.001:
            print("Not calculating work (du_dl endpoints are not ~0)")
            calc_work = False

        if calc_work:
            work = np.trapz(full_du_dls, insertion_lambda_schedule[::subsample_interval])
            print(f"guest_name: {guest_name}\tinsertion_work: {work:.2f}")

        # equilibrate
        for step in range(eq_steps):
            ctxt.step(0.00)
            if step % 1000 == 0:
                report.report_step(
                    ctxt,
                    step,
                    0.00,
                    host_box,
                    combined_bps,
                    u_impls,
                    guest_name,
                    eq_steps,
                    "EQUILIBRATION",
                )
                if (not fewer_outfiles) or (step == eq_steps - 1):
                    x_t = ctxt.get_x_t()
                    host_coords = x_t[:num_host_atoms] * 10
                    guest_coords = x_t[num_host_atoms:] * 10
                    report.write_frame(
                        host_coords,
                        solvated_host_mol,
                        guest_coords,
                        guest_mol,
                        guest_name,
                        outdir,
                        str(step).zfill(len(str(eq_steps))),
                        "eq",
                    )
            # check forces at the start, middle and end of equilibration
            if step in (0, int(eq_steps / 2), eq_steps - 1):
                if report.too_much_force(ctxt, 0.00, host_box, combined_bps, u_impls):
                    break

        end_time = time.time()
        print(f"{guest_name} took {(end_time - start_time):.2f} seconds")
def _topology_to_rdkit_mol(pdb_topology, coords):
    """Write one frame to a temporary PDB file and read it back as an RDKit mol."""
    fd, pdb_path = tempfile.mkstemp(suffix=".pdb", text=True)
    # mkstemp hands back an open descriptor; the writer is given the path, so
    # close the descriptor right away instead of leaking it.
    os.close(fd)
    try:
        writer = pdb_writer.PDBWriter([pdb_topology], pdb_path)
        writer.write_frame(coords)
        writer.close()
        return Chem.MolFromPDBFile(pdb_path, removeHs=False)
    finally:
        # remove the temp file even if writing or parsing failed
        os.remove(pdb_path)


def calculate_rigorous_work(
    host_pdbfile,
    guests_sdfile,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda=0.5,
    insertion_steps=501,
    eq1_steps=5001,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Runs non-equilibrium deletion jobs:
    1. Solvates a protein, inserts guest, equilibrates, equilibrates more & spins off
       deletion jobs every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein.
    Does num_deletions deletion jobs per leg per compound.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    guests_sdfile (str): path to guests sdf file
    outdir (str): path to directory to which to write output
    num_deletions (int): number of deletion trajectories to run per leg per compound
    deletion_steps (int): length of each deletion trajectory
    insertion_max_lambda (float): how far away to insert from (0.0-1.0)
    insertion_steps (int): how long to insert over
    eq1_steps (int): how long to equilibrate after insertion and before starting the deletions
    fewer_outfiles (bool): only save the starting frame of each deletion trajectory
    no_outfiles (bool): don't keep any output files

    Returns
    -------

    {str: {str: [float]}}: map of compound to leg label to work values
                           {'guest_1': {'protein': [work values], 'solvent': [work_values]}, ...}

    Output
    ------

    A pdb & sdf file for each guest's final insertion step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each guest's final eq1 step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each deletion job's first step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless no_outfiles is True)
    stdout corresponding to the files written noting the lambda value and energy
    stdout noting the work of deletion, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    print(
        f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}

    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    insertion_max_lambda = {insertion_max_lambda}
    insertion_steps = {insertion_steps}
    eq1_steps = {eq1_steps}
    num_deletions = {num_deletions}
    deletion_steps = {deletion_steps}
    """
    )

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    solvated_host_mol = _topology_to_rdkit_mol(solvated_topology, solvated_host_coords)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_mol = _topology_to_rdkit_mol(water_topology, water_coords)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # Run the procedure
    all_works = defaultdict(dict)
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        for system, coords, host_mol, box, label in zip(
            [solvated_host_system, water_system],
            [solvated_host_coords, water_coords],
            [solvated_host_mol, water_mol],
            [host_box, water_box],
            ["protein", "solvent"],
        ):
            # Restart the clock for every leg so the printed "leg time" covers
            # this leg only, not everything since the guest's first leg began.
            leg_start_time = time.time()

            minimized_coords = minimizer.minimize_host_4d([guest_mol], system, coords, ff, box)

            afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)
            # the combined coordinates are not needed: run_leg receives the
            # minimized host coords and the original guest coords separately
            ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
                ff.get_ordered_params(), system, minimized_coords
            )

            # bind each unbound potential to its parameters
            combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

            works = run_leg(
                minimized_coords,
                orig_guest_coords,
                combined_bps,
                combined_masses,
                box,
                guest_name,
                label,
                host_mol,
                guest_mol,
                outdir,
                num_deletions,
                deletion_steps,
                insertion_max_lambda,
                insertion_steps,
                eq1_steps,
                fewer_outfiles,
                no_outfiles,
            )
            all_works[guest_name][label] = works

            leg_end_time = time.time()
            print(
                f"{guest_name} {label} leg time:",
                "%.2f" % (leg_end_time - leg_start_time),
                "seconds",
            )
    return all_works
def benchmark_hif2a(verbose=False, num_batches=100, steps_per_batch=1000):
    """Benchmark the hif2a complex and its solvent box, with and without the RBFE ligand pair.

    For each stage this runs: the apo host, the apo host with a barostat
    interval of 25, the RBFE system at lambda 0.5, and the RBFE system with
    du/dl computed at intervals 0, 1 and 5.
    """
    from timemachine.testsystems.relative import hif2a_ligand_pair as testsystem

    mol_a, mol_b, core = testsystem.mol_a, testsystem.mol_b, testsystem.core

    # this uses the "sc" forcefield file rather than the "ccc" one
    ff = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")
    rfe = free_energy.RelativeFreeEnergy(SingleTopology(mol_a, mol_b, core, ff))
    ff_params = ff.get_ordered_params()

    # build the protein system.
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    # keyword options shared by every benchmark call below
    run_opts = dict(verbose=verbose, num_batches=num_batches, steps_per_batch=steps_per_batch)

    stages = (
        ("hif2a", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    )

    for stage, host_system, host_coords, host_box in stages:
        host_fns, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.0)

        # resolve host clashes
        apo_x0 = minimizer.minimize_host_4d([mol_a, mol_b], host_system, host_coords, ff, host_box)
        apo_v0 = np.zeros_like(apo_x0)

        # host only, lamb = 0.0
        benchmark(stage + "-apo", host_masses, 0.0, apo_x0, apo_v0, host_box, host_fns, **run_opts)
        benchmark(
            stage + "-apo-barostat-interval-25",
            host_masses,
            0.0,
            apo_x0,
            apo_v0,
            host_box,
            host_fns,
            barostat_interval=25,
            **run_opts,
        )

        # RBFE
        unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(ff_params, host_system, apo_x0)
        bound_potentials = []
        for potential, params in zip(unbound_potentials, sys_params):
            bound_potentials.append(potential.bind(params))

        rbfe_x0 = coords
        rbfe_v0 = np.zeros_like(rbfe_x0)

        # host + ligand pair, lamb = 0.5
        benchmark(
            stage + "-rbfe-with-du-dp", masses, 0.5, rbfe_x0, rbfe_v0, host_box, bound_potentials, **run_opts
        )

        for du_dl_interval in [0, 1, 5]:
            benchmark(
                stage + "-rbfe-du-dl-interval-" + str(du_dl_interval),
                masses,
                0.5,
                rbfe_x0,
                rbfe_v0,
                host_box,
                bound_potentials,
                compute_du_dl_interval=du_dl_interval,
                **run_opts,
            )