def main(args, stage):
    """Run one stage of the two-ligand free energy example in a water box.

    Both ligands are built from the same SMILES string (the ``benzene`` /
    ``phenol`` names are historical), parameterized with the SMIRNOFF-style
    handlers, combined into one recipe, and restrained with ``rbfe.stage_0``
    or ``rbfe.stage_1``. The host + first ligand is minimized in a worker
    process, then ``run`` is mapped over the lambda schedule for 10 epochs and
    the trapezoidal integral of the returned values is printed per epoch.

    Args:
        args: object exposing ``num_gpus``; used as the worker-pool size and
            to assign a device index (``lamb_idx % num_gpus``) to each window.
        stage: ``0`` or ``1``; selects the restraint setup and lambda schedule.

    Raises:
        AssertionError: if ``stage`` is neither 0 nor 1.
    """
    smiles = "C1=CC=C2C=CC=CC2=C1"
    benzene = Chem.AddHs(Chem.MolFromSmiles(smiles))  # ligand a
    phenol = Chem.AddHs(Chem.MolFromSmiles(smiles))  # ligand b

    AllChem.EmbedMolecule(benzene)
    AllChem.EmbedMolecule(phenol)

    # Close the parameter file deterministically instead of leaking the handle.
    with open('ff/params/smirnoff_1_1_0_ccc.py') as ff_file:
        ff_handlers = deserialize_handlers(ff_file.read())

    r_benzene = Recipe.from_rdkit(benzene, ff_handlers)
    r_phenol = Recipe.from_rdkit(phenol, ff_handlers)
    r_combined = r_benzene.combine(r_phenol)

    num_atoms_a = benzene.GetNumAtoms()
    num_atoms_b = phenol.GetNumAtoms()

    # Atoms 0..9 of ligand a are paired with atoms 0..9 of ligand b; the second
    # column is shifted because ligand b follows ligand a in the combined recipe.
    core_idxs = np.arange(10, dtype=np.int32)
    core_pairs = np.stack([core_idxs, core_idxs + num_atoms_a], axis=1)

    a_idxs = np.arange(num_atoms_a)
    b_idxs = np.arange(num_atoms_b) + num_atoms_a

    core_k = 20.0

    if stage == 0:
        centroid_k = 200.0
        rbfe.stage_0(r_combined, b_idxs, core_pairs, centroid_k, core_k)
        lambda_schedule = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    elif stage == 1:
        rbfe.stage_1(r_combined, a_idxs, b_idxs, core_pairs, core_k)
        lambda_schedule = np.linspace(0.0, 1.2, 60)
    else:
        # Raised explicitly (rather than `assert 0`) so the check survives -O
        # and never falls through with an undefined lambda_schedule.
        raise AssertionError("unsupported stage: %r" % (stage,))

    system, host_coords, box, _ = builders.build_water_system(4.0)

    r_host = Recipe.from_openmm(system)
    r_final = r_host.combine(r_combined)

    # minimize coordinates of host + ligand A
    ha_coords = np.concatenate([host_coords, get_romol_conf(benzene)])
    r_minimize = minimize_setup(r_host, r_benzene)

    # we need to run this in a subprocess since the cuda runtime
    # must not be initialized in the master thread due to lack of
    # fork safety
    pool = Pool(args.num_gpus)
    try:
        minimized = pool.map(
            minimize,
            [(r_minimize.bound_potentials, r_minimize.masses, ha_coords, box)],
            chunksize=1)
    finally:
        pool.close()
        pool.join()
    # pool.map returns a list; there is exactly one job
    ha_coords = minimized[0]

    x0 = np.concatenate([ha_coords, get_romol_conf(phenol)])
    masses = np.concatenate([r_host.masses, r_benzene.masses, r_phenol.masses])
    seed = np.random.randint(np.iinfo(np.int32).max)
    intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed)

    # The per-window arguments do not change between epochs, so build them once.
    run_args = [
        (lamb, intg, r_final.bound_potentials, r_final.masses, x0, box,
         lamb_idx % args.num_gpus, stage)
        for lamb_idx, lamb in enumerate(lambda_schedule)
    ]

    # production run at various values of lambda
    pool = Pool(args.num_gpus)
    try:
        for epoch in range(10):
            avg_du_dls = pool.map(run, run_args, chunksize=1)
            print("stage", stage, "epoch", epoch, "dG", np.trapz(avg_du_dls, lambda_schedule))
    finally:
        # The original never released this second pool.
        pool.close()
        pool.join()
# Script setup: parse arguments, start the worker pool, load the two ligands and
# the forcefield, build and minimize the solvent box, and define the lambda
# schedule used for the absolute free energy legs.
cmd_args = parser.parse_args()

multiprocessing.set_start_method('spawn')  # CUDA runtime is not forkable
pool = multiprocessing.Pool(cmd_args.num_gpus)

suppl = Chem.SDMolSupplier('tests/data/benzene_fluorinated.sdf', removeHs=False)
all_mols = list(suppl)
mol_a = all_mols[0]
mol_b = all_mols[1]

# Read the parameter file inside a context manager so the handle is closed.
with open('ff/params/smirnoff_1_1_0_ccc.py') as ff_file:
    ff_handlers = deserialize_handlers(ff_file.read())
ff = Forcefield(ff_handlers)

# the water system first.
solvent_system, solvent_coords, solvent_box, omm_topology = builders.build_water_system(4.0)
solvent_box += np.eye(3)*0.1  # BFGS this later

print("Minimizing the host structure to remove clashes.")
minimized_solvent_coords = minimizer.minimize_host_4d(mol_a, solvent_system, solvent_coords, ff, solvent_box)

# Roughly two thirds of the windows cover [0, 0.333) and the remaining third
# covers [0.333, 1.0] (endpoint included).
num_tail_windows = cmd_args.num_absolute_windows//3
absolute_lambda_schedule = np.concatenate([
    np.linspace(0.0, 0.333, cmd_args.num_absolute_windows - num_tail_windows, endpoint=False),
    np.linspace(0.333, 1.0, num_tail_windows),
])

abs_dGs = []

# NOTE(review): in the visible code nothing is appended to abs_dGs and `afe` is
# only constructed -- confirm whether the loop body continues elsewhere.
for idx, mol in enumerate([mol_a, mol_b]):
    afe = free_energy.AbsoluteFreeEnergy(mol, ff)
def run_epoch(ff, mol_a, mol_b, core):
    """Run the complex and solvent legs once and update ``ff`` in place.

    For each leg a lambda schedule is built, the host is minimized around
    ``mol_a``, and ``rfe.host_edge`` is mapped over the schedule on the worker
    pool. The per-leg dG is the trapezoidal integral of bonded + nonbonded
    du/dl. Endpoint gradient differences are accumulated as complex minus
    solvent, scaled by the sign of ``pred - label``, clipped, and subtracted
    from the charge and Lennard-Jones handler parameters.

    Relies on names not defined in this block (``cmd_args``, ``pool``,
    ``label``, ``wrap_method``), presumably module-level globals.

    Args:
        ff: forcefield whose ``q_handle`` / ``lj_handle`` params are updated.
        mol_a: first ligand; also used when minimizing each host.
        mol_b: second ligand.
        core: core mapping handed to ``free_energy.RelativeFreeEnergy``.
    """
    # Protein host, with a little extra room added to the box diagonal.
    complex_system, complex_coords, _, _, complex_box = builders.build_protein_system(
        'tests/data/hif2a_nowater_min.pdb')
    complex_box += np.eye(3)*0.1  # BFGS this later

    # Plain water host, padded the same way.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)
    solvent_box += np.eye(3)*0.1  # BFGS this later

    legs = [
        ("complex", complex_system, complex_coords, complex_box, cmd_args.num_complex_windows),
        ("solvent", solvent_system, solvent_coords, solvent_box, cmd_args.num_solvent_windows),
    ]

    combined_handle_and_grads = {}
    stage_dGs = []

    for stage, host_system, host_coords, host_box, num_host_windows in legs:
        # Empirically the variance of <du/dl> is largest near the endpoints for
        # the nonbonded terms (bonded terms are roughly linear), so the outer
        # quarters of the lambda range get proportionally more windows.
        n_head = int(.35*num_host_windows)
        n_mid = int(.30*num_host_windows)
        n_tail = num_host_windows - n_head - n_mid

        lambda_schedule = np.concatenate([
            np.linspace(0.0, 0.25, n_head, endpoint=False),
            np.linspace(0.25, 0.75, n_mid, endpoint=False),
            np.linspace(0.75, 1.0, n_tail, endpoint=True),
        ])
        assert len(lambda_schedule) == num_host_windows

        print("Minimizing the host structure to remove clashes.")
        minimized_host_coords = minimizer.minimize_host_4d(mol_a, host_system, host_coords, ff, host_box)

        rfe = free_energy.RelativeFreeEnergy(mol_a, mol_b, core, ff)

        # One task per lambda window of this leg, assigned round-robin to GPUs.
        host_args = [
            (window_idx % cmd_args.num_gpus, lamb, host_system, minimized_host_coords,
             host_box, cmd_args.num_equil_steps, cmd_args.num_prod_steps)
            for window_idx, lamb in enumerate(lambda_schedule)
        ]
        edge_fn = functools.partial(wrap_method, fn=rfe.host_edge)
        results = pool.map(edge_fn, host_args, chunksize=1)

        ghs = []
        for lamb, window_result in zip(lambda_schedule, results):
            bonded_du_dl, nonbonded_du_dl, grads_and_handles = window_result
            ghs.append(grads_and_handles)
            print("final", stage, "lambda", lamb,
                  "bonded:", bonded_du_dl[0], bonded_du_dl[1],
                  "nonbonded:", nonbonded_du_dl[0], nonbonded_du_dl[1])

        total_du_dls = [res[0][0]+res[1][0] for res in results]
        dG_host = np.trapz(total_du_dls, lambda_schedule)
        stage_dGs.append(dG_host)

        # use gradient information from the endpoints (last window minus first)
        for (grad_lhs, handle_type_lhs), (grad_rhs, handle_type_rhs) in zip(ghs[0], ghs[-1]):
            # ffs are forked so the returned handler isn't the same object as
            # that of ff; only the handler types can be compared.
            assert handle_type_lhs == handle_type_rhs
            grad = grad_rhs - grad_lhs
            # complex - solvent: the first leg seen stores, the later one subtracts
            if handle_type_lhs in combined_handle_and_grads:
                combined_handle_and_grads[handle_type_lhs] -= grad
            else:
                combined_handle_and_grads[handle_type_lhs] = grad

        print(stage, "pred_dG:", dG_host)

    pred = stage_dGs[0] - stage_dGs[1]
    residual = pred - label
    loss = np.abs(residual)

    print("loss", loss, "pred", pred, "label", label)

    dl_dpred = np.sign(residual)

    # (ytz): these should be made configurable later on.
    gradient_clip_thresholds = {
        nonbonded.AM1CCCHandler: 0.05,
        nonbonded.LennardJonesHandler: np.array([0.001, 0]),
    }

    # Apply the clipped updates directly to the forcefield parameters.
    for handle_type, grad in combined_handle_and_grads.items():
        if handle_type not in gradient_clip_thresholds:
            continue

        bounds = gradient_clip_thresholds[handle_type]
        # chain rule, then clip so the step stays well behaved
        # (lots of room to improve here)
        dl_dp = np.clip(dl_dpred*grad, -bounds, bounds)

        if handle_type == nonbonded.AM1CCCHandler:
            # sanity check as we have other charge methods that exist
            assert handle_type == type(ff.q_handle)
            ff.q_handle.params -= dl_dp
        elif handle_type == nonbonded.LennardJonesHandler:
            # sanity check again, even though we don't have other lj methods currently
            assert handle_type == type(ff.lj_handle)
            ff.lj_handle.params -= dl_dp
romol_b = Chem.AddHs(Chem.MolFromSmiles("CC(=O)OC1=CC=CC=C1C(=O)OC")) ligand_masses_a = [a.GetMass() for a in romol_a.GetAtoms()] ligand_masses_b = [a.GetMass() for a in romol_b.GetAtoms()] # generate conformers AllChem.EmbedMolecule(romol_a) AllChem.EmbedMolecule(romol_b) # extract the 0th conformer ligand_coords_a = get_romol_conf(romol_a) ligand_coords_b = get_romol_conf(romol_b) # construct a 4-nanometer water box (from openmmtools approach: selecting out # of a large pre-equilibrated water box snapshot) system, host_coords, box, omm_topology = builders.build_water_system(4.0) # padding to avoid jank box = box + np.eye(3) * 0.1 host_bps, host_masses = openmm_deserializer.deserialize_system(system, cutoff=1.2) combined_masses = np.concatenate( [host_masses, ligand_masses_a, ligand_masses_b]) # minimize coordinates # note: .py file rather than .offxml file # note: _ccc suffix means "correctable charge corrections" ff_handlers = deserialize_handlers(
def _write_and_load_pdb(omm_topology, coords, pdb_path, remove_after):
    """Write one frame to ``pdb_path``, read it back as an RDKit mol, optionally delete it."""
    writer = pdb_writer.PDBWriter([omm_topology], pdb_path)
    writer.write_frame(coords)
    writer.close()
    mol = Chem.MolFromPDBFile(pdb_path, removeHs=False)
    if remove_after:
        os.remove(pdb_path)
    return mol


def _split_nonbonded(bound_potentials):
    """Return ``(nonbonded_bp, other_bps)``; asserts at most one Nonbonded term exists."""
    nb_bp = None
    other_bps = []
    for bp in bound_potentials:
        if isinstance(bp, potentials.Nonbonded):
            # (ytz): hack to ensure we only have one nonbonded term
            assert nb_bp is None
            nb_bp = bp
        else:
            other_bps.append(bp)
    return nb_bp, other_bps


def _bind_guest_potentials(env_nb_bp, env_other_bps, guest_base_top, ff):
    """Return the environment's potentials plus the guest's bonded and nonbonded terms, bound."""
    hgt = topology.HostGuestTopology(env_nb_bp, guest_base_top)

    # setup the parameter handlers for the ligand
    bonded_tuples = [
        (hgt.parameterize_harmonic_bond, ff.hb_handle),
        (hgt.parameterize_harmonic_angle, ff.ha_handle),
        (hgt.parameterize_proper_torsion, ff.pt_handle),
        (hgt.parameterize_improper_torsion, ff.it_handle),
    ]

    combined_bps = list(env_other_bps)  # copy so the shared list is not mutated

    # instantiate the vjps while parameterizing (forward pass)
    for fn, handle in bonded_tuples:
        params, potential = fn(handle.params)
        combined_bps.append(potential.bind(params))

    nb_params, nb_potential = hgt.parameterize_nonbonded(ff.q_handle.params, ff.lj_handle.params)
    combined_bps.append(nb_potential.bind(nb_params))
    return combined_bps


def calculate_rigorous_work(
    host_pdbfile, guests_sdfile, outdir, fewer_outfiles=False, no_outfiles=False
):
    """Run the "host" and "water" legs for every guest in an SD file.

    The host PDB is solvated via ``builders.build_protein_system`` and a water
    box is built whose width is the smallest padded host box length. For each
    guest, its potentials are combined with those of the solvated host and of
    the water box, and ``run_leg`` is called once per environment. Wall-clock
    time for each leg is printed.

    Args:
        host_pdbfile: path to the host PDB file.
        guests_sdfile: path to the SD file containing the guest molecules.
        outdir: output directory; created if missing. The intermediate
            ``solvated_host.pdb`` and ``water_box.pdb`` are written here.
        fewer_outfiles: forwarded unchanged to ``run_leg``.
        no_outfiles: forwarded to ``run_leg``; when true the two intermediate
            PDB files are deleted right after being read back.

    Returns:
        None.
    """
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs
    os.makedirs(outdir, exist_ok=True)

    print(
        f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}

    INSERTION_MAX_LAMBDA = {INSERTION_MAX_LAMBDA}
    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    TRANSITION_STEPS = {TRANSITION_STEPS}
    EQ1_STEPS = {EQ1_STEPS}
    EQ2_STEPS = {EQ2_STEPS}
    """
    )

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    # sometimes water boxes are sad. Should be minimized first; this is a workaround
    host_box += np.eye(3) * 0.1
    print("host box", host_box)

    solvated_host_mol = _write_and_load_pdb(
        solvated_topology,
        solvated_host_coords,
        os.path.join(outdir, "solvated_host.pdb"),
        no_outfiles,
    )

    host_potentials, host_masses = openmm_deserializer.deserialize_system(solvated_host_system, cutoff=1.2)
    host_nb_bp, final_host_potentials = _split_nonbonded(host_potentials)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        orig_water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # sometimes water boxes are sad. should be minimized first; this is a workaround
    water_box += np.eye(3) * 0.1
    print("water box", water_box)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_mol = _write_and_load_pdb(
        water_topology,
        orig_water_coords,
        os.path.join(outdir, "water_box.pdb"),
        no_outfiles,
    )

    water_potentials, water_masses = openmm_deserializer.deserialize_system(water_system, cutoff=1.2)
    water_nb_bp, final_water_potentials = _split_nonbonded(water_potentials)

    # Read the forcefield text once (and close the file) instead of re-opening
    # it for every guest; handlers are still deserialized fresh per guest.
    ff_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "ff/params/smirnoff_1_1_0_ccc.py",
    )
    with open(ff_path) as ff_file:
        ff_text = ff_file.read()

    # Run the procedure
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_idx, guest_mol in enumerate(suppl):
        if guest_mol is None:
            # RDKit yields None for records it cannot parse; skip them rather
            # than failing on the attribute access below.
            print(f"Skipping guest #{guest_idx}: could not be parsed from {guests_sdfile}")
            continue

        start_time = time.time()
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(), dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        ff = Forcefield(deserialize_handlers(ff_text))
        guest_base_top = topology.BaseTopology(guest_mol, ff)
        guest_masses = [a.GetMass() for a in guest_mol.GetAtoms()]

        # combine host & guest
        combined_bps = _bind_guest_potentials(host_nb_bp, final_host_potentials, guest_base_top, ff)
        combined_masses = np.concatenate([host_masses, guest_masses])

        run_leg(
            solvated_host_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            host_box,
            guest_name,
            "host",
            solvated_host_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} host leg time:", "%.2f" % (end_time - start_time), "seconds"
        )

        # combine water & guest
        combined_bps = _bind_guest_potentials(water_nb_bp, final_water_potentials, guest_base_top, ff)
        combined_masses = np.concatenate([water_masses, guest_masses])

        start_time = time.time()
        run_leg(
            orig_water_coords,
            orig_guest_coords,
            combined_bps,
            combined_masses,
            water_box,
            guest_name,
            "water",
            water_mol,
            guest_mol,
            outdir,
            fewer_outfiles,
            no_outfiles,
        )
        end_time = time.time()
        print(
            f"{guest_name} water leg time:", "%.2f" % (end_time - start_time), "seconds"
        )