Exemplo n.º 1
0
def generate_waterbox_nb_args() -> NonbondedArgs:
    """Assemble the positional argument tuple for a nonbonded call on a water box.

    A water system is built with ``builders.build_water_system(3.0)`` and
    deserialized with a cutoff of 1.2; the last deserialized potential is
    used as the nonbonded term.
    NOTE(review): this relies on the nonbonded potential being the last entry
    returned by ``deserialize_system`` -- confirm.

    Returns:
        The 10-tuple ``(conf, params, box, lamb, charge_rescale_mask,
        lj_rescale_mask, beta, cutoff, lambda_plane_idxs,
        lambda_offset_idxs)``. ``lamb`` is 0.0, both rescale masks are
        all-ones ``(N, N)`` arrays and both lambda index arrays are integer
        zeros of length ``N``, where ``N = conf.shape[0]``.
    """
    system, positions, box, _ = builders.build_water_system(3.0)
    deserialized, _masses = openmm_deserializer.deserialize_system(system, cutoff=1.2)
    nonbonded = deserialized[-1]

    conf = positions.value_in_unit(unit.nanometer)
    num_atoms = conf.shape[0]
    pair_shape = (num_atoms, num_atoms)

    return (
        conf,
        nonbonded.params,
        box,
        0.0,  # lamb
        onp.ones(pair_shape),  # charge_rescale_mask
        onp.ones(pair_shape),  # lj_rescale_mask
        nonbonded.get_beta(),
        nonbonded.get_cutoff(),
        np.zeros(num_atoms, dtype=int),  # lambda_plane_idxs
        np.zeros(num_atoms, dtype=int),  # lambda_offset_idxs
    )
Exemplo n.º 2
0
def test_jax_nonbonded_block():
    """Assert that nonbonded_block and nonbonded_on_specific_pairs agree"""
    system, positions, box, _ = builders.build_water_system(3.0)
    deserialized, _ = openmm_deserializer.deserialize_system(system, cutoff=1.2)
    nonbonded = deserialized[-1]
    nb_params = nonbonded.params

    conf = positions.value_in_unit(unit.nanometer)
    num_atoms = conf.shape[0]

    beta = nonbonded.get_beta()
    cutoff = nonbonded.get_cutoff()

    # atoms [0, split) form the first group, atoms [split, N) the second
    split = 70

    # enumerate every (first-group, second-group) pair as flat index arrays
    row_idxs, col_idxs = np.indices((split, num_atoms - split))
    indices_left = row_idxs.flatten()
    indices_right = col_idxs.flatten() + split

    def block_energy(x, box, params):
        # block form: the two groups are passed as separate slices
        return nonbonded_block(x[:split], x[split:], box, params[:split], params[split:], beta, cutoff)

    def pairwise_energy(x, box, params):
        # explicit pair-list form: sum both returned terms over all listed pairs
        vdw, es = nonbonded_v3_on_specific_pairs(x, params, box, indices_left, indices_right, beta, cutoff)
        return np.sum(vdw + es)

    expected = block_energy(conf, box, nb_params)
    actual = pairwise_energy(conf, box, nb_params)
    onp.testing.assert_almost_equal(expected, actual)
Exemplo n.º 3
0
    def test_nonbonded_with_box_smaller_than_cutoff(self):
        """Exercise the nonbonded implementation with boxes near and below the cutoff limit.

        The first 33 coordinates of a water box are evaluated with the box
        returned by the builder and with a diagonal box of side
        ``2 * (cutoff + padding)``; both are expected to run without error.
        A box whose entries are all non-zero must raise a ``RuntimeError``
        mentioning "non-ortholinear box", and a diagonal box shrunk to 30% of
        that side must raise one mentioning "more than half".
        """
        np.random.seed(4321)

        precision = np.float32
        cutoff = 1
        size = 33
        padding = 0.1

        _, coords, box, _ = builders.build_water_system(6.2)
        coords = coords / coords.unit
        coords = coords[:size]

        N = coords.shape[0]

        lambda_plane_idxs = np.random.randint(low=-2, high=2, size=N, dtype=np.int32)
        lambda_offset_idxs = np.random.randint(low=-2, high=2, size=N, dtype=np.int32)

        # Only the test potential is executed here; the reference potential
        # returned alongside it is not needed.
        charge_params, _, test_potential = prepare_water_system(
            coords, lambda_plane_idxs, lambda_offset_idxs, p_scale=1.0, cutoff=cutoff
        )

        def run_nonbonded(precision, potential, x, box, params, lamb, steps=100):
            """Execute ``potential`` ``steps`` times on (x, params, box, lamb)."""
            impl = potential.unbound_impl(precision)

            # Round-trip through float32 so the inputs are float32-representable
            # values stored as float64.
            x = (x.astype(np.float32)).astype(np.float64)
            params = (params.astype(np.float32)).astype(np.float64)

            assert x.ndim == 2
            assert x.dtype == np.float64
            assert params.dtype == np.float64

            for _ in range(steps):
                _ = impl.execute_selective(x, params, box, lamb, True, True, True, True)

        # With the default box, all is well
        run_nonbonded(precision, test_potential, coords, box, charge_params, 0.0, steps=2)

        db_cutoff = (cutoff + padding) * 2

        # Make box with diagonals right at the limit
        box = np.eye(3) * db_cutoff
        run_nonbonded(precision, test_potential, coords, box, charge_params, 0.0)

        # Non Orth Box, should fail
        box = np.ones_like(box) * (db_cutoff ** 2)
        with self.assertRaises(RuntimeError) as raised:
            run_nonbonded(precision, test_potential, coords, box, charge_params, 0.0)
        assert "non-ortholinear box" in str(raised.exception)

        # Down shift the box so the diagonal is only a portion of the required size
        box = np.eye(3) * (db_cutoff * 0.3)
        with self.assertRaises(RuntimeError) as raised:
            run_nonbonded(precision, test_potential, coords, box, charge_params, 0.0)
        assert "more than half" in str(raised.exception)
Exemplo n.º 4
0
def test_barostat_zero_interval():
    """Check that MonteCarloBarostat rejects an interval of zero.

    Constructing the barostat with interval 0 must raise ``RuntimeError``;
    constructing it with interval 1 must succeed, and calling
    ``set_interval(0)`` on that instance must raise ``RuntimeError`` again.
    """
    pressure = 1.0 * unit.atmosphere
    temperature = 300.0 * unit.kelvin
    initial_waterbox_width = 2.5 * unit.nanometer
    seed = 2021
    np.random.seed(seed)

    mol_a = hif2a_ligand_pair.mol_a
    ff = hif2a_ligand_pair.ff
    complex_system, complex_coords, _box, _top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer)
    )

    afe = AbsoluteFreeEnergy(mol_a, ff)
    unbound_potentials, sys_params, _masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, complex_coords
    )

    # the barostat needs the molecule groups; derive them from the bond table
    # of the first potential (treated here as the harmonic bond term)
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    bound_potentials = [
        potential.bind(np.asarray(params)) for params, potential in zip(sys_params, unbound_potentials)
    ]
    u_impls = [bound.bound_impl(precision=np.float32) for bound in bound_potentials]

    def make_barostat(interval):
        # identical constructor arguments apart from the interval under test
        return custom_ops.MonteCarloBarostat(
            coords.shape[0],
            pressure.value_in_unit(unit.bar),
            temperature.value_in_unit(unit.kelvin),
            group_indices,
            interval,
            u_impls,
            seed,
        )

    with pytest.raises(RuntimeError):
        make_barostat(0)

    # Setting it to 1 should be valid.
    baro = make_barostat(1)

    # Setting back to 0 should raise another error
    with pytest.raises(RuntimeError):
        baro.set_interval(0)
Exemplo n.º 5
0
def get_solvent_phase_system(mol, ff):
    """Build a solvated system for ``mol`` and minimize the water around it.

    Args:
        mol: molecule passed to ``AbsoluteFreeEnergy`` and to the host minimizer.
        ff: forcefield object; must provide ``get_ordered_params()``.

    Returns:
        Tuple ``(ubps, params, masses, coords, water_box)``. The first four
        items come from ``afe.prepare_host_edge``, with the leading
        ``len(water_coords)`` rows of ``coords`` overwritten by the minimized
        host coordinates. ``water_box`` is the built box with 0.5 added to
        each diagonal entry.
    """
    water_system, water_coords, water_box, _ = builders.build_water_system(3.0)
    water_box = water_box + np.eye(3) * 0.5  # add a small margin around the box for stability
    num_water_atoms = len(water_coords)

    afe = free_energy.AbsoluteFreeEnergy(mol, ff)
    ff_params = ff.get_ordered_params()
    # masses are taken from prepare_host_edge; a separate per-atom mass array
    # computed from ``mol`` beforehand would only be overwritten here.
    ubps, params, masses, coords = afe.prepare_host_edge(ff_params, water_system, water_coords)

    # the water atoms occupy the first num_water_atoms rows of coords
    host_coords = coords[:num_water_atoms]
    new_host_coords = minimizer.minimize_host_4d([mol], water_system, host_coords, ff, water_box)
    coords[:num_water_atoms] = new_host_coords

    return ubps, params, masses, coords, water_box
Exemplo n.º 6
0
    def test_nblist_box_resize(self):
        # Evaluate the same coordinates under two different boxes and compare
        # against the reference each time: the neighborlist should be rebuilt
        # whenever the box size changes.

        host_system, host_coords, box, _ = builders.build_water_system(3.0)
        host_fns, _host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.0)

        # keep the last Nonbonded potential among the deserialized ones
        for candidate in host_fns:
            if isinstance(candidate, potentials.Nonbonded):
                test_nonbonded_fn = candidate

        host_conf = np.array([[to_md_units(x), to_md_units(y), to_md_units(z)] for x, y, z in host_coords])

        lamb = 0.1

        ref_nonbonded_fn = prepare_reference_nonbonded(
            test_nonbonded_fn.params,
            test_nonbonded_fn.get_exclusion_idxs(),
            test_nonbonded_fn.get_scale_factors(),
            test_nonbonded_fn.get_lambda_plane_idxs(),
            test_nonbonded_fn.get_lambda_offset_idxs(),
            test_nonbonded_fn.get_beta(),
            test_nonbonded_fn.get_cutoff(),
        )

        big_box = box + np.eye(3) * 1000

        # (precision, rtol, atol) triples checked for every box
        tolerances = [(np.float64, 1e-8, 1e-10), (np.float32, 1e-4, 3e-5)]

        # (ytz): note the ordering should be from large box to small box. though in the current code
        # the rebuild is triggered as long as the box *changes*.
        for test_box in (big_box, box):
            for precision, rtol, atol in tolerances:
                self.compare_forces(
                    host_conf,
                    test_nonbonded_fn.params,
                    test_box,
                    lamb,
                    ref_nonbonded_fn,
                    test_nonbonded_fn,
                    rtol=rtol,
                    atol=atol,
                    precision=precision,
                )
Exemplo n.º 7
0
    def test_random_directory(self):
        """Run host minimization while the working directory is a fresh temporary directory.

        The original working directory is restored afterwards, whether or not
        the minimization raises.
        """
        with TemporaryDirectory(prefix="timemachine") as scratch_dir:
            previous_cwd = os.getcwd()
            os.chdir(scratch_dir)
            try:
                # a pair of alchemical ligands in a water box
                pair = hif2a_ligand_pair
                host_system, host_coords, host_box, _ = build_water_system(2.6)

                # Creates a custom_ops.Context which triggers JIT
                minimize_host_4d([pair.mol_a, pair.mol_b], host_system, host_coords, pair.ff, host_box)
            finally:
                os.chdir(previous_cwd)
Exemplo n.º 8
0
    def test_pre_equilibration(self):
        """Verify that equilibration of edges up front functions as expected"""
        protein_path = os.path.join(DATA_DIR, "hif2a_nowater_min.pdb")
        complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(protein_path)

        # build the water system
        solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

        model = RBFEModel(
            client=CUDAPoolClient(NUM_GPUS),
            ff=hif2a_ligand_pair.ff,
            complex_system=complex_system,
            complex_coords=complex_coords,
            complex_box=complex_box,
            complex_schedule=construct_lambda_schedule(2),
            solvent_system=solvent_system,
            solvent_coords=solvent_coords,
            solvent_box=solvent_box,
            solvent_schedule=construct_lambda_schedule(2),
            equil_steps=10,
            prod_steps=100,
        )

        mol_a = hif2a_ligand_pair.mol_a
        mol_b = hif2a_ligand_pair.mol_b
        core = hif2a_ligand_pair.core

        # the cache starts out empty
        assert len(model._equil_cache) == 0

        with TemporaryDirectory() as tempdir:
            cache_path = os.path.join(tempdir, "equil_cache.pkl")

            def equilibrate():
                # a fresh edge list is built for every call
                model.equilibrate_edges([(mol_a, mol_b, core)], equilibration_steps=10, cache_path=cache_path)

            # If model.pre_equilibrate is false, its a noop
            equilibrate()
            assert len(model._equil_cache) == 0

            # Enable pre-equilibration
            model.pre_equilibrate = True
            equilibrate()
            # Cache should contain starting coords for both solvent and complex stages
            assert len(model._equil_cache) == 2
Exemplo n.º 9
0
def test_equilibrate_host():
    """Check the shapes returned by ``minimizer.equilibrate_host``.

    The returned coordinates must contain one row per host atom plus one per
    ligand atom, keep the host's second dimension, and the returned box must
    have the same shape as the input box.
    """
    host_system, host_coords, host_box, _ = builders.build_water_system(4.0)

    # first molecule of the ligand file, hydrogens kept
    mol = next(Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False))
    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # positional arguments are passed through unchanged; only the seed is named
    equilibrate_args = (mol, host_system, host_coords, 300, 1.0, ff, host_box, 25)
    coords, box = minimizer.equilibrate_host(*equilibrate_args, seed=2022)

    num_host_atoms, num_dims = host_coords.shape[0], host_coords.shape[1]
    assert coords.shape[0] == num_host_atoms + mol.GetNumAtoms()
    assert coords.shape[1] == num_dims
    assert box.shape == host_box.shape
Exemplo n.º 10
0
def test_write_single_topology_frame():
    """Write one frame of a solvated single-topology system with ``PDBWriter``.

    Writing the raw combined coordinates must raise ``ValueError``; after the
    ligand rows are passed through ``convert_single_topology_mols`` the frame
    must be written without error.
    """
    top = hif2a_ligand_pair.top
    assert isinstance(top, SingleTopology)

    ff_params = top.ff.get_ordered_params()

    solvent_system, solvent_coords, _solvent_box, solvent_top = builders.build_water_system(4.0)
    num_solvent_atoms = len(solvent_coords)

    _potentials, _sys_params, _masses, coords = hif2a_ligand_pair.prepare_host_edge(
        ff_params, solvent_system, solvent_coords
    )

    coords *= 10  # nm to angstroms
    with NamedTemporaryFile(suffix=".pdb") as temp:
        writer = PDBWriter([solvent_top, top.mol_a, top.mol_b], temp.name)

        # Should fail, as incorrect number of coords
        with pytest.raises(ValueError):
            writer.write_frame(coords)

        solvent_part = coords[:num_solvent_atoms]
        ligand_part = convert_single_topology_mols(coords[num_solvent_atoms:], top)
        writer.write_frame(np.concatenate((solvent_part, ligand_part), axis=0))
        writer.close()
Exemplo n.º 11
0
    def test_nonbonded(self):
        """Compare test and reference nonbonded potentials on water subsets.

        For each subset size, random lambda plane/offset indices are drawn and
        ``compare_forces`` is called for every combination of precision,
        cutoff and lambda listed below.
        """
        np.random.seed(4321)

        subset_sizes = [33, 231, 1050]
        # (precision, rtol, atol)
        tolerances = [(np.float64, 1e-8, 3e-11), (np.float32, 1e-4, 3e-5)]
        cutoffs = [1.0]
        lambdas = [0.0, 0.1, 0.2]

        for size in subset_sizes:
            _, all_coords, box, _ = builders.build_water_system(6.2)
            # strip units, then keep only the first `size` rows
            coords = (all_coords / all_coords.unit)[:size]
            num_atoms = coords.shape[0]

            lambda_plane_idxs = np.random.randint(low=-2, high=2, size=num_atoms, dtype=np.int32)
            lambda_offset_idxs = np.random.randint(low=-2, high=2, size=num_atoms, dtype=np.int32)

            for precision, rtol, atol in tolerances:
                for cutoff in cutoffs:
                    charge_params, ref_potential, test_potential = prepare_water_system(
                        coords, lambda_plane_idxs, lambda_offset_idxs, p_scale=1.0, cutoff=cutoff
                    )

                    for lamb in lambdas:
                        print("lambda", lamb, "cutoff", cutoff, "precision", precision, "xshape", coords.shape)

                        self.compare_forces(
                            coords,
                            charge_params,
                            box,
                            lamb,
                            ref_potential,
                            test_potential,
                            rtol=rtol,
                            atol=atol,
                            precision=precision,
                        )
Exemplo n.º 12
0
    def test_predict(self):
        """Just to verify that we can handle the most basic RBFE prediction"""
        # Use the Simple Charges to verify determinism of model. Needed as one endpoint uses the ff definition
        forcefield = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

        protein_path = os.path.join(DATA_DIR, "hif2a_nowater_min.pdb")
        complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(protein_path)

        # build the water system
        solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

        model = RBFEModel(
            client=CUDAPoolClient(NUM_GPUS),
            ff=forcefield,
            complex_system=complex_system,
            complex_coords=complex_coords,
            complex_box=complex_box,
            complex_schedule=construct_lambda_schedule(2),
            solvent_system=solvent_system,
            solvent_coords=solvent_coords,
            solvent_box=solvent_box,
            solvent_schedule=construct_lambda_schedule(2),
            equil_steps=10,
            prod_steps=100,
        )

        ordered_params = forcefield.get_ordered_params()
        pair = hif2a_ligand_pair

        ddg, results = model.predict(ordered_params, pair.mol_a, pair.mol_b, pair.core)

        # two results are expected (NOTE(review): presumably one per stage -- confirm)
        self.assertEqual(len(results), 2)
        self.assertIsInstance(ddg, float)
Exemplo n.º 13
0
def test_relative_free_energy():
    """Smoke-test the relative free energy pipeline in vacuum and in hosts.

    Checks that a single topology host-guest system can be built and run for
    a few steps in a stable way. The vacuum edge is run first, then the
    complex and solvent stages; each resulting free energy estimate only has
    to be below 1000 in magnitude.
    """
    suppl = Chem.SDMolSupplier("tests/data/ligands_40.sdf", removeHs=False)
    all_mols = [x for x in suppl]
    mol_a = all_mols[1]
    mol_b = all_mols[4]

    # atom mapping between mol_a (first column) and mol_b (second column)
    core = np.array([
        [0, 0],
        [2, 2],
        [1, 1],
        [6, 6],
        [5, 5],
        [4, 4],
        [3, 3],
        [15, 16],
        [16, 17],
        [17, 18],
        [18, 19],
        [19, 20],
        [20, 21],
        [32, 30],
        [26, 25],
        [27, 26],
        [7, 7],
        [8, 8],
        [9, 9],
        [10, 10],
        [29, 11],
        [11, 12],
        [12, 13],
        [14, 15],
        [31, 29],
        [13, 14],
        [23, 24],
        [30, 28],
        [28, 27],
        [21, 22],
    ])

    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb")

    # build the water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(
        4.0)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    ff_params = ff.get_ordered_params()

    seed = 2021

    lambda_schedule = np.linspace(0, 1.0, 4)
    equil_steps = 1000
    prod_steps = 1000

    temperature = 300.0
    pressure = 1.0

    single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
    rfe = free_energy.RelativeFreeEnergy(single_topology)

    def run_edge(unbound_potentials, sys_params, masses, coords, box):
        """Build a FreeEnergyModel for one prepared edge and return its dG estimate."""
        x0 = coords
        v0 = np.zeros_like(coords)
        # exactly one client per edge
        client = CUDAPoolClient(1)

        # molecule groups for the barostat, derived from the bond table of the
        # first potential (treated here as the harmonic bond term)
        harmonic_bond_potential = unbound_potentials[0]
        group_idxs = get_group_indices(get_bond_list(harmonic_bond_potential))

        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses, seed)
        barostat = MonteCarloBarostat(x0.shape[0], pressure, temperature,
                                      group_idxs, 25, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            box,
            x0,
            v0,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        # deltaG returns the estimate first; the remaining outputs are unused
        return estimator.deltaG(model, sys_params)[0]

    def vacuum_model(ff_params):
        """Return the dG estimate of the vacuum edge in a 100 x 100 x 100 diagonal box."""
        unbound_potentials, sys_params, masses, coords = rfe.prepare_vacuum_edge(
            ff_params)
        box = np.eye(3, dtype=np.float64) * 100
        return run_edge(unbound_potentials, sys_params, masses, coords, box)

    dG = vacuum_model(ff_params)
    assert np.abs(dG) < 1000.0

    def binding_model(ff_params):
        """Return the complex-stage estimate minus the solvent-stage estimate."""
        dGs = []

        for host_system, host_coords, host_box in [
            (complex_system, complex_coords, complex_box),
            (solvent_system, solvent_coords, solvent_box),
        ]:

            # minimize the host to avoid clashes
            host_coords = minimizer.minimize_host_4d([mol_a], host_system,
                                                     host_coords, ff, host_box)

            unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(
                ff_params, host_system, host_coords)

            dGs.append(run_edge(unbound_potentials, sys_params, masses, coords, host_box))

        return dGs[0] - dGs[1]

    dG = binding_model(ff_params)
    assert np.abs(dG) < 1000.0
Exemplo n.º 14
0
def main(args, stage):
    """Run a two-molecule free energy calculation in a water box for one stage.

    Args:
        args: parsed arguments; only ``args.num_gpus`` is read here (worker
            pool size and GPU index assignment).
        stage: 0 or 1. Stage 0 applies ``rbfe.stage_0`` and uses a schedule of
            five lambda values all equal to 0.0; stage 1 applies
            ``rbfe.stage_1`` and uses 60 evenly spaced lambda values on
            [0.0, 1.2].

    Raises:
        AssertionError: if ``stage`` is neither 0 nor 1.

    For each of 10 epochs, the trapezoidal integral of the averaged du/dl
    values over the lambda schedule is printed.
    """
    # NOTE: despite their names, both molecules are built from the same SMILES
    # string below.
    benzene = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # a
    phenol = Chem.AddHs(Chem.MolFromSmiles("C1=CC=C2C=CC=CC2=C1"))  # b

    AllChem.EmbedMolecule(benzene)
    AllChem.EmbedMolecule(phenol)

    ff_handlers = Forcefield.load_from_file(
        "smirnoff_1_1_0_ccc.py").get_ordered_handles()
    r_benzene = Recipe.from_rdkit(benzene, ff_handlers)
    r_phenol = Recipe.from_rdkit(phenol, ff_handlers)

    r_combined = r_benzene.combine(r_phenol)

    # atom i of molecule a is paired with atom i of molecule b for i in 0..9;
    # the second column is then shifted into the combined index space
    core_pairs = np.array(
        [
            [0, 0],
            [1, 1],
            [2, 2],
            [3, 3],
            [4, 4],
            [5, 5],
            [6, 6],
            [7, 7],
            [8, 8],
            [9, 9],
        ],
        dtype=np.int32,
    )
    core_pairs[:, 1] += benzene.GetNumAtoms()

    a_idxs = np.arange(benzene.GetNumAtoms())
    b_idxs = np.arange(phenol.GetNumAtoms()) + benzene.GetNumAtoms()

    core_k = 20.0

    if stage == 0:
        centroid_k = 200.0
        rbfe.stage_0(r_combined, b_idxs, core_pairs, centroid_k, core_k)
        lambda_schedule = np.array([0.0, 0.0, 0.0, 0.0, 0.0])
    elif stage == 1:
        rbfe.stage_1(r_combined, a_idxs, b_idxs, core_pairs, core_k)
        lambda_schedule = np.linspace(0.0, 1.2, 60)
    else:
        # raised explicitly so the check survives ``python -O``; the exception
        # type matches the previous ``assert 0``
        raise AssertionError(f"unsupported stage: {stage!r}")

    system, host_coords, box, _ = builders.build_water_system(4.0)

    r_host = Recipe.from_openmm(system)
    r_final = r_host.combine(r_combined)

    # minimize coordinates of host + ligand A
    ha_coords = np.concatenate([host_coords, get_romol_conf(benzene)])

    # we need to run this in a subprocess since the cuda runtime
    # must not be initialized in the master thread due to lack of
    # fork safety
    r_minimize = minimize_setup(r_host, r_benzene)
    pool = Pool(args.num_gpus)
    try:
        minimized = pool.map(
            minimize,
            [(r_minimize.bound_potentials, r_minimize.masses, ha_coords, box)],
            chunksize=1)
    finally:
        # close the pool even if minimization fails
        pool.close()
    # map returns a list; there is a single work item
    ha_coords = minimized[0]

    pool = Pool(args.num_gpus)
    try:
        x0 = np.concatenate([ha_coords, get_romol_conf(phenol)])

        masses = np.concatenate([r_host.masses, r_benzene.masses, r_phenol.masses])

        seed = np.random.randint(np.iinfo(np.int32).max)

        intg = LangevinIntegrator(300.0, 1.5e-3, 1.0, masses, seed)

        # production run at various values of lambda
        for epoch in range(10):
            # one work item per lambda window; the GPU index is assigned round-robin
            run_args = [
                (lamb, intg, r_final.bound_potentials, r_final.masses, x0, box,
                 lamb_idx % args.num_gpus, stage)
                for lamb_idx, lamb in enumerate(lambda_schedule)
            ]

            avg_du_dls = pool.map(run, run_args, chunksize=1)

            print("stage", stage, "epoch", epoch, "dG",
                  np.trapz(avg_du_dls, lambda_schedule))
    finally:
        # the production pool was previously never closed
        pool.close()
Exemplo n.º 15
0
def test_neighborlist_ligand_host():
    """Compare host/ligand neighborlist tiles against a brute-force reference.

    Ligand atoms are processed in row blocks of 32. For each block the
    reference is the set of host atom indices lying within the cutoff (under
    the minimum-image convention for a diagonal box) of at least one atom in
    the block. Both the f32 and f64 neighborlists are queried twice and must
    return the same index sets per block.
    """
    ligand = hif2a_ligand_pair.mol_a
    ligand_coords = get_romol_conf(ligand)

    _system, host_coords, _box, _top = build_water_system(4.0)
    num_host_atoms = host_coords.shape[0]
    host_coords = np.array(host_coords)

    coords = np.concatenate([host_coords, ligand_coords])
    N = coords.shape[0]
    num_ligand_atoms = coords[num_host_atoms:].shape[0]

    D = 3
    cutoff = 1.0
    block_size = 32
    padding = 0.1

    np.random.seed(1234)

    # diagonal box spanning the extent of all coordinates plus padding
    box_diag = np.amax(coords, axis=0) - np.amin(coords, axis=0) + padding
    box = np.diag(box_diag)

    # Can only sort the host coords, but not the row/ligand
    perm = hilbert_sort(coords[:num_host_atoms] + np.argmin(coords[:num_host_atoms]), D)
    coords[:num_host_atoms] = coords[:num_host_atoms][perm]

    # host atoms form the columns: shape (1, num_host_atoms, 3)
    col_coords = coords[:num_host_atoms][np.newaxis]

    # Compute the reference interactions of the ligand, one row block at a time
    ref_ixn_list = []
    for row_start in range(num_host_atoms, N, block_size):
        row_end = min(row_start + block_size, N)
        # ligand atoms of this block form the rows: shape (rows, 1, 3)
        row_coords = coords[row_start:row_end][:, np.newaxis]

        deltas = row_coords - col_coords
        # minimum-image wrap along each box axis
        deltas -= box_diag * np.floor(deltas / box_diag + 0.5)
        dij = np.linalg.norm(deltas, axis=-1)

        # Since the row and columns are unique, don't need to handle duplicates
        close_cols = np.flatnonzero(np.any(dij < cutoff, axis=0))
        ref_ixn_list.append(close_cols.tolist())

    neighborlists = (
        custom_ops.Neighborlist_f32(num_host_atoms, num_ligand_atoms),
        custom_ops.Neighborlist_f64(num_host_atoms, num_ligand_atoms),
    )
    for nblist in neighborlists:
        # query twice so a repeated call on the same object is also checked
        for _ in range(2):
            test_ixn_list = nblist.get_nblist_host_ligand(
                coords[:num_host_atoms], coords[num_host_atoms:], box, cutoff)

            assert len(ref_ixn_list) == len(test_ixn_list), "Number of blocks with interactions don't agree"

            for bidx, (ref_block, test_block) in enumerate(zip(ref_ixn_list, test_ixn_list)):
                ref_sorted = sorted(ref_block)
                test_sorted = sorted(test_block)
                if ref_sorted != test_sorted:
                    print("TESTING bidx", bidx)
                    print(ref_sorted)
                    print(test_sorted)
                np.testing.assert_equal(ref_sorted, test_sorted)
Exemplo n.º 16
0
def benchmark_hif2a(verbose=False, num_batches=100, steps_per_batch=1000):
    """Benchmark the hif2a complex and solvent systems, apo and with an RBFE edge.

    For each of the two hosts this runs, in order: the apo host, the apo host
    with a barostat interval of 25, the RBFE system at lambda 0.5, and the
    RBFE system at lambda 0.5 with du/dl intervals of 0, 1 and 5.

    Args:
        verbose: forwarded to every ``benchmark`` call.
        num_batches: forwarded to every ``benchmark`` call.
        steps_per_batch: forwarded to every ``benchmark`` call.
    """
    from timemachine.testsystems.relative import hif2a_ligand_pair as testsystem

    mol_a, mol_b, core = testsystem.mol_a, testsystem.mol_b, testsystem.core

    ff = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

    rfe = free_energy.RelativeFreeEnergy(SingleTopology(mol_a, mol_b, core, ff))

    ff_params = ff.get_ordered_params()

    # build the protein system.
    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb"
    )

    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(4.0)

    # keyword arguments shared by every benchmark call
    run_options = dict(verbose=verbose, num_batches=num_batches, steps_per_batch=steps_per_batch)

    hosts = [
        ("hif2a", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    ]
    for stage, host_system, host_coords, host_box in hosts:
        host_fns, host_masses = openmm_deserializer.deserialize_system(host_system, cutoff=1.0)

        # resolve host clashes
        min_host_coords = minimizer.minimize_host_4d([mol_a, mol_b], host_system, host_coords, ff, host_box)

        apo_x0 = min_host_coords
        apo_v0 = np.zeros_like(apo_x0)

        # apo host, lamb = 0.0
        benchmark(stage + "-apo", host_masses, 0.0, apo_x0, apo_v0, host_box, host_fns, **run_options)
        benchmark(
            stage + "-apo-barostat-interval-25",
            host_masses,
            0.0,
            apo_x0,
            apo_v0,
            host_box,
            host_fns,
            barostat_interval=25,
            **run_options,
        )

        # RBFE
        unbound_potentials, sys_params, masses, coords = rfe.prepare_host_edge(ff_params, host_system, apo_x0)
        bound_potentials = [potential.bind(params) for potential, params in zip(unbound_potentials, sys_params)]

        rbfe_x0 = coords
        rbfe_v0 = np.zeros_like(rbfe_x0)

        # RBFE system, lamb = 0.5
        benchmark(
            stage + "-rbfe-with-du-dp", masses, 0.5, rbfe_x0, rbfe_v0, host_box, bound_potentials, **run_options
        )

        for du_dl_interval in [0, 1, 5]:
            benchmark(
                stage + "-rbfe-du-dl-interval-" + str(du_dl_interval),
                masses,
                0.5,
                rbfe_x0,
                rbfe_v0,
                host_box,
                bound_potentials,
                compute_du_dl_interval=du_dl_interval,
                **run_options,
            )
Exemplo n.º 17
0
def test_barostat_partial_group_idxs():
    """Smoke test: drive the barostat with only a subset of the molecule groups.

    This only verifies that the simulation runs; no behavior is asserted.
    """
    seed = 2021
    lam = 1.0
    barostat_interval = 3
    temperature = 300.0 * unit.kelvin
    pressure = 1.0 * unit.atmosphere
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    initial_waterbox_width = 3.0 * unit.nanometer
    np.random.seed(seed)

    mol_a, ff = hif2a_ligand_pair.mol_a, hif2a_ligand_pair.ff
    host_system, host_coords, host_box, _ = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_coords = minimize_host_4d([mol_a], host_system, host_coords, ff,
                                        host_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)
    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), host_system, minimized_coords)

    # The first potential is used as the harmonic bond term; its bond table
    # yields the per-molecule atom groups handed to the barostat.
    all_groups = get_group_indices(get_bond_list(unbound_potentials[0]))

    # Keep only the second half of the groups.
    group_indices = all_groups[len(all_groups) // 2:]

    bound_potentials = [
        potential.bind(np.asarray(potential_params))
        for potential_params, potential in zip(sys_params, unbound_potentials)
    ]
    u_impls = [
        bound.bound_impl(precision=np.float32) for bound in bound_potentials
    ]

    kelvin = temperature.value_in_unit(unit.kelvin)

    thermostat = LangevinIntegrator(
        kelvin,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    integrator_impl = thermostat.impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        kelvin,
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(
        coords,
        v_0,
        host_box,
        integrator_impl,
        u_impls,
        barostat=baro,
    )
    # 1000 steps, all at the same lambda
    ctxt.multiple_steps(np.ones(1000) * lam)
Exemplo n.º 18
0
    def test_predict_absolute_conversion(self):
        """Verify that the most basic absolute-conversion RABFE prediction can be handled."""
        # The simple-charges forcefield keeps the model deterministic; this is
        # needed because one endpoint uses the ff definition.
        ff = Forcefield.load_from_file("smirnoff_1_1_0_sc.py")

        # water system that serves as the solvent host
        solvent_system, solvent_coords, solvent_box, solvent_topology = builders.build_water_system(
            4.0)

        temperature, pressure, dt = 300.0, 1.0, 2.5e-3

        model = AbsoluteConversionModel(
            CUDAPoolClient(NUM_GPUS),
            ff,
            solvent_system,
            construct_lambda_schedule(2),
            solvent_topology,
            temperature,
            pressure,
            dt,
            10,
            50,
            frame_filter=all_frames,
        )

        mol_a = hif2a_ligand_pair.mol_a
        mol_b = hif2a_ligand_pair.mol_b
        core_idxs = setup_relative_restraints_by_distance(mol_a, mol_b)

        ref_coords = get_romol_conf(mol_a)
        mol_b_coords = get_romol_conf(mol_b)  # original (unaligned) coords

        # Fit a rotation + translation on the core atoms picked out by core_idxs,
        # then apply it to every atom of mol_b.
        reference_core = ref_coords[core_idxs[:, 1]]
        mol_core = mol_b_coords[core_idxs[:, 0]]
        R, t = rmsd.get_optimal_rotation_and_translation(x1=reference_core,
                                                         x2=mol_core)
        aligned_mol_coords = rmsd.apply_rotation_and_translation(
            mol_b_coords, R, t)

        # Minimize the solvent around the aligned ligand and append the ligand
        # coordinates after the solvent coordinates.
        minimized_solvent = minimizer.minimize_host_4d(
            [mol_b],
            solvent_system,
            solvent_coords,
            ff,
            solvent_box,
            [aligned_mol_coords],
        )
        solvent_x0 = np.concatenate([minimized_solvent, aligned_mol_coords])

        ordered_params = ff.get_ordered_params()
        with temporary_working_dir() as temp_dir:
            dG, dG_err = model.predict(
                ordered_params,
                mol_b,
                solvent_x0,
                solvent_box,
                "prefix",
                core_idxs=core_idxs[:, 0],
                seed=2022,
            )
            np.testing.assert_almost_equal(dG, 46.102816, decimal=5)
            np.testing.assert_equal(dG_err, 0.0)

            # Expected outputs: 4 files in total -- 1 pdb, 1 npy and 2 npz.
            written = os.listdir(temp_dir)
            self.assertEqual(len(written), 4)
            for suffix, expected_count in ((".pdb", 1), (".npy", 1), (".npz", 2)):
                matching = [name for name in written if name.endswith(suffix)]
                self.assertEqual(len(matching), expected_count)
    n_replicates = 10
    initial_waterbox_width = 3.0 * unit.nanometer
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    n_moves = 2000
    barostat_interval = 5
    seed = 2021

    # thermodynamic parameters
    temperature = 300 * unit.kelvin
    pressure = 1.013 * unit.bar

    # generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a, _, core, ff = hif2a_ligand_pair.mol_a, hif2a_ligand_pair.mol_b, hif2a_ligand_pair.core, hif2a_ligand_pair.ff
    complex_system, complex_coords, complex_box, complex_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    min_complex_coords = minimize_host_4d([mol_a], complex_system,
                                          complex_coords, ff, complex_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), complex_system, min_complex_coords)

    # define NPT ensemble
    potential_energy_model = PotentialEnergyModel(sys_params,
                                                  unbound_potentials)
    ensemble = NPTEnsemble(potential_energy_model, temperature, pressure)

    # define a thermostat
    integrator = LangevinIntegrator(
Exemplo n.º 20
0
    with open(output_path.joinpath("training_edges.pk"), "wb") as ofs:
        dump(training.data, ofs)
    if len(validation):
        with open(output_path.joinpath("validation_edges.pk"), "wb") as ofs:
            dump(validation.data, ofs)

    # Build all of the different protein systems
    systems = {}
    for prot_path in protein_paths:
        # build the complex system
        # note: "complex" means "protein + solvent"
        complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
            prot_path)

        # build the water system
        solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(
            4.0)

        systems[prot_path] = RBFEModel(
            client=client,
            ff=forcefield,
            complex_system=complex_system,
            complex_coords=complex_coords,
            complex_box=complex_box,
            complex_schedule=construct_lambda_schedule(
                configuration.num_complex_windows),
            solvent_system=solvent_system,
            solvent_coords=solvent_coords,
            solvent_box=solvent_box,
            solvent_schedule=construct_lambda_schedule(
                configuration.num_solvent_windows),
            equil_steps=configuration.num_equil_steps,
Exemplo n.º 21
0
def test_molecular_ideal_gas():
    """Compare the barostat's mean box volume against an ideal-gas estimate.

    The last potential (the nonbonded term) is dropped, and for each temperature
    the box volume is sampled every ``barostat_interval`` steps. The mean over
    the second half of each volume trajectory must match
    ``(n_molecules + 1) * BOLTZ * T / P`` within ``relative_tolerance``.

    References
    ----------
    OpenMM testIdealGas
    https://github.com/openmm/openmm/blob/d8ef57fed6554ec95684e53768188e1f666405c9/tests/TestMonteCarloBarostat.h#L86-L140
    """

    # simulation parameters
    seed = 2021
    n_moves = 10000
    barostat_interval = 5
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    initial_waterbox_width = 3.0 * unit.nanometer

    # thermodynamic parameters
    temperatures = np.array([300, 600, 1000]) * unit.kelvin
    pressure = 100.0 * unit.bar  # very high pressure, to keep the expected volume small

    # comparison tolerance; the starting box is deliberately offset by twice this
    relative_tolerance = 1e-2
    initial_relative_box_perturbation = 2 * relative_tolerance

    # generate an alchemical system of a waterbox + alchemical ligand:
    # effectively discard ligands by running in AbsoluteFreeEnergy mode at lambda = 1.0
    mol_a, ff = hif2a_ligand_pair.mol_a, hif2a_ligand_pair.ff
    host_system, host_coords, host_box, host_top = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_coords = minimize_host_4d([mol_a], host_system, host_coords, ff,
                                        host_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    all_potentials, all_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), host_system, minimized_coords)

    # drop the nonbonded potential (the last entry)
    unbound_potentials = all_potentials[:-1]
    sys_params = all_params[:-1]

    # molecule groups for the barostat, derived from the bond table of the first potential
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    n_molecules = host_top.getNumResidues()

    bound_potentials = [
        potential.bind(np.asarray(potential_params))
        for potential_params, potential in zip(sys_params, unbound_potentials)
    ]
    u_impls = [
        bound.bound_impl(precision=np.float32) for bound in bound_potentials
    ]

    # expected volume, one entry per temperature
    md_pressure_unit = ENERGY_UNIT / DISTANCE_UNIT**3
    pressure_in_md = (
        pressure * unit.AVOGADRO_CONSTANT_NA).value_in_unit(md_pressure_unit)
    kelvins = temperatures.value_in_unit(unit.kelvin)
    expected_volume_in_md = (n_molecules + 1) * BOLTZ * kelvins / pressure_in_md

    n_cycles = n_moves // barostat_interval
    volume_trajs = []

    for temperature, target_volume in zip(temperatures, expected_volume_in_md):

        # define a thermostat
        thermostat = LangevinIntegrator(
            temperature.value_in_unit(unit.kelvin),
            timestep.value_in_unit(unit.picosecond),
            collision_rate.value_in_unit(unit.picosecond**-1),
            masses,
            seed,
        )
        integrator_impl = thermostat.impl()

        v_0 = sample_velocities(masses * unit.amu, temperature)

        # rescale the box to be approximately the desired box volume already
        rescaler = CentroidRescaler(group_indices)
        initial_volume = compute_box_volume(host_box)
        initial_center = compute_box_center(host_box)
        volume_ratio = ((1 + initial_relative_box_perturbation) *
                        target_volume / initial_volume)
        length_scale = volume_ratio**(1.0 / 3)
        scaled_coords = rescaler.scale_centroids(coords, initial_center,
                                                 length_scale)
        scaled_box = host_box * length_scale

        baro = custom_ops.MonteCarloBarostat(
            scaled_coords.shape[0],
            pressure.value_in_unit(unit.bar),
            temperature.value_in_unit(unit.kelvin),
            group_indices,
            barostat_interval,
            u_impls,
            seed,
        )

        ctxt = custom_ops.Context(
            scaled_coords,
            v_0,
            scaled_box,
            integrator_impl,
            u_impls,
            barostat=baro,
        )

        # record the box volume after every block of barostat_interval steps
        volumes = []
        for _ in range(n_cycles):
            ctxt.multiple_steps(np.ones(barostat_interval))
            volumes.append(np.linalg.det(ctxt.get_box()))
        volume_trajs.append(volumes)

    # discard the first half of each trajectory as equilibration
    equil_time = len(volume_trajs[0]) // 2  # TODO: don't hard-code this?
    actual_volume_in_md = np.array(
        [np.mean(trajectory[equil_time:]) for trajectory in volume_trajs])

    np.testing.assert_allclose(
        actual=actual_volume_in_md,
        desired=expected_volume_in_md,
        rtol=relative_tolerance,
    )
Exemplo n.º 22
0
def do_relative_docking(host_pdbfile, mol_a, mol_b, core, num_switches,
                        transition_steps):
    """Runs non-equilibrium switching jobs:
    1. Solvates a protein, minimizes w.r.t guest_A, equilibrates & spins off switching jobs
       (deleting guest_A while inserting guest_B) every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein
    Does num_switches switching jobs per leg.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    mol_a (rdkit mol): the starting ligand to swap from
    mol_b (rdkit mol): the ending ligand to swap to
    core (np.array[[int, int], [int, int], ...]): the common core atoms between mol_a and mol_b
    num_switches (int): number of switching trajectories to run per compound pair per leg
    transition_steps (int): length of each switching trajectory

    Returns
    -------

    {str: float}: map of leg label to work values of switching mol_a to mol_b in that leg,
                  {'protein': [work values], 'solvent': [work_values]}
                  An empty dict is returned if the atom mapping is not factorizable.

    Output
    ------

    stdout noting the step number, lambda value, and energy at various steps
    stdout noting the work of transition, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if any norm of force per atom exceeds 20000 kJ/(mol*nm)
       [MAX_NORM_FORCE defined in docking/report.py]
    The simulations won't run if the atom maps are not factorizable
    """

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        _,
    ) = builders.build_protein_system(host_pdbfile)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    # The water box is as wide as the shortest edge (diagonal entry) of the host box.
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    water_system, water_coords, water_box, _ = builders.build_water_system(
        water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs

    guest_name_a = mol_a.GetProp("_Name")
    guest_name_b = mol_b.GetProp("_Name")
    combined_name = guest_name_a + "-->" + guest_name_b

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # Run the procedure, one leg per host
    all_works = {}
    for system, coords, box, label in zip(
        [solvated_host_system, water_system],
        [solvated_host_coords, water_coords],
        [host_box, water_box],
        ["protein", "solvent"],
    ):
        # Restart the clock for every leg so the reported time covers this leg
        # only, not the legs that ran before it.
        leg_start_time = time.time()

        # minimize w.r.t. both mol_a and mol_b?
        min_coords = minimizer.minimize_host_4d([mol_a], system, coords, ff,
                                                box)

        try:
            single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
            rfe = free_energy.RelativeFreeEnergy(single_topology)
            ups, sys_params, combined_masses, combined_coords = rfe.prepare_host_edge(
                ff.get_ordered_params(), system, min_coords)
        except topology.AtomMappingError as e:
            print(f"NON-FACTORIZABLE PAIR: {combined_name}")
            print(e)
            return {}

        combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

        all_works[label] = run_leg(
            combined_coords,
            combined_bps,
            combined_masses,
            box,
            combined_name,
            label,
            num_switches,
            transition_steps,
        )

        leg_seconds = time.time() - leg_start_time
        print(f"{combined_name} {label} leg time: {leg_seconds:.2f} seconds")
    return all_works
Exemplo n.º 23
0
def estimate_dG(
    transformation: RelativeTransformation,
    num_lambdas: int,
    num_steps_per_lambda: int,
    num_equil_steps: int,
):
    """Estimate the relative free energy of a transformation from mean du/dlambda.

    For each of the two stages (complex, then solvent) the host is minimized,
    one ``rfe.host_edge`` job per lambda window is submitted to the module-level
    ``client``, and the per-window mean du/dlambda is integrated over the lambda
    schedule with the trapezoidal rule.

    Parameters
    ----------
    transformation: provides ``ff``, ``mol_a``, ``mol_b`` and ``core``
    num_lambdas: passed to ``construct_lambda_schedule``
    num_steps_per_lambda: forwarded to each ``rfe.host_edge`` job
    num_equil_steps: forwarded to each ``rfe.host_edge`` job

    Returns
    -------
    The complex-stage dG minus the solvent-stage dG.

    Note
    ----
    Relies on the module-level names ``path_to_protein`` and ``client``.
    """

    def _mean_du_dlambda(result):
        """summarize result of rfe.host_edge into mean du/dl

        TODO: refactor where this analysis step occurs
        """
        bonded_du_dl, nonbonded_du_dl, _ = result
        return np.mean(bonded_du_dl + nonbonded_du_dl)

    # build the protein system.
    # Only the first five returned values are needed. The starred target also
    # accepts builder versions that return extra trailing values (other call
    # sites in this file unpack six, the last one being a topology).
    complex_system, complex_coords, _, _, complex_box, *_ = builders.build_protein_system(
        path_to_protein)

    # build the water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(
        4.0)

    ff = transformation.ff
    mol_a, mol_b = transformation.mol_a, transformation.mol_b
    core = transformation.core

    # TODO: measure performance of complex and solvent separately

    lambda_schedule = construct_lambda_schedule(num_lambdas)

    stage_dGs = []
    for _stage, host_system, host_coords, host_box in [
        ("complex", complex_system, complex_coords, complex_box),
        ("solvent", solvent_system, solvent_coords, solvent_box),
    ]:

        print("Minimizing the host structure to remove clashes.")
        # NOTE(review): other call sites in this file pass a list ([mol_a]) as
        # the first argument -- confirm which minimize_host_4d signature applies.
        minimized_host_coords = minimizer.minimize_host_4d(
            mol_a, host_system, host_coords, ff, host_box)

        single_topology = topology.SingleTopology(mol_a, mol_b, core, ff)
        rfe = free_energy.RelativeFreeEnergy(single_topology)

        # one job per lambda window
        print("submitting tasks to client!")
        do_work = partial(wrap_method, fxn=rfe.host_edge)
        futures = [
            client.submit(
                do_work,
                (lamb, host_system, minimized_host_coords, host_box,
                 num_equil_steps, num_steps_per_lambda),
            ) for lamb in lambda_schedule
        ]

        # wait for every window in schedule order
        results = [fut.result() for fut in futures]

        dG_host = np.trapz([_mean_du_dlambda(x) for x in results],
                           lambda_schedule)
        stage_dGs.append(dG_host)

    complex_dG, solvent_dG = stage_dGs
    return complex_dG - solvent_dG
Exemplo n.º 24
0
# Script section: build an aspirin ligand and a water-box host, then assemble
# the combined masses and coordinates with the host first and the ligand second.

# construct an RDKit molecule of aspirin (hydrogens are added explicitly by AddHs)
# note: not using OpenFF Molecule because want to avoid the dependency (YTZ?)
romol = Chem.AddHs(Chem.MolFromSmiles("CC(=O)OC1=CC=CC=C1C(=O)O"))

# one mass per ligand atom, in RDKit atom order (a plain Python list)
ligand_masses = [a.GetMass() for a in romol.GetAtoms()]

# generate conformers
# NOTE(review): the return value of EmbedMolecule is ignored; presumably it
# signals whether embedding succeeded -- confirm and check it if so.
AllChem.EmbedMolecule(romol)

# extract the 0th conformer
ligand_coords = get_romol_conf(romol)

# construct a 4-nanometer water box (from openmmtools approach: selecting out
#   of a large pre-equilibrated water box snapshot)
system, host_coords, box, omm_topology = builders.build_water_system(4.0)

# convert the OpenMM system into bound potentials and host masses, using a cutoff of 1.2
host_bps, host_masses = openmm_deserializer.deserialize_system(system,
                                                               cutoff=1.2)

# host masses first, ligand masses second -- same order as the coordinates below
combined_masses = np.concatenate([host_masses, ligand_masses])

# write some conformations into this PDB file
# (the writer is given the host topology followed by the ligand molecule)
writer = pdb_writer.PDBWriter([omm_topology, romol], "debug.pdb")

# note the order in which the coordinates are concatenated in this step --
#   in a later step we will need to combine recipes in the same order
combined_coords = np.concatenate([host_coords, ligand_coords])

# number of host atoms, i.e. the index at which ligand atoms start in the combined arrays
num_host_atoms = host_coords.shape[0]
Exemplo n.º 25
0
def test_barostat_varying_pressure():
    """Check that the box volume follows a pressure change made mid-simulation.

    The run starts at 1000 atm, where the box is expected to shrink by more than
    0.4 in volume. The pressure is then set to 1 atm and the barostat interval
    changed, after which the box is expected to be larger than at high pressure.
    """
    seed = 2021
    lam = 1.0
    barostat_interval = 3
    temperature = 300.0 * unit.kelvin
    timestep = 1.5 * unit.femtosecond
    collision_rate = 1.0 / unit.picosecond
    initial_waterbox_width = 3.0 * unit.nanometer
    np.random.seed(seed)

    # Start out with a very large pressure
    pressure = 1000.0 * unit.atmosphere

    mol_a, ff = hif2a_ligand_pair.mol_a, hif2a_ligand_pair.ff
    host_system, host_coords, host_box, _ = build_water_system(
        initial_waterbox_width.value_in_unit(unit.nanometer))

    minimized_coords = minimize_host_4d([mol_a], host_system, host_coords, ff,
                                        host_box)
    afe = AbsoluteFreeEnergy(mol_a, ff)

    unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
        ff.get_ordered_params(), host_system, minimized_coords)

    # molecule groups for the barostat, derived from the bond table of the first potential
    group_indices = get_group_indices(get_bond_list(unbound_potentials[0]))

    # bind each potential to its parameters and build its float32 implementation
    u_impls = [
        potential.bind(np.asarray(potential_params)).bound_impl(
            precision=np.float32)
        for potential_params, potential in zip(sys_params, unbound_potentials)
    ]

    kelvin = temperature.value_in_unit(unit.kelvin)

    thermostat = LangevinIntegrator(
        kelvin,
        timestep.value_in_unit(unit.picosecond),
        collision_rate.value_in_unit(unit.picosecond**-1),
        masses,
        seed,
    )
    integrator_impl = thermostat.impl()

    v_0 = sample_velocities(masses * unit.amu, temperature)

    baro = custom_ops.MonteCarloBarostat(
        coords.shape[0],
        pressure.value_in_unit(unit.bar),
        kelvin,
        group_indices,
        barostat_interval,
        u_impls,
        seed,
    )

    ctxt = custom_ops.Context(
        coords,
        v_0,
        host_box,
        integrator_impl,
        u_impls,
        barostat=baro,
    )

    # Phase 1: high pressure
    ctxt.multiple_steps(np.ones(1000) * lam)
    high_pressure_volume = compute_box_volume(ctxt.get_box())
    # Expect the box to shrink thanks to the barostat
    starting_volume = compute_box_volume(host_box)
    assert starting_volume - high_pressure_volume > 0.4

    # Phase 2: drop the pressure to 1 atmosphere (passed to the barostat in bar)
    one_atm_in_bar = (1 * unit.atmosphere).value_in_unit(unit.bar)
    baro.set_pressure(one_atm_in_bar)
    # Changing the barostat interval resets the barostat step.
    baro.set_interval(2)

    ctxt.multiple_steps(np.ones(2000) * lam)
    low_pressure_volume = compute_box_volume(ctxt.get_box())
    # Box will grow thanks to the lower pressure
    assert low_pressure_volume > high_pressure_volume
Exemplo n.º 26
0
def _topology_to_romol(omm_topology, coords):
    """Round-trip a topology + one coordinate frame through a temporary PDB file into an RDKit mol."""
    fd, pdb_path = tempfile.mkstemp(suffix=".pdb", text=True)
    # mkstemp hands back an open OS-level descriptor; close it right away since
    # the file is written and read through its path only.
    os.close(fd)
    try:
        writer = pdb_writer.PDBWriter([omm_topology], pdb_path)
        writer.write_frame(coords)
        writer.close()
        return Chem.MolFromPDBFile(pdb_path, removeHs=False)
    finally:
        # remove the temporary file even if writing or parsing failed
        os.remove(pdb_path)


def calculate_rigorous_work(
    host_pdbfile,
    guests_sdfile,
    outdir,
    num_deletions,
    deletion_steps,
    insertion_max_lambda=0.5,
    insertion_steps=501,
    eq1_steps=5001,
    fewer_outfiles=False,
    no_outfiles=False,
):
    """Runs non-equilibrium deletion jobs:
    1. Solvates a protein, inserts guest, equilibrates, equilibrates more & spins off
       deletion jobs every 1000th step, calculates work.
    2. Does the same thing in solvent instead of protein.
    Does num_deletions deletion jobs per leg per compound.

    Parameters
    ----------

    host_pdbfile (str): path to host pdb file
    guests_sdfile (str): path to guests sdf file
    outdir (str): path to directory to which to write output (created if missing)
    num_deletions (int): number of deletion trajectories to run per leg per compound
    deletion_steps (int): length of each deletion trajectory
    insertion_max_lambda (float): how far away to insert from (0.0-1.0)
    insertion_steps (int): how long to insert over
    eq1_steps (int): how long to equilibrate after insertion and before starting the deletions
    fewer_outfiles (bool): only save the starting frame of each deletion trajectory
    no_outfiles (bool): don't keep any output files

    Returns
    -------

    {str: {str: float}}: map of compound to leg label to work values
                         {'guest_1': {'protein': [work values], 'solvent': [work_values]}, ...}

    Output
    ------

    A pdb & sdf file for each guest's final insertion step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each guest's final eq1 step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless fewer_outfiles or no_outfiles is True)
    A pdb & sdf file for each deletion job's first step
      (outdir/<guest_name>_pd_<step>_host.pdb & outdir/<guest_name>_pd_<step>_guest.sdf)
      (unless no_outfiles is True)
    stdout corresponding to the files written noting the lambda value and energy
    stdout noting the work of deletion, if applicable
    stdout noting how long each leg took to run

    Note
    ----
    The work will not be calculated if the du_dl endpoints are not close to 0 or if any norm of
    force per atom exceeds 20000 kJ/(mol*nm) [MAX_NORM_FORCE defined in docking/report.py]
    """

    # exist_ok avoids the check-then-create race of a separate os.path.exists test
    os.makedirs(outdir, exist_ok=True)

    print(f"""
    HOST_PDBFILE = {host_pdbfile}
    GUESTS_SDFILE = {guests_sdfile}
    OUTDIR = {outdir}

    DELETION_MAX_LAMBDA = {DELETION_MAX_LAMBDA}
    MIN_LAMBDA = {MIN_LAMBDA}
    insertion_max_lambda = {insertion_max_lambda}
    insertion_steps = {insertion_steps}
    eq1_steps = {eq1_steps}
    num_deletions = {num_deletions}
    deletion_steps = {deletion_steps}
    """)

    # Prepare host
    # TODO: handle extra (non-transitioning) guests?
    print("Solvating host...")
    (
        solvated_host_system,
        solvated_host_coords,
        _,
        _,
        host_box,
        solvated_topology,
    ) = builders.build_protein_system(host_pdbfile)

    solvated_host_mol = _topology_to_romol(solvated_topology,
                                           solvated_host_coords)

    # Prepare water box
    print("Generating water box...")
    # TODO: water box probably doesn't need to be this big
    # The water box is as wide as the shortest edge (diagonal entry) of the host box.
    box_lengths = host_box[np.diag_indices(3)]
    water_box_width = min(box_lengths)
    (
        water_system,
        water_coords,
        water_box,
        water_topology,
    ) = builders.build_water_system(water_box_width)

    # it's okay if the water box here and the solvated protein box don't align -- they have PBCs
    water_mol = _topology_to_romol(water_topology, water_coords)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")

    # Run the procedure
    all_works = defaultdict(dict)
    print("Getting guests...")
    suppl = Chem.SDMolSupplier(guests_sdfile, removeHs=False)
    for guest_mol in suppl:
        guest_name = guest_mol.GetProp("_Name")
        guest_conformer = guest_mol.GetConformer(0)
        orig_guest_coords = np.array(guest_conformer.GetPositions(),
                                     dtype=np.float64)
        orig_guest_coords = orig_guest_coords / 10  # convert to md_units

        for system, coords, host_mol, box, label in zip(
            [solvated_host_system, water_system],
            [solvated_host_coords, water_coords],
            [solvated_host_mol, water_mol],
            [host_box, water_box],
            ["protein", "solvent"],
        ):
            # Restart the clock for every leg so the reported time covers this
            # leg only, not earlier legs of the same guest.
            leg_start_time = time.time()

            minimized_coords = minimizer.minimize_host_4d([guest_mol], system,
                                                          coords, ff, box)

            afe = free_energy.AbsoluteFreeEnergy(guest_mol, ff)
            # the combined coordinates are not used: run_leg receives the
            # minimized host coords and the original guest coords separately
            ups, sys_params, combined_masses, _ = afe.prepare_host_edge(
                ff.get_ordered_params(), system, minimized_coords)

            combined_bps = [up.bind(sp) for up, sp in zip(ups, sys_params)]

            works = run_leg(
                minimized_coords,
                orig_guest_coords,
                combined_bps,
                combined_masses,
                box,
                guest_name,
                label,
                host_mol,
                guest_mol,
                outdir,
                num_deletions,
                deletion_steps,
                insertion_max_lambda,
                insertion_steps,
                eq1_steps,
                fewer_outfiles,
                no_outfiles,
            )
            all_works[guest_name][label] = works

            leg_seconds = time.time() - leg_start_time
            print(f"{guest_name} {label} leg time: {leg_seconds:.2f} seconds")
    return all_works
Exemplo n.º 27
0
def test_absolute_free_energy():
    """Smoke-test an absolute free energy estimate over two host legs.

    The second molecule of ``tests/data/ligands_40.sdf`` is run through a
    four-window lambda schedule in the hif2a protein system and in a water
    system. The test passes when the difference between the two leg
    estimates has a magnitude below 1000.
    """
    ligands = [
        entry for entry in Chem.SDMolSupplier("tests/data/ligands_40.sdf",
                                              removeHs=False)
    ]
    mol = ligands[1]

    complex_system, complex_coords, _, _, complex_box, _ = builders.build_protein_system(
        "tests/data/hif2a_nowater_min.pdb")

    # Host for the second leg: a water system.
    solvent_system, solvent_coords, solvent_box, _ = builders.build_water_system(
        4.0)

    ff = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")
    ordered_params = ff.get_ordered_params()

    seed = 2021
    temperature = 300.0
    pressure = 1.0

    lambda_schedule = np.linspace(0, 1.0, 4)
    equil_steps = 1000
    prod_steps = 1000

    afe = free_energy.AbsoluteFreeEnergy(mol, ff)

    def leg_estimate(params, host_system, host_coords, host_box):
        """Return the deltaG estimate for a single host leg."""
        # Minimize the host around the ligand first so there are no clashes.
        minimized = minimizer.minimize_host_4d([mol], host_system,
                                               host_coords, ff, host_box)

        unbound_potentials, sys_params, masses, coords = afe.prepare_host_edge(
            params, host_system, minimized)

        # The first potential is used as the harmonic bond term; its bond
        # list defines the groups handed to the barostat.
        bond_list = get_bond_list(unbound_potentials[0])
        group_idxs = get_group_indices(bond_list)

        velocities = np.zeros_like(coords)
        client = CUDAPoolClient(1)

        integrator = LangevinIntegrator(temperature, 1.5e-3, 1.0, masses,
                                        seed)
        barostat = MonteCarloBarostat(coords.shape[0], pressure, temperature,
                                      group_idxs, 25, seed)

        model = estimator.FreeEnergyModel(
            unbound_potentials,
            client,
            host_box,
            coords,
            velocities,
            integrator,
            lambda_schedule,
            equil_steps,
            prod_steps,
            barostat,
        )

        leg_dG, _ = estimator.deltaG(model, sys_params)
        return leg_dG

    def absolute_model(params):
        """Return the complex-leg estimate minus the solvent-leg estimate."""
        legs = (
            (complex_system, complex_coords, complex_box),
            (solvent_system, solvent_coords, solvent_box),
        )
        complex_dG, solvent_dG = [
            leg_estimate(params, system, coords, box)
            for system, coords, box in legs
        ]
        return complex_dG - solvent_dG

    dG = absolute_model(ordered_params)
    assert np.abs(dG) < 1000.0
Exemplo n.º 28
0
        client = GRPCClient(hosts=cmd_args.hosts)
    client.verify()

    path_to_ligand = "tests/data/ligands_40.sdf"
    suppl = Chem.SDMolSupplier(path_to_ligand, removeHs=False)

    forcefield = Forcefield.load_from_file("smirnoff_1_1_0_ccc.py")
    mols = [x for x in suppl]

    dataset = Dataset(mols)

    absolute_solvent_schedule = construct_absolute_lambda_schedule_solvent(
        cmd_args.num_windows)
    relative_solvent_schedule = construct_relative_lambda_schedule(
        cmd_args.num_windows - 1)
    solvent_system, solvent_coords, solvent_box, solvent_topology = builders.build_water_system(
        4.0)

    # pick the largest mol as the blocker
    largest_size = 0
    ref_mol = None
    for mol in mols:
        if mol.GetNumAtoms() > largest_size:
            largest_size = mol.GetNumAtoms()
            ref_mol = mol

    print("Reference Molecule:", ref_mol.GetProp("_Name"),
          Chem.MolToSmiles(ref_mol))

    temperature = 300.0
    pressure = 1.0
    dt = 2.5e-3
Exemplo n.º 29
0
            address,
            options=[
                ("grpc.max_send_message_length", 500 * 1024 * 1024),
                ("grpc.max_receive_message_length", 500 * 1024 * 1024),
            ],
        )

        stub = service_pb2_grpc.WorkerStub(channel)
        stubs.append(stub)

    ff_raw = open(forcefield, "r").read()

    ff_handlers = deserialize_handlers(ff_raw)

    box_width = 3.0
    host_system, host_coords, box, _ = builders.build_water_system(box_width)

    lambda_schedule = np.array(
        [float(x) for x in general_cfg["lambda_schedule"].split(",")])

    num_steps = int(general_cfg["n_steps"])

    for epoch in range(100):

        print("Starting Epoch", epoch,
              datetime.datetime.now().strftime("%d/%m/%Y %H:%M:%S"))

        epoch_dir = os.path.join(general_cfg["out_dir"], "epoch_" + str(epoch))

        if not os.path.exists(epoch_dir):
            os.makedirs(epoch_dir)
Exemplo n.º 30
0
    def test_endpoint_parameters_match_decoupling_and_conversion_complex(self):
        """Check that the conversion leg ends on the parameters the decoupling leg starts from.

        Conv: P_start -> P_independent
        Decouple: P_independent -> P_arbitrary

        Both models are built on the same host system. For every
        NonbondedInterpolated potential of the decoupling edge, the matching
        conversion potential must also be NonbondedInterpolated, and the
        trailing halves of their non-host parameters (with the blocker rows
        skipped on the decoupling side) must be equal.

        NOTE(review): the name and the earlier description refer to a complex
        model, but the host here comes from builders.build_water_system --
        confirm which host is intended.
        """
        host_system, host_coords, _, host_topology = builders.build_water_system(
            4.0)
        num_host_atoms = host_coords.shape[0]

        ff_params = hif2a_ligand_pair.ff.get_ordered_params()

        temperature = 300.0
        pressure = 1.0
        dt = 2.5e-3

        client = CUDAPoolClient(NUM_GPUS)

        def build_model(model_cls):
            """Construct ``model_cls`` with the settings shared by both legs."""
            return model_cls(
                client,
                hif2a_ligand_pair.ff,
                host_system,
                construct_lambda_schedule(2),
                host_topology,
                temperature,
                pressure,
                dt,
                10,
                50,
                frame_filter=all_frames,
            )

        # Decoupling leg: blocker plus ligand.
        decouple_model = build_model(RelativeBindingModel)

        blocker = hif2a_ligand_pair.mol_a
        ligand = hif2a_ligand_pair.mol_b

        decouple_edge = RelativeFreeEnergy(
            decouple_model.setup_topology(blocker, ligand))
        decouple_potentials, decouple_all_params, _ = decouple_edge.prepare_host_edge(
            ff_params, decouple_model.host_system)

        # Conversion leg: ligand only.
        conv_model = build_model(AbsoluteConversionModel)

        conv_edge = AbsoluteFreeEnergy(ligand,
                                       conv_model.setup_topology(ligand))
        conv_potentials, conv_all_params, _ = conv_edge.prepare_host_edge(
            ff_params, conv_model.host_system)

        assert len(conv_all_params) == len(decouple_all_params)

        # Parameters are interpolated, so each atom contributes two rows.
        blocker_rows = blocker.GetNumAtoms() * 2
        host_rows = num_host_atoms * 2

        found_nonbonded = False
        for idx, decouple_pot in enumerate(decouple_potentials):
            if not isinstance(decouple_pot, NonbondedInterpolated):
                continue
            found_nonbonded = True

            assert isinstance(conv_potentials[idx], NonbondedInterpolated)

            conv_nb = conv_all_params[idx]
            decouple_nb = decouple_all_params[idx]

            # Parameter layouts
            # Conversion Leg [src_ligand, dest_ligand]
            # Decouple Leg [dest_blocker, dest_ligand, blocker_halved, ligand_halved]

            # The row counts may differ only by the blocker's rows ...
            assert conv_nb.shape[0] == decouple_nb.shape[0] - blocker_rows
            # ... and the number of parameter types per row must be the same.
            assert conv_nb.shape[1] == decouple_nb.shape[1]

            # Discard the host rows; what is left belongs to the small molecules.
            conv_guest = conv_nb[host_rows:]
            decouple_guest = decouple_nb[host_rows:]

            assert conv_guest.shape[0] == decouple_guest.shape[0] - blocker_rows

            # The dest params of conv must match the src params of decouple.
            conv_half = len(conv_guest) // 2
            decouple_start = len(decouple_guest) // 2 + blocker.GetNumAtoms()
            np.testing.assert_array_equal(
                conv_guest[conv_half:],
                decouple_guest[decouple_start:],
            )

        assert found_nonbonded, "Found no NonbondedInterpolated potential"