def freesolv_demo(n_configuration_samples=10, n_parameter_samples=10000):
    """Run toy 2D parameterization demo with all of freesolv.

    For every entry of ``smiles_list`` a ``Molecule`` is built, its stored
    vacuum trajectory is loaded and thinned, and a boolean hydrogen mask is
    recorded. Random-walk Metropolis-Hastings is then run on the summed
    per-molecule log-probability, and the results are written to
    ``H_vs_not_freesolv.npz`` inside ``data_path``.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots per molecule; it sets the thinning
        stride ``len(vacuum_traj) // n_configuration_samples``.
    n_parameter_samples : int
        Number of steps handed to ``random_walk_mh`` as ``n_steps``.
    """

    np.random.seed(0)

    mols = []
    hydrogens = []

    for smiles in smiles_list:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)
        # Clamp the stride to >= 1: when the trajectory has fewer frames than
        # requested the quotient is 0, and a slice step of 0 raises.
        thinning = max(1, len(vacuum_traj) // n_configuration_samples)
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))
        # Boolean mask, one entry per atom, True where the atom is hydrogen.
        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()]))
        mols.append(mol)

    def log_prob(radii):
        """Sum mol.log_prob over all molecules.

        radii[0] is assigned to every non-hydrogen atom and radii[1] to every
        hydrogen atom; scaling factors are fixed at 1 for all atoms.
        """
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            atomic_radii = np.ones(len(mol.pos)) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            # TODO: update this example to allow the scaling_factors to be variable also
            default_scaling_factors = np.ones(len(mol.pos))
            logp += mol.log_prob(atomic_radii, default_scaling_factors)

        return logp

    radii0 = np.array([0.1, 0.1])
    scales0 = np.array([0.8, 0.8])
    # NOTE(review): theta0 carries both radii and scales, but log_prob above
    # only reads entries 0 and 1 and ignores the rest. Presumably pack() puts
    # the radii first -- confirm, and see the TODO in log_prob.
    theta0 = pack(radii0, scales0)

    stepsize = 0.002

    traj, log_probs, acceptance_fraction = random_walk_mh(
        theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(data_path, 'H_vs_not_freesolv.npz'),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
def methane_demo(n_configuration_samples=10, n_parameter_samples=100000):
    """Run toy 2D parameterization demo with methane only.

    Builds a ``Molecule`` for SMILES ``'C'``, loads and thins its stored
    vacuum trajectory, runs random-walk Metropolis-Hastings over two radii
    (atom 0 vs. all remaining atoms), and saves the sampled trajectory to
    ``H_vs_not_radii_samples_C.npy`` inside ``data_path``.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots; it sets the thinning stride
        ``len(vacuum_traj) // n_configuration_samples``.
    n_parameter_samples : int
        Number of steps handed to ``random_walk_mh`` as ``n_steps``.
    """

    np.random.seed(0)

    smiles = 'C'
    mol = Molecule(smiles, vacuum_samples=[])
    path_to_vacuum_samples = resource_filename(
        'bayes_implicit_solvent', 'vacuum_samples/vacuum_samples_{}.h5'.format(
            mol.mol_index_in_smiles_list))
    vacuum_traj = md.load(path_to_vacuum_samples)
    # Clamp the stride to >= 1: when the trajectory has fewer frames than
    # requested the quotient is 0, and a slice step of 0 raises.
    thinning = max(1, len(vacuum_traj) // n_configuration_samples)
    mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
        vacuum_traj[::thinning])
    print('thinned vacuum_traj from {} to {}'.format(len(vacuum_traj),
                                                     len(mol.vacuum_traj)))

    n_atoms = len(mol.pos)

    def log_prob(radii):
        """Evaluate mol.log_prob with radii[0] on atom 0 and radii[1] elsewhere."""
        atomic_radii = np.zeros(n_atoms)
        atomic_radii[0] = radii[0]
        atomic_radii[1:] = radii[1]

        # TODO: update this example to allow the scaling_factors to be variable also
        # One scaling factor per atom, so the array lines up with
        # atomic_radii (previously sized by len(radii), i.e. 2).
        default_scaling_factors = np.ones(n_atoms)

        return mol.log_prob(atomic_radii, default_scaling_factors)

    radii0 = np.array([0.1, 0.1])

    traj, log_probs, acceptance_fraction = random_walk_mh(
        radii0, log_prob, n_steps=n_parameter_samples, stepsize=0.1)

    np.save(
        os.path.join(data_path,
                     'H_vs_not_radii_samples_{}.npy'.format(smiles)), traj)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
Пример #3
0
    # The first command-line argument, read as an integer, is the job id; it
    # is also used as the random seed and embedded in the output filename.
    try:
        job_id = int(sys.argv[1])
    except (IndexError, ValueError):
        # IndexError: no argument was given; ValueError: it is not an integer.
        # Catching only these lets KeyboardInterrupt/SystemExit propagate.
        print("No valid job_id supplied! Selecting one at random")
        job_id = onp.random.randint(10000)

    onp.random.seed(job_id)

    # 18 reference parameter values; the name suggests the OBC2 parameter set
    # (presumably radii followed by scaling factors -- TODO confirm layout).
    obc2_theta = np.array([
        1.5, 1.2, 1.7, 1.55, 1.5, 1.5, 2.1, 1.85, 1.8, 0.8, 0.85, 0.72, 0.79,
        0.85, 0.88, 0.8, 0.86, 0.96
    ])

    # Starting point: the reference values plus Gaussian noise scaled by
    # perturbation_sigma.
    x0 = obc2_theta + onp.random.randn(len(obc2_theta)) * perturbation_sigma

    # NOTE(review): prior_sample is not used in the visible code; the call is
    # kept because sample_prior() may advance the random state -- confirm.
    prior_sample = sample_prior()

    rw_mh_traj, rw_mh_post_traj, accept_rate = random_walk_mh(
        x0, log_posterior, n_steps=n_steps, stepsize=step_size)

    # Evaluate predictions on every pred_traj_thinning-th sample only.
    prediction_traj = onp.array(
        list(map(get_predictions, rw_mh_traj[::pred_traj_thinning])))

    onp.savez(
        'rw_mh_starting_from_obc2_perturbed_by_sigma={},job_id={}'.format(
            perturbation_sigma, job_id),
        random_seed=job_id,
        rw_mh_traj=onp.asarray(rw_mh_traj),
        rw_mh_post_traj=onp.asarray(rw_mh_post_traj),
        prediction_traj=prediction_traj)
def quarter_freesolv_demo(n_configuration_samples=10,
                          n_parameter_samples=10000,
                          good_initialization=False):
    """Run toy 2D parameterization demo with one randomly-selected quarter of freesolv.

    A fixed-seed shuffle picks ``len(smiles_list) / 4`` (truncated) entries
    of ``smiles_list``. For each, the stored vacuum trajectory is loaded and
    thinned and a hydrogen mask is recorded. Random-walk Metropolis-Hastings
    is then run over packed (radii, scales) parameters, and the results are
    saved to an ``.npz`` file under ``data_path`` whose name includes the
    subset size and the step size.

    Parameters
    ----------
    n_configuration_samples : int
        Target number of vacuum snapshots per molecule; it sets the thinning
        stride ``len(vacuum_traj) // n_configuration_samples``.
    n_parameter_samples : int
        Number of steps handed to ``random_walk_mh`` as ``n_steps``.
    good_initialization : bool
        If True, start from the hard-coded radii/scales values below instead
        of the defaults (radii 0.1, scales 0.8).
    """

    np.random.seed(0)

    inds = np.arange(len(smiles_list))
    np.random.shuffle(inds)
    inds = inds[:int(len(smiles_list) / 4)]

    quarter_smiles = [smiles_list[i] for i in inds]

    mols = []
    hydrogens = []

    for smiles in quarter_smiles:
        mol = Molecule(smiles, vacuum_samples=[])
        path_to_vacuum_samples = resource_filename(
            'bayes_implicit_solvent',
            'vacuum_samples/vacuum_samples_{}.h5'.format(
                mol.mol_index_in_smiles_list))
        vacuum_traj = md.load(path_to_vacuum_samples)
        # Clamp the stride to >= 1: when the trajectory has fewer frames than
        # requested the quotient is 0, and a slice step of 0 raises.
        thinning = max(1, len(vacuum_traj) // n_configuration_samples)
        mol.vacuum_traj = mdtraj_to_list_of_unitted_snapshots(
            vacuum_traj[::thinning])
        print('thinned vacuum_traj from {} to {}'.format(
            len(vacuum_traj), len(mol.vacuum_traj)))
        # Boolean mask, one entry per atom, True where the atom is hydrogen.
        hydrogens.append(
            np.array([a.element.symbol == 'H' for a in mol.top.atoms()]))
        mols.append(mol)

    def log_prob(theta):
        """Sum mol.log_prob over the subset.

        Index 0 of radii/scales applies to non-hydrogen atoms and index 1 to
        hydrogen atoms.
        """
        radii, scales = unpack(theta)
        logp = 0
        for mol, is_hydrogen in zip(mols, hydrogens):
            n_atoms = len(mol.pos)

            atomic_radii = np.ones(n_atoms) * radii[0]
            atomic_radii[is_hydrogen] = radii[1]

            atomic_scales = np.ones(n_atoms) * scales[0]
            atomic_scales[is_hydrogen] = scales[1]

            logp += mol.log_prob(atomic_radii, atomic_scales)

        return logp

    radii0 = np.array([0.1, 0.1])
    scales0 = np.array([0.8, 0.8])
    if good_initialization:
        radii0 = np.array([0.28319081, 0.20943347])
        scales0 = np.array([0.89298609, 0.67449963])

    theta0 = pack(radii0, scales0)

    stepsize = 0.0005

    traj, log_probs, acceptance_fraction = random_walk_mh(
        theta0, log_prob, n_steps=n_parameter_samples, stepsize=stepsize)

    np.savez(os.path.join(
        data_path,
        'H_vs_not_freesolv_{}_dt={}.npz'.format(len(quarter_smiles),
                                                stepsize)),
             traj=traj,
             log_probs=log_probs,
             acceptance_fraction=acceptance_fraction,
             stepsize=stepsize,
             n_steps=n_parameter_samples,
             smiles_subset=quarter_smiles,
             n_configuration_samples=n_configuration_samples)

    print("acceptance fraction: {:.4f}".format(acceptance_fraction))
Пример #5
0
            initial_scaling_factor_dict[a] for a in elements
        ]
        prior_location = np.array(
            initial_radius_array + initial_scaling_factor_array
        )  # mbondi2 set, except not differentiation H from HN...
        # prior_location = np.array([0.17, 0.12, 0.72, 0.85]) # mbondi2 set

        x0 = prior_location

        def log_prob_fun(theta):
            if (min(theta) < 0.01) or (max(theta) > 5):
                return -np.inf
            else:
                return np.sum(norm.logpdf(
                    theta - prior_location)) + log_likelihood_of_params(theta)

        mh_result = random_walk_mh(x0,
                                   log_prob_fun,
                                   n_steps=10000,
                                   stepsize=0.01)

        np.savez(
            'independent_mh_jax/cid={}.npz'.format(cids[i]),
            traj=mh_result[0],
            log_prob_traj=mh_result[1],
            expt_mean=expt_means[i],
            expt_unc=expt_uncs[i],
            cid=cids[i],
            symmetry_types=symmetry_types,
        )
Пример #6
0
            return -np.inf
        else:
            return np.sum(norm.logpdf(theta - prior_location)) + log_likelihood_of_params(theta)
    stepsize = np.ones(len(x0))
    stepsize[:N] = 0.0005
    stepsize[N:] = 0.001
    trajs = []
    log_prob_trajs = []

    predictions = get_predictions(x0)
    #print('RMSE: {} kcal/mol'.format(rmse_in_kcal_mol(predictions, expt_means)))

    from tqdm import tqdm
    trange = tqdm(range(1000))
    for t in trange:
        mh_result = random_walk_mh(x0, log_prob_fun, n_steps=50, stepsize=stepsize, progress_bar=False)
        trajs.append(mh_result[0])
        log_prob_trajs.append(mh_result[1])

        predictions = get_predictions(mh_result[0][-1])
        rmse =rmse_in_kcal_mol(predictions, expt_means)
        trange.set_postfix({'train RMSE': '{:.3f} kcal/mol'.format(rmse), 'log_prob': mh_result[1][-1], 'accept_fraction': mh_result[2]})

        x0 = trajs[-1][-1]

        if t % 20 == 0:
            traj = np.vstack(trajs)
            log_prob_traj = np.hstack(log_prob_trajs)
            x0 = traj[-1]
            np.savez('freesolv_mh_jax_df=5.npz',
                     traj=traj,