Example #1
    ## kill replicas
    master.terminate_replicas()

else:
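    # non-master branch: each remaining MPI rank builds one replica of the
    # posterior at its own point of the tempering schedule (indexed by rank - 1)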

    import numpy as np

    from rexfw.replicas import Replica
    from rexfw.slaves import Slave
    from rexfw.proposers.re import REProposer

    from isd2.samplers.gibbs import GibbsSampler

    from ensemble_hic.setup_functions import make_posterior, make_subsamplers
    from ensemble_hic.setup_functions import setup_initial_state
    from ensemble_hic.replica import CompatibleReplica

    posterior = make_posterior(settings)
    for replica_parameter in schedule:
        posterior[replica_parameter].set(schedule[replica_parameter][rank - 1])

    initial_state = setup_initial_state(settings['initial_state'], posterior)
    if 'norm' not in initial_state.variables:
        posterior['norm'].set(
            np.max(posterior.likelihoods['ensemble_contacts'].error_model.data)
            / float(settings['general']['n_structures']))
    initial_state.update_variables(
        structures=np.load(cont_folder + 'init_states.npy')[rank - 1])
    initial_state.update_variables(norm=np.load(cont_folder +
                                                'init_norms.npy')[rank - 1])
    settings['structures_hmc'].update(
        timestep=np.load(cont_folder + 'timesteps.npy')[rank - 1])
    subsamplers = make_subsamplers(posterior, initial_state.variables,
Example #2
def calculate_DOS(config_file,
                  n_samples,
                  subsamples_fraction,
                  burnin,
                  n_iter=100000,
                  tol=1e-10,
                  save_output=True,
                  output_suffix=''):
    """Calculates the density of states (DOS) using non-parametric
    histogram reweighting (WHAM).

    :param config_file: Configuration file
    :type config_file: str

    :param n_samples: number of samples the simulation ran for
    :type n_samples: int

    :param subsamples_fraction: divisor determining the fraction of samples
                         (after burn-in) to be analyzed; set this to, e.g.,
                         10 to use one tenth of n_samples and decrease
                         computation time
    :type subsamples_fraction: int

    :param burnin: number of samples to be thrown away as part
                   of the burn-in period
    :type burnin: int

    :param n_iter: number of WHAM iterations
    :type n_iter: int

    :param tol: convergence threshold: iteration stops once the change in the
                negative log-likelihood minimized in WHAM falls below this value
    :type tol: float

    :param save_output: whether to save the resulting DOS object, the
                        parameters used during the calculation, and the indices
                        of the randomly chosen samples to the simulation
                        output folder
    :type save_output: bool

    :returns: DOS object
    :rtype: DOS
    """

    import numpy as np

    from ensemble_hic.wham import PyWHAM as WHAM, DOS

    from ensemble_hic.setup_functions import parse_config_file, make_posterior
    from ensemble_hic.analysis_functions import load_sr_samples

    settings = parse_config_file(config_file)
    n_replicas = int(settings['replica']['n_replicas'])
    target_replica = n_replicas

    params = {
        'n_samples': n_samples,
        'burnin': burnin,
        'subsamples_fraction': subsamples_fraction,
        'niter': n_iter,
        'tol': tol
    }

    n_samples = min(params['n_samples'], int(settings['replica']['n_samples']))
    dump_interval = int(settings['replica']['samples_dump_interval'])

    output_folder = settings['general']['output_folder']
    if output_folder[-1] != '/':
        output_folder += '/'
    n_beads = int(settings['general']['n_beads'])
    n_structures = int(settings['general']['n_structures'])
    schedule = np.load(output_folder + 'schedule.pickle')

    posterior = make_posterior(settings)
    p = posterior
    variables = p.variables

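    # for each replica, compute likelihood and prior energies of a random
    # subsample of its states; these are the inputs to WHAM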
    energies = []
    L = p.likelihoods['ensemble_contacts']
    data = L.forward_model.data_points
    P = p.priors['nonbonded_prior']
    sels = []
    for i in range(n_replicas):
        samples = load_sr_samples(output_folder + 'samples/',
                                  i + 1,
                                  n_samples + 1,
                                  dump_interval,
                                  burnin=params['burnin'])
        sel = np.random.choice(len(samples),
                               int(len(samples) / float(subsamples_fraction)),
                               replace=False)
        samples = samples[sel]
        sels.append(sel)
        energies.append([[
            -L.log_prob(**x.variables) if 'lammda' in schedule else 0,
            -P.log_prob(structures=x.variables['structures'])
            if 'beta' in schedule else 0
        ] for x in samples])
        print "Calculated energies for {}/{} replicas...".format(i, n_replicas)

    energies = np.array(energies)
    energies_flat = energies.reshape(np.prod(energies.shape[:2]), 2)
    sched = np.array([schedule['lammda'], schedule['beta']])
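    # q[k, n]: energies of sample n weighted by the (lammda, beta) schedule
    # parameters of replica k; this is the input matrix for WHAM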
    q = np.array([[(energy * replica_params).sum() for energy in energies_flat]
                  for replica_params in sched.T])
    wham = WHAM(len(energies_flat), n_replicas)
    wham.N[:] = len(energies_flat) / n_replicas
    wham.run(q, niter=params['niter'], tol=params['tol'], verbose=100)

    dos = DOS(energies_flat, wham.s, sort_energies=False)

    if save_output:
        import os
        from cPickle import dump

        ana_path = output_folder + 'analysis/'
        if not os.path.exists(ana_path):
            os.makedirs(ana_path)
        with open(ana_path + 'dos{}.pickle'.format(output_suffix), 'wb') as opf:
            dump(dos, opf)
        with open(ana_path + 'wham_params{}.pickle'.format(output_suffix),
                  'wb') as opf:
            dump(params, opf)
        with open(ana_path + 'wham_sels{}.pickle'.format(output_suffix),
                  'wb') as opf:
            dump(np.array(sels), opf)

    return dos
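
A minimal usage sketch (the config path and all numbers are hypothetical placeholders, not values from the original example):

# hypothetical call; adjust the path and sample counts to your simulation
dos = calculate_DOS('/path/to/config.cfg',
                    n_samples=50000,
                    subsamples_fraction=10,  # use one tenth of the samples
                    burnin=20000)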
Example #3
import os
import sys

import numpy as np

# StructureParser and rmsd are assumed to come from the CSB toolbox on which
# this project builds; adjust the imports if they live elsewhere
from csb.bio.io.wwpdb import StructureParser
from csb.bio.utils import rmsd

from ensemble_hic.setup_functions import parse_config_file, make_posterior
from ensemble_hic.analysis_functions import load_samples_from_cfg
from ensemble_hic.analysis_functions import write_ensemble
sys.path.append(os.path.expanduser('~/projects/ensemble_hic/scripts/misc/'))
from simlist import simulations

sims = simulations['1pga_1shf_fwm_poisson_new_fixed_it3']
cpath = sims['common_path']
n_structures = sims['n_structures']
odirs = sims['output_dirs']

MAP_samples = []
for (n, odir) in zip(n_structures, odirs):
    cfg_file = cpath + odir + '/config.cfg'
    p = make_posterior(parse_config_file(cfg_file))
    samples = load_samples_from_cfg(cfg_file, burnin=60000)
    Es = map(lambda x: -p.log_prob(**x.variables), samples)
    MAP_sample = samples[np.argmin(Es)]
    MAP_samples.append(MAP_sample.variables['structures'].reshape(n, 56, 3))

MAP_samples_flat = np.array([x for y in MAP_samples for x in y])

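# RMSD invariant under mirror inversion: compare x to both y and its point
# reflection -y, since the data do not determine chirality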
invar_rmsd = lambda x, y: min(rmsd(x, y), rmsd(x, -y))
tmp = StructureParser(
    os.path.expanduser('~/projects/ensemble_hic/data/proteins/1pga.pdb'))
ref_1pga = tmp.parse().get_coordinates(['CA'])
tmp = StructureParser(
    os.path.expanduser('~/projects/ensemble_hic/data/proteins/1shf.pdb'))
ref_1shf = tmp.parse().get_coordinates(['CA'])
rmsds_to_1pga = map(lambda x: invar_rmsd(ref_1pga, x), MAP_samples_flat)
def plot_correlations(ax):

    correlations = []
    n_structures = sims['n_structures']
    cpath = sims['common_path']
    opdirs = sims['output_dirs']

    for i, n in enumerate(n_structures):
        sim_path = cpath + opdirs[i] + '/'
        config_file = sim_path + 'config.cfg'
        settings = parse_config_file(config_file)
        # samples = load_sr_samples(sim_path + 'samples/',
        #                           int(settings['replica']['n_replicas']),
        #                           48001, 1000, 30000)

        from ensemble_hic.analysis_functions import load_samples_from_cfg_auto
        samples = load_samples_from_cfg_auto(config_file, burnin=30000)

        p = make_posterior(settings)
        fwm = p.likelihoods['ensemble_contacts'].forward_model
        dps = fwm.data_points
        inds = np.where(np.abs(dps[:, 0] - dps[:, 1]) > 8)
        corrs = []
        for sample in samples:
            md = fwm(**sample.variables)
            corrs.append(np.corrcoef(md[inds], dps[:, 2][inds])[0, 1])
        correlations.append([np.mean(corrs), np.std(corrs)])

        # energies = np.array(map(lambda x: -p.log_prob(**x.variables), samples))
        # map_sample = samples[np.argmin(energies)]
        # md = fwm(**map_sample.variables)
        # correlations.append(np.corrcoef(md[inds], dps[:,2][inds])[0,1])
    correlations = np.array(correlations)

    # ax.plot(n_structures, correlations, marker='o', ls='--', c='k')
    ax.errorbar(n_structures,
                correlations[:, 0],
                correlations[:, 1],
                marker='o',
                ls='--',
                c='k')
    ax.set_xticks(n_structures)
    ax.set_xlabel('number of states $n$')
    ax.set_ylabel(r'$\rho$(back-calculated data, experimental data)')
    for spine in ['top', 'right']:
        ax.spines[spine].set_visible(False)

    if False:  # inset drawing disabled; change to True to enable
        ## make inset
        from mpl_toolkits.axes_grid1.inset_locator import inset_axes
        inset_ax = inset_axes(ax, width='40%', height='25%', loc=10)
        inset_ax.plot(n_structures[2:6],
                      correlations[2:6, 0],
                      ls='--',
                      marker='o',
                      color='black')
        inset_ax.set_xticks(n_structures[2:6])
        inset_ax.spines['top'].set_visible(False)
        inset_ax.spines['right'].set_visible(False)
        inset_ax.set_ylim((0.985, 1.005))
        inset_ax.set_yticks((0.99, 1.0))
        inset_ax.set_xlim((7, 105))
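
A minimal sketch of how plot_correlations might be driven (the matplotlib setup and output filename are assumptions, not part of the original snippet):

import matplotlib.pyplot as plt

fig, ax = plt.subplots()
plot_correlations(ax)
fig.savefig('correlations.pdf')  # hypothetical output path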
Example #5
    
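    # fragment of a larger analysis script: output_dirs, out_file, n_structures,
    # log_sum_exp (e.g. from csb.numeric), gammaln (scipy.special) and
    # dump (cPickle) are provided by the surrounding, omitted code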
    logZs = []
    data_terms = []
    for x in output_dirs:
        dos = np.load(x + '/analysis/dos.pickle')
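        # log-evidence as a ratio of partition functions estimated from the
        # DOS: posterior (likelihood + prior energies) over prior alone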
        logZs.append(log_sum_exp(-dos.E.sum(1) + dos.s) - \
                     log_sum_exp(-dos.E[:,1] + dos.s))
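        # recover the number of replicas from a directory name containing
        # '..._<n_replicas>replicas...'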
        a = x.find('replicas')
        b = x[a-4:].find('_')
        n_replicas = int(x[a-4+b+1:a])
        
        p = np.load(x + '/analysis/wham_params.pickle')
        c = parse_config_file(x + '/config.cfg')
        s = load_sr_samples(x + '/samples/', n_replicas, p['n_samples']+1,
                            int(c['replica']['samples_dump_interval']),
                            p['burnin'])
        sels = np.load(x + '/analysis/wham_sels.pickle')
        s = s[sels[-1]]
        p = make_posterior(parse_config_file(x + '/config.cfg'))
        L = p.likelihoods['ensemble_contacts']
        d = L.forward_model.data_points[:,2]
        f = gammaln(d+1).sum()
        print "mean log-posterior:", np.mean(map(lambda x: p.log_prob(**x.variables), s))
        logZs[-1] -= f + np.log(len(d)) * ('1pga' not in x)
        data_terms.append(np.array(map(lambda x: -L.log_prob(**x.variables), s)).mean() + f)
        print "evidence:", logZs[-1]
    data_terms = np.array(data_terms)    

    with open(out_file, "wb") as opf:
        dump((n_structures, logZs, data_terms), opf)