Пример #1
0

# Start every walker in a tiny Gaussian ball (scale 1e-5) around ml_params
print("Running MCMC sampling...")
ndim = len(ml_params)
nwalkers = 32
pos = ml_params + 1e-5 * np.random.randn(nwalkers, ndim)
lp = np.array([log_prob(p) for p in pos])
m = ~np.isfinite(lp)
# Redraw any walker whose starting log-probability is not finite
while m.any():
    pos[m] = ml_params + 1e-5 * np.random.randn(m.sum(), ndim)
    lp[m] = np.array([log_prob(p) for p in pos[m]])
    m = ~np.isfinite(lp)

# Run the sampler for 1250 steps from the initial positions
sampler = emcee3.Sampler()
ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), pos)
sampler.run(ensemble, 1250, progress=True)

# Evaluate the GP at ml_params: prediction on a fine time grid, the PSD on a
# log-spaced frequency grid, and the kernel value out to four periods of lag
gp.set_parameter_vector(ml_params)
x = np.linspace(t.min(), t.max(), num=5000)
mu, var = gp.predict(y, x, return_var=True)
omega = np.exp(np.linspace(np.log(0.1), np.log(10), num=5000))
psd = gp.kernel.get_psd(omega)
period = np.exp(gp.get_parameter("kernel:log_period"))
tau = np.linspace(0, 4 * period, num=5000)
acf = gp.kernel.get_value(tau)

# Next stage: predictions from the samples
print("Making plots...")
Пример #2
0
# Persist the GP together with the fitted data and periodogram arrays
with open("astero-{0}.pkl".format(kicid), "wb") as f:
    pickle.dump(
        (gp, fit_y, freq, power_all, power_some, len(x)),
        f,
        protocol=-1,  # negative protocol selects the highest available one
    )

# Never clobber an existing chain file without explicit confirmation
if os.path.exists("astero-{0}.h5".format(kicid)):
    result = input("MCMC save file exists. Overwrite? (type 'yes'): ")
    if not result.lower() == "yes":
        sys.exit(0)

# Custom proposal used by the Metropolis-Hastings move
def astero_move(rng, x0):
    """Propose new coordinates by shifting column 3 in linear space.

    For every row, ``exp(x[:, 3])`` is moved up or down (each with
    probability 1/2) by ``exp(x[:, 4])`` and the result is stored back as a
    logarithm.  All other columns are returned unchanged and the input array
    is not modified.

    Parameters
    ----------
    rng : object providing ``rand(n)`` (e.g. ``numpy.random.RandomState``)
    x0 : array-like of shape (n, k) with k >= 5

    Returns
    -------
    (proposal, factors) : the proposed coordinates and an array of ``n``
        zeros (presumably the log proposal-ratio expected by the caller --
        confirm against the MHMove API).
    """
    proposal = np.array(x0)
    count = len(proposal)
    go_up = rng.rand(count) < 0.5
    direction = np.where(go_up, 1.0, -1.0)
    linear = np.exp(proposal[:, 3]) + direction * np.exp(proposal[:, 4])
    proposal[:, 3] = np.log(linear)
    return proposal, np.zeros(count)

# Combine four proposals in one sampler and store the chain in an HDF5 file
sampler = emcee3.Sampler(
    [
        emcee3.moves.StretchMove(),
        emcee3.moves.DEMove(1e-3),
        emcee3.moves.KDEMove(),
        emcee3.moves.MHMove(astero_move),
    ],
    backend=emcee3.backends.HDFBackend("astero-{0}.h5".format(kicid)),
)

# Run 10000 steps, evaluating the model through an interruptible pool
with emcee3.pools.InterruptiblePool() as pool:
    ensemble = emcee3.Ensemble(
        emcee3.SimpleModel(log_prob),
        initial_samples,
        pool=pool,
    )
    ensemble = sampler.run(ensemble, 10000, progress=True)
Пример #3
0
def fit_star(star, verbose=False):
    """Fit an isochrone model to one star and save posterior samples.

    The result is written to ``"<kepid>.h5"`` in the current directory,
    together with a corner plot ``"corner-<kepid>.png"``.  If the HDF5 file
    already exists the star is skipped and nothing is written.

    Parameters
    ----------
    star : object
        Record exposing the catalogue attributes read below (``kepid``,
        ``mass``, ``mass_err1``/``mass_err2``, ``feh``, ``feh_err1``/
        ``feh_err2``, the J/H/K magnitudes and the ``tgas_*`` columns).
    verbose : bool
        Print progress information and show progress bars.

    Returns
    -------
    None
    """
    output_filename = "{0}.h5".format(star.kepid)
    logging.info("Output filename: {0}".format(output_filename))
    if os.path.exists(output_filename):
        # Results from an earlier run are never overwritten.
        return

    # NOTE: a leftover debugging stub (``time.sleep(30); return``) used to sit
    # here and made everything below unreachable; it has been removed.
    strt = time.time()

    # The KIC parameters
    mean_log_mass = np.log(star.mass)
    sigma_log_mass = (np.log(star.mass + star.mass_err1) -
                      np.log(star.mass + star.mass_err2)
                      )  # double the kic value
    mean_feh = star.feh
    sigma_feh = star.feh_err1 - star.feh_err2  # double the kic value

    min_distance, max_distance = 0.0, 3000.0

    # Other bands: only included when the magnitude is finite.  The three W
    # bands are added together, keyed on W1 alone.
    other_bands = dict()
    if np.isfinite(star.tgas_w1gmag):
        other_bands = dict(
            W1=(star.tgas_w1gmag, star.tgas_w1gmag_error),
            W2=(star.tgas_w2gmag, star.tgas_w2gmag_error),
            W3=(star.tgas_w3gmag, star.tgas_w3gmag_error),
        )
    optional_bands = (
        ("V", "tgas_Vmag", "tgas_e_Vmag"),
        ("B", "tgas_Bmag", "tgas_e_Bmag"),
        ("g", "tgas_gpmag", "tgas_e_gpmag"),
        ("r", "tgas_rpmag", "tgas_e_rpmag"),
        ("i", "tgas_ipmag", "tgas_e_ipmag"),
    )
    for band, mag_attr, err_attr in optional_bands:
        mag = getattr(star, mag_attr)
        if np.isfinite(mag):
            other_bands[band] = (mag, getattr(star, err_attr))

    # Build the model
    mist = MIST_Isochrone()
    mod = StarModel(mist,
                    J=(star.jmag, star.jmag_err),
                    H=(star.hmag, star.hmag_err),
                    K=(star.kmag, star.kmag_err),
                    parallax=(star.tgas_parallax, star.tgas_parallax_error),
                    **other_bands)

    # Initialize: keep redrawing walkers until every one of them starts at a
    # position with a finite log-posterior.
    nwalkers = 500
    ndim = 5
    lnpost_init = np.full(nwalkers, -np.inf)
    coords_init = np.empty((nwalkers, ndim))
    m = ~np.isfinite(lnpost_init)
    while np.any(m):
        K = m.sum()

        # Mass
        coords_init[m, 0] = np.exp(mean_log_mass +
                                   sigma_log_mass * np.random.randn(K))

        # Age
        u = np.random.rand(K)
        coords_init[m, 1] = np.log(
            (np.exp(mist.maxage) - np.exp(mist.minage)) * u +
            np.exp(mist.minage))

        # Fe/H
        coords_init[m, 2] = mean_feh + sigma_feh * np.random.randn(K)

        # Distance (uniform in volume between min_distance and max_distance)
        u = np.random.rand(K)
        coords_init[m, 3] = (u * (max_distance**3 - min_distance**3) +
                             min_distance**3)**(1. / 3)

        # Av
        coords_init[m, 4] = np.random.rand(K)

        lnpost_init[m] = np.array([mod.lnpost(c) for c in coords_init[m]])
        m = ~np.isfinite(lnpost_init)

    class ICModel(emcee3.Model):
        """Adapter exposing the StarModel prior/likelihood to emcee3."""

        def compute_log_prior(self, state):
            state.log_prior = mod.lnprior(state.coords)
            return state

        def compute_log_likelihood(self, state):
            state.log_likelihood = mod.lnlike(state.coords)
            return state

    sampler = emcee3.Sampler(emcee3.moves.KDEMove())
    ensemble = emcee3.Ensemble(ICModel(), coords_init)

    # Sample in chunks until the chain is `targetn` autocorrelation times
    # long (after burn-in) or the iteration budget is exhausted.
    chunksize = 200
    targetn = 3
    tau_max = None  # stays None if the autocorrelation is never estimated
    neff = np.nan
    for iteration in range(100):
        if verbose:
            print("Iteration {0}...".format(iteration + 1))
        sampler.run(ensemble, chunksize, progress=verbose)
        mu = np.mean(sampler.get_coords(), axis=1)
        try:
            tau = emcee3.autocorr.integrated_time(mu, c=1)
        except emcee3.autocorr.AutocorrError:
            continue
        tau_max = tau.max()
        neff = ((iteration + 1) * chunksize / tau_max - 2.0)
        if verbose:
            print("Maximum autocorrelation time: {0}".format(tau_max))
            print("N_eff: {0}\n".format(neff * nwalkers))
        if neff > targetn:
            break

    if tau_max is None:
        # Previously this case crashed with a NameError on `tau_max`.
        logging.warning("Autocorrelation time could not be estimated for "
                        "{0}; no burn-in discarded".format(star.kepid))
        burnin = 0
    else:
        burnin = int(2 * tau_max)

    samples = sampler.get_coords(flat=True, discard=burnin)
    # Cap at what is available: choice(..., replace=False) raises otherwise.
    ntot = min(5000, len(samples))
    if verbose:
        print("Discarding {0} samples for burn-in".format(burnin))
        print("Randomly choosing {0} samples".format(ntot))
    inds = np.random.choice(len(samples), size=ntot, replace=False)
    samples = samples[inds]

    fit_parameters = np.empty(len(samples),
                              dtype=[
                                  ("mass", float),
                                  ("log10_age", float),
                                  ("feh", float),
                                  ("distance", float),
                                  ("av", float),
                              ])
    computed_parameters = np.empty(len(samples),
                                   dtype=[
                                       ("radius", float),
                                       ("teff", float),
                                       ("logg", float),
                                   ])

    rows = enumerate(samples)
    if verbose:
        rows = tqdm.tqdm(rows, total=len(samples))
    for i, p in rows:
        ic = mod.ic(*p)
        # Structured-array rows must be assigned from tuples, field by field.
        fit_parameters[i] = tuple(p)
        computed_parameters[i] = (ic["radius"], ic["Teff"], ic["logg"])

    total_time = time.time() - strt
    logging.info("emcee3 took {0} sec".format(total_time))

    with h5py.File(output_filename, "w") as f:
        f.attrs["kepid"] = int(star.kepid)
        f.attrs["neff"] = neff * nwalkers
        f.attrs["runtime"] = total_time
        f.create_dataset("fit_parameters", data=fit_parameters)
        f.create_dataset("computed_parameters", data=computed_parameters)

    # Plot
    fig = corner.corner(samples)
    fig.savefig("corner-{0}.png".format(star.kepid))
    plt.close(fig)
Пример #4
0
def fit_emcee3(mod,
               nwalkers=500,
               verbose=False,
               nsamples=5000,
               targetn=4,
               iter_chunksize=200,
               pool=None,
               overwrite=False,
               maxiter=10,
               sample_directory='mcmc_chains',
               nburn=2,
               mixedmoves=True,
               resultsdir='mcmc_results',
               prior_only=False,
               **kwargs):
    """Fit a model using emcee3 and return a DataFrame of posterior samples.

    modeled after https://github.com/dfm/gaia-kepler/blob/master/fit.py

    Parameters
    ----------
    mod : model object
        Must provide ``name``, ``param_names`` and ``sample_from_prior``.
    nwalkers : int
        Number of walkers drawn from the prior when no saved chain is reused.
    verbose : bool
        Print progress and convergence diagnostics.
    nsamples : int
        Maximum number of samples returned (capped at what is available).
    targetn : float
        Stop once the chain length, in autocorrelation times and after
        subtracting ``nburn``, exceeds this value.
    iter_chunksize : int
        Steps run between convergence checks.
    pool : optional
        Pool passed to the ensemble; a ``DefaultPool`` is used when None.
    overwrite : bool
        Reset the sampler and start again from fresh prior draws.
    maxiter : int
        Maximum number of chunks to run.
    sample_directory : str or None
        Directory for the HDF5 chain ``<mod.name>.h5``; None keeps the chain
        in an in-memory backend.
    nburn : float
        Number of autocorrelation times to discard as burn-in.
    mixedmoves : bool
        Use a KDE/DE/DE-snooker mixture instead of a pure KDE move.
    resultsdir : str
        Forwarded to ``write_samples``.
    prior_only : bool
        Sample with ``Emcee3PriorModel`` instead of ``Emcee3Model``.
    **kwargs
        Accepted for call compatibility; not used here.

    Returns
    -------
    pandas.DataFrame with one column per entry of ``mod.param_names``.
    """

    def draw_from_prior():
        return mod.sample_from_prior(nwalkers,
                                     require_valid=True,
                                     values=True)

    # Initialize
    if prior_only:
        walker = Emcee3PriorModel(mod)
    else:
        walker = Emcee3Model(mod)

    if sample_directory is not None:
        sample_file = os.path.join(sample_directory, '{}.h5'.format(mod.name))
        if not os.path.exists(sample_directory):
            os.makedirs(sample_directory)
        backend = HDFBackend(sample_file)
        try:
            # Resume from the last saved positions when a chain exists.
            coords_init = backend.current_coords
        except (AttributeError, KeyError):
            coords_init = draw_from_prior()
    else:
        backend = Backend()
        coords_init = draw_from_prior()

    if mixedmoves:
        moves = [(emcee3.moves.KDEMove(), 0.4),
                 (emcee3.moves.DEMove(1.0), 0.4),
                 (emcee3.moves.DESnookerMove(), 0.2)]
    else:
        moves = emcee3.moves.KDEMove()

    sampler = emcee3.Sampler(moves, backend=backend)
    if overwrite:
        sampler.reset()
        coords_init = draw_from_prior()

    if pool is None:
        from emcee3.pools import DefaultPool
        pool = DefaultPool()

    # A ValueError here now propagates to the caller.  The previous handler
    # dropped into pdb, which hangs batch jobs and left `ensemble` unbound.
    ensemble = emcee3.Ensemble(walker, coords_init, pool=pool)

    def calc_stats(s):
        """returns tau_max, neff
        """
        tau = s.get_integrated_autocorr_time(c=1)
        tau_max = tau.max()
        neff = s.backend.niter / tau_max - nburn
        if verbose:
            print("Maximum autocorrelation time: {0}".format(tau_max))
            # NOTE(review): `neff` already has nburn subtracted, so the value
            # in parentheses subtracts it twice -- confirm this is intended.
            print("N_eff: {0} ({1})\n".format(neff * nwalkers, neff - nburn))
        return tau_max, neff

    # Default used when no autocorrelation estimate is ever obtained (for
    # example maxiter=0); previously this path raised NameError below.
    tau_max = 0
    done = False
    if not overwrite:
        try:
            if verbose:
                print('Status from previous run:')
            tau_max, neff = calc_stats(sampler)
            if neff > targetn:
                done = True
        except (emcee3.autocorr.AutocorrError, KeyError):
            pass

    chunksize = iter_chunksize
    for iteration in range(maxiter):
        if done:
            break
        if verbose:
            print("Iteration {0}...".format(iteration + 1))
        sampler.run(ensemble, chunksize, progress=verbose)
        try:
            tau_max, neff = calc_stats(sampler)
        except emcee3.autocorr.AutocorrError:
            tau_max = 0
            continue
        if neff > targetn:
            done = True

    burnin = int(nburn * tau_max)
    ntot = nsamples
    samples = sampler.get_coords(flat=True, discard=burnin)
    total_samples = len(samples)
    if ntot > total_samples:
        ntot = total_samples
    if verbose:
        print("Discarding {0} samples for burn-in".format(burnin))
        print("Randomly choosing {0} samples".format(ntot))
    inds = np.random.choice(total_samples, size=ntot, replace=False)
    samples = samples[inds]

    df = pd.DataFrame(samples, columns=mod.param_names)
    write_samples(mod, df, resultsdir=resultsdir)

    return df
Пример #5
0

# Start every walker in a tiny Gaussian ball (scale 1e-5) around ml_params
print("Running MCMC sampling...")
ndim = len(ml_params)
nwalkers = 32
pos = ml_params + 1e-5 * np.random.randn(nwalkers, ndim)
lp = np.array([log_prob(p) for p in pos])
m = ~np.isfinite(lp)
# Redraw any walker whose starting log-probability is not finite
while m.any():
    pos[m] = ml_params + 1e-5 * np.random.randn(m.sum(), ndim)
    lp[m] = np.array([log_prob(p) for p in pos[m]])
    m = ~np.isfinite(lp)

# Run 15000 steps through a pool, storing the chain in "transit.h5"
sampler = emcee3.Sampler(backend=emcee3.backends.HDFBackend("transit.h5"))
with emcee3.pools.InterruptiblePool() as pool:
    ensemble = emcee3.Ensemble(emcee3.SimpleModel(log_prob), pos, pool=pool)
    sampler.run(ensemble, 15000, progress=True)

# Corner plot of parameters 1..4: drop the first 5000 steps, thin by 13, and
# exponentiate the first three of the selected columns (and their true values)
samples = np.array(sampler.get_coords(discard=5000, flat=True, thin=13))
samples = samples[:, 1:5]
samples[:, :3] = np.exp(samples[:, :3])
truths = np.array(true_params[1:5])
truths[:3] = np.exp(truths[:3])
fig = corner.corner(
    samples,
    truths=truths,
    labels=[
        r"period",
        r"$R_\mathrm{P}/R_\star$",
        r"duration",
        r"$t_0$",
    ],
)
fig.savefig("transit-corner.pdf")
Пример #6
0
def mcmc_fit(x,
             y,
             yerr,
             p_init,
             p_max,
             id,
             RESULTS_DIR,
             truths,
             burnin=500,
             nwalkers=12,
             nruns=10,
             full_run=500,
             diff_threshold=.5,
             n_independent=1000):
    """
    Run the MCMC: a burn-in followed by repeated production runs.

    After every production run the flattened chain is written to
    ``<RESULTS_DIR>/<id>.h5`` (dataset ``"samples"``), diagnostic plots are
    made with ``make_plot`` and convergence is tested with
    ``evaluate_convergence``; the loop stops early once converged.  The total
    wall-clock time in seconds is written to ``<RESULTS_DIR>/<id>_time.txt``.

    Parameters
    ----------
    x, y, yerr : data passed to ``MyModel``; ``x`` may be a sequence of
        light-curve sections (lists of arrays) or a single array.
    p_init : initial period in days; its log is the last element of the
        starting parameter vector.
    p_max : passed to ``MyModel`` (presumably an upper period bound --
        confirm against MyModel).
    id : identifier used in output file names (shadows the builtin; kept for
        interface compatibility).
    RESULTS_DIR : directory receiving the output files.
    truths : forwarded to ``make_plot``.
    burnin : number of burn-in steps.
    nwalkers : number of walkers.
    nruns : maximum number of production runs.
    full_run : number of steps per production run.
    diff_threshold, n_independent : forwarded to ``evaluate_convergence``.

    Returns
    -------
    None
    """

    try:
        print("Total number of points  = ", sum(len(i) for i in x))
        print("Number of light curve sections = ", len(x))
    except TypeError:
        # x is a flat array: its elements have no len().
        print("Total number of points  = ", len(x))

    theta_init = np.log(
        [np.exp(-12), np.exp(7),
         np.exp(-1), np.exp(-17), p_init])
    runs = np.zeros(nruns) + full_run
    ndim = len(theta_init)

    print("p_init = ", p_init, "days, log(p_init) = ", np.log(p_init),
          "p_max = ", p_max)

    # Time the LHF call.
    start = time.time()
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    print("lnlike = ", mod.lnlike_split(theta_init), "lnprior = ",
          mod.Glnprior(theta_init), "\n")
    end = time.time()
    tm = end - start
    print("1 lhf call takes ", tm, "seconds")
    print("burn in will take", tm * nwalkers * burnin, "s")
    print("each run will take", tm * nwalkers * runs[0] / 60, "mins")
    print("total = ",
          (tm * nwalkers * np.sum(runs) + tm * nwalkers * burnin) / 60, "mins")

    # Run MCMC.
    mod = MyModel(x, y, yerr, np.log(p_init), p_max)
    model = emcee3.SimpleModel(mod.lnlike_split, mod.Glnprior)
    p0 = [theta_init + 1e-4 * np.random.rand(ndim) for i in range(nwalkers)]
    ensemble = emcee3.Ensemble(model, p0)
    sampler = emcee3.Sampler()

    print("burning in...")
    total_start = time.time()
    ensemble = sampler.run(ensemble, burnin)

    # NOTE(review): the ensemble is rebuilt from the original p0 here, so the
    # production runs do not start from the burned-in positions -- confirm
    # that this is intended.
    ensemble = emcee3.Ensemble(model, p0)

    # repeating MCMC runs.
    # (A preallocated `sample_array` used to be created here with a float
    # dimension, which raises TypeError in numpy; it was never used.)
    autocorr_times, mean_ind, mean_diff = [], [], []
    samples_path = os.path.join(RESULTS_DIR, "{0}.h5".format(id))
    for i, run in enumerate(runs):
        nsteps = int(run)  # `runs` is a float array; step counts must be int
        print("run {0} of {1}".format(i, len(runs)))
        print("production run, {0} steps".format(nsteps))
        start = time.time()
        ensemble = sampler.run(ensemble, nsteps)
        end = time.time()
        print("time taken = ", (end - start) / 60, "minutes")

        flat_samples = sampler.get_coords(flat=True)

        # Overwrite the file with the full chain so far; the context manager
        # guarantees the file is closed even if the write fails.
        with h5py.File(samples_path, "w") as f:
            data = f.create_dataset("samples", np.shape(flat_samples))
            data[:, :] = flat_samples

        print("samples = ", np.shape(flat_samples))
        make_plot(sampler,
                  x,
                  y,
                  yerr,
                  id,
                  RESULTS_DIR,
                  truths,
                  traces=True,
                  tri=True,
                  prediction=True)
        conv, autocorr_times, ind_samp, diff = evaluate_convergence(
            flat_samples, autocorr_times, diff_threshold, n_independent)
        mean_ind.append(ind_samp)
        mean_diff.append(diff)
        print("Converged?", conv)
        if conv:
            break

    total_end = time.time()
    total_time = total_end - total_start
    print("Total time taken = ", total_time / 60., "minutes",
          total_time / 3600., "hours")

    with open(os.path.join(RESULTS_DIR, "{0}_time.txt".format(id)), "w") as f:
        f.write("{}".format(total_time))