# Time the log-likelihood evaluation on the first ``n`` data points.
# NOTE(review): ``i`` and ``n`` are not assigned anywhere in this view;
# presumably an enclosing loop over ``N`` provides them -- confirm, and
# re-indent the statements up to ``print`` under that loop if so.
times[i, 1] = benchmark(
    "gp.log_likelihood(y[:{0}])".format(n),
    "from __main__ import gp, y",
)

# The dense Cholesky reference is only timed for n <= 4096; larger sizes keep
# whatever value ``times[:, 2]`` already held (the plot below masks
# non-finite entries).
if n <= 4096:
    # Both snippets hold several Python statements, so they must stay
    # newline-separated to remain valid code for the benchmark helper.
    times[i, 2] = benchmark(
        """
C = gp.get_matrix(t[:{0}])
C[np.diag_indices_from(C)] += yerr[:{0}]**2
cho_factor(C)
""".format(n),
        """
from __main__ import gp, t, yerr
import numpy as np
from scipy.linalg import cho_factor
""",
    )
print(n, times[i])

# Plot the measured cost against the number of data points on log-log axes.
fig, ax = plt.subplots(1, 1, figsize=plot_setup.get_figsize(1, 1))

# Linear-scaling reference line, normalised to 2.0 at the largest N.
# The label is a raw string: "\m" is not a valid escape sequence, so the
# non-raw form triggers an invalid-escape warning on modern Python.
ax.plot(N, N / N[-1] * 2.0, "k", label=r"$\mathcal{O}(N)$")
ax.plot(N, times[:, 0], ".-", label="compute")
ax.plot(N, times[:, 1], ".--", label="log likelihood")

# Only draw the Cholesky timings that were actually measured.
m = np.isfinite(times[:, 2])
ax.plot(N[m], times[:, 2][m], ".:", label="Cholesky")

ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlim(N.min(), N.max())
ax.set_ylim(2e-5, 3.0)
ax.set_xlabel("number of data points")
ax.set_ylabel("computational cost [seconds]")
fig.savefig("benchmark.pdf", bbox_inches="tight")
# Fragment (collapsed onto one line; original line breaks were lost).
# It opens inside a function body whose ``def`` line is outside this view --
# presumably the ``log_probability`` callable handed to the sampler below.
# Visible behaviour of that tail: return -inf when ``lp`` is not finite,
# otherwise return ``ll + lp`` when the GP log-likelihood ``ll`` is finite and
# -inf when it is not.
# The script part then starts 32 walkers at ``soln.x`` plus 1e-4-scaled
# Gaussian noise, runs 500 steps, resets the sampler, and runs 2000 more steps
# from the resulting coordinates.
# Finally it begins a two-panel figure: the left panel shows the data with
# error bars; the line ends inside an unfinished ``ax2.hist(`` call, so the
# remainder of that statement is not visible here.
if not np.isfinite(lp): return -np.inf ll = gp.log_likelihood(y) return ll + lp if np.isfinite(ll) else -np.inf ndim = len(soln.x) nwalkers = 32 coords = soln.x + 1e-4 * np.random.randn(nwalkers, ndim) sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability) coords, _, _ = sampler.run_mcmc(coords, 500) sampler.reset() coords, _, _ = sampler.run_mcmc(coords, 2000) # Plot the results fig, (ax1, ax2) = plt.subplots(1, 2, figsize=plot_setup.get_figsize(1, 2)) ax1.errorbar(t, y, yerr=yerr, fmt=".k", capsize=0) ax1.set_ylim(-3.25, 3.25) ax1.set_xlim(0, 20) ax1.set_xlabel("time [day]") ax1.set_ylabel("relative flux [ppm]") ax1.annotate("simulated data", xy=(0, 0), xycoords="axes fraction", xytext=(5, 5), textcoords="offset points", ha="left", va="bottom") n, b, p = ax2.hist(np.exp(-sampler.flatchain[:, -2]) * (2 * np.pi),
# Load the benchmark table; lines starting with "#" are treated as comments.
# NOTE(review): ``args``, ``fn`` and ``suffix`` are defined outside this view.
fn = os.path.join(args.directory, fn)
data = pd.read_csv(fn, comment="#")

# Grid of total cost (compute + log-likelihood) addressed by the (xi, yi)
# index columns; cells without a measurement stay NaN.
data_matrix = np.full((data.xi.max() + 1, data.yi.max() + 1), np.nan)
data_matrix[data.xi, data.yi] = data.comp_time + data.ll_time

# The direct (NumPy) timing columns may be absent from the table; fall back
# to all-NaN so the "direct" curve is simply skipped instead of raising.
try:
    np_time = np.array(data.numpy_comp_time + data.numpy_ll_time)
except AttributeError:
    np_time = np.full(len(data), np.nan)

# Keep only the rows where a direct timing was actually recorded.
np_m = np.isfinite(np_time)
np_time = np_time[np_m]
np_n = np.array(data.n)[np_m]
np_j = np.array(data.j)[np_m]

# Sorted unique values of the ``j`` and ``n`` columns.
J = np.sort(np.array(data.j.unique()))
N = np.sort(np.array(data.n.unique()))

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=get_figsize(1, 2), sharey=True)

# One curve per value of J, read from row ``i`` of the grid and plotted
# against N, skipping the NaN cells.
for i, j in enumerate(J):
    x = N
    y = data_matrix[i]
    m = np.isfinite(y)
    ax1.plot(x[m], y[m], ".-", color=COLOR_CYCLE[i],
             label="{0:.0f}".format(j))

# ``len`` rather than truthiness: the truth value of an array is ambiguous.
if len(np_time):
    ax1.plot(np_n, np_time, ":k", label="direct")

# Extra N log^2 N reference curve, drawn only for the "_george" variant and
# normalised to 4.0 at the largest N.
if suffix == "_george":
    f = N * np.log(N)**2
    ax1.plot(N, 4.0 * f / f[-1], ":k", label=r"$\mathcal{O}(N\,\log^2N)$")

# Linear reference line, normalised to 4e-2 at the largest N.
ax1.plot(N, 4e-2 * N / N[-1], "k", label=r"$\mathcal{O}(N)$")
ax1.legend(loc="lower right", bbox_to_anchor=(1.05, 0), fontsize=8)
# Fragment (collapsed onto one line; original line breaks were lost).
# It opens with per-segment appends -- presumably the body of a loop whose
# header is outside this view. Each segment's flux ``y0[m]`` is divided by its
# median ``mu`` and converted to a relative offset times 1e6 (the axis label
# below calls this ppm); the "PDCSAP_FLUX_ERR" column is scaled the same way.
# The pieces are then concatenated, sorted by ``x``, and stored as contiguous
# float arrays.
# Next the light curve is plotted with y-limits at +/- 5 standard deviations
# of ``y`` and saved under ``format_filename("time_series")`` at 300 dpi.
# Finally a 100000-point grid from 1 to 300 is built (``freq_uHz``; the name
# suggests microhertz), multiplied by ``uHz_conv`` (defined elsewhere), and a
# ``LombScargle`` model is constructed from the full ``x``/``y`` series.
x.append(x0[m]) mu = np.median(y0[m]) y.append((y0[m] / mu - 1.0) * 1e6) yerr.append(1e6 * data["PDCSAP_FLUX_ERR"][m] / mu) x = np.concatenate(x) y = np.concatenate(y) yerr = np.concatenate(yerr) inds = np.argsort(x) x = np.ascontiguousarray(x[inds], dtype=float) y = np.ascontiguousarray(y[inds], dtype=float) yerr = np.ascontiguousarray(yerr[inds], dtype=float) # Plot the light curve. fig, ax = plt.subplots(1, 1, figsize=get_figsize()) ax.plot(x, y, "k", rasterized=True) ax.set_xlim(x.min(), x.max()) ax.set_ylim(np.std(y) * np.array([-5.0, 5.0])) ax.set_xlabel("time [KBJD]") ax.set_ylabel("relative flux [ppm]") ax.xaxis.set_major_locator(plt.MaxNLocator(4)) fig.savefig(format_filename("time_series"), bbox_inches="tight", dpi=300) plt.close(fig) # Define a frequency grid for the periodogram freq_uHz = np.linspace(1, 300, 100000) freq = freq_uHz * uHz_conv # Compute the periodogram on the full dataset model = LombScargle(x, y)
# Report the current parameters, then load the best solution found so far.
# NOTE(review): ``best``, ``gp``, ``t`` and ``y`` are defined outside this view.
print(gp.get_parameter_dict())
gp.set_parameter_vector(best[1])
ml_params = np.array(best[1])

# ---- Model predictions at the maximum-likelihood parameters ----
gp.set_parameter_vector(ml_params)

# Predictive mean and variance on a dense grid spanning the observations.
x = np.linspace(t.min(), t.max(), 5000)
mu, var = gp.predict(y, x, return_var=True)

# Power spectrum on a log-spaced frequency grid from 0.1 to 10.
log_omega_lo = np.log(0.1)
log_omega_hi = np.log(10)
omega = np.exp(np.linspace(log_omega_lo, log_omega_hi, 5000))
psd = gp.kernel.get_psd(omega)

# Kernel value (autocorrelation) out to four periods.
period = np.exp(gp.get_parameter("kernel:log_period"))
tau = np.linspace(0, 4 * period, 5000)
acf = gp.kernel.get_value(tau)

# ---- Figure: three panels side by side ----
fig, axes = plt.subplots(1, 3, figsize=get_figsize(1, 3))

# Panel 1: data with the predictive mean and a one-sigma band. The time axis
# is shifted by 380 so the panel starts near zero.
ax = axes[0]
color = COLORS["MODEL_1"]
one_sigma = np.sqrt(var)
ax.plot(t - 380, y, ".k", rasterized=True, zorder=-1)
ax.plot(x - 380, mu, color=color, zorder=100)
ax.fill_between(
    x - 380,
    mu + one_sigma,
    mu - one_sigma,
    color=color,
    alpha=0.3,
    zorder=100,
)
ax.set_xlim(0, 50)
ax.set_ylim(-1.2, 1.2)
ax.set_xlabel("time [days]")
# Burn-in is ten times the largest autocorrelation-time estimate.
# NOTE(review): ``tau``, ``mean_traces``, ``samples``, ``backend`` and ``gp``
# are defined outside this view.
tau = np.max(tau)
burnin = int(10 * tau)

# Re-estimate the autocorrelation time on the post-burn-in part of the chain
# and report the implied number of independent samples.
tau2 = emcee3.autocorr.integrated_time(mean_traces[burnin:], c=3, axis=0)
print("tau: {0}".format(tau2))
print("N_ind: {0}".format(np.prod(samples.shape[:2]) / tau2))

# Plot the traces
# Build axis labels from the parameter names: keep the part after the last
# ":", rewrite a leading "log_" as "log(...)", and turn underscores into
# spaces. The prefix test and the slice use the same "log_" string so a name
# that merely starts with "log" no longer loses a character.
log_prefix = "log_"
names = list(gp.get_parameter_names())
for i, full_name in enumerate(names):
    name = full_name.split(":")[-1]
    if name.startswith(log_prefix):
        name = "log(" + name[len(log_prefix):] + ")"
    names[i] = name.replace("_", " ")

# One row per sampled dimension plus a final row for the log-probability.
ndim = samples.shape[-1]
fig, axes = plt.subplots(ndim + 1, 1, sharex=True,
                         figsize=get_figsize(ndim // 2, 2))
for i in range(ndim):
    axes[i].plot(samples[:, :, i], color="k", alpha=0.3, rasterized=True)
    axes[i].set_ylabel(names[i])
    axes[i].yaxis.set_major_locator(plt.MaxNLocator(5))
    # Vertical line marking the end of burn-in.
    axes[i].axvline(burnin, color=COLORS["MODEL_1"])

axes[-1].plot(backend.get_log_probability(), color="k", alpha=0.3,
              rasterized=True)
axes[-1].set_ylabel("log(prob)")
axes[-1].yaxis.set_major_locator(plt.MaxNLocator(5))
axes[-1].axvline(burnin, color=COLORS["MODEL_1"])

fig.savefig(format_filename("trace"))
plt.close(fig)
# Reset the timing grid to NaN, then fill in the measured total cost
# (compute + log-likelihood) at each (xi, yi) cell.
# NOTE(review): ``data_matrix``, ``data`` and ``suffix`` are created outside
# this view.
data_matrix[:] = np.nan
data_matrix[data.xi, data.yi] = data.comp_time + data.ll_time

# Direct (NumPy) timings are optional columns; without them use all-NaN so
# that nothing survives the finite mask below.
try:
    np_time = np.array(data.numpy_comp_time + data.numpy_ll_time)
except AttributeError:
    np_time = np.full(len(data), np.nan)

# Restrict the direct timings, and their n / j values, to finite entries.
np_m = np.isfinite(np_time)
np_time = np_time[np_m]
np_n = np.array(data.n)[np_m]
np_j = np.array(data.j)[np_m]

# Sorted unique values of the ``j`` and ``n`` columns.
J = np.sort(np.array(data.j.unique()))
N = np.sort(np.array(data.n.unique()))

fig, (ax1, ax2) = plt.subplots(
    1, 2,
    figsize=get_figsize(1, 2),
    sharey=True,
)

# Draw one curve per J value from the matching grid row, omitting NaN cells.
for i, j in enumerate(J):
    x = N
    y = data_matrix[i]
    m = np.isfinite(y)
    curve_label = "{0:.0f}".format(j)
    ax1.plot(x[m], y[m], ".-", color=COLOR_CYCLE[i], label=curve_label)

# Direct-solver curve, only when at least one timing exists.
if len(np_time) > 0:
    ax1.plot(np_n, np_time, ":k", label="direct")

# N log^2 N reference curve for the "_george" variant, normalised to 4.0 at
# the largest N.
if suffix == "_george":
    f = N * np.log(N)**2
    ax1.plot(N, 4.0 * f / f[-1], ":k",
             label=r"$\mathcal{O}(N\,\log^2N)$")

# Linear reference line, normalised to 3e-2 at the largest N.
ax1.plot(N, 3e-2 * N / N[-1], "k", label=r"$\mathcal{O}(N)$")
# Fragment (collapsed onto one line; original line breaks were lost).
# It opens inside a function body whose ``def`` line is outside this view --
# presumably ``sho_acf(Q, tau)``, which is called further down. That tail
# returns exp(-t) * (1 + t) when Q is numerically 0.5, and otherwise
# exp(-c t) * (cos(d t) + b sin(d t)) with b = 1/sqrt(4Q^2 - 1), c = 1/(2Q),
# d = sqrt(4Q^2 - 1)/(2Q).
# ``lorentz_psd(Q, x)`` returns Q^2 times the sum of two Lorentzian-shaped
# terms centred on x = +1 and x = -1; ``lorentz_acf(Q, tau)`` returns
# exp(-|tau| / (2Q)) * cos(|tau|).
# The script part sets up a three-panel figure and loops over four quality
# factors (1/2, 1/sqrt(2), 2, 10): panel 1 shows ``sho_psd`` (defined
# elsewhere), panel 2 shows ``sho_acf``, and panel 3 shows three random draws
# from a zero-mean multivariate normal whose covariance is ``sho_acf`` of all
# pairwise lags, offset by -5 per Q value with each draw's mean removed.
# The line ends inside an unfinished ``ax3.plot(`` call.
if np.allclose(Q, 0.5): return np.exp(-t) * (1.0 + t) b = 1.0 / np.sqrt(4*Q**2 - 1) c = 0.5 / Q d = 0.5 * np.sqrt(4*Q**2 - 1) / Q return np.exp(-c * t) * (np.cos(d*t)+b*np.sin(d*t)) def lorentz_psd(Q, x): return Q**2 * (1.0 / ((x - 1)**2 * (2*Q)**2 + 1) + 1.0 / ((x + 1)**2 * (2*Q)**2 + 1)) def lorentz_acf(Q, tau): t = np.abs(tau) return np.exp(-0.5*t/Q) * np.cos(t) fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=get_figsize(1, 3)) x = 10**np.linspace(-1.1, 1.1, 5000) tau = np.linspace(0, 20, 1000) for i, (Q_name, Q) in enumerate( [("1/2", 0.5), ("1/\\sqrt{2}", 1./np.sqrt(2)), ("2", 2.0), ("10", 10.0)]): l, = ax1.plot(x, sho_psd(Q, x), label="$Q = {0}$".format(Q_name), lw=1.5) c = l.get_color() ax2.plot(tau, sho_acf(Q, tau), label="$Q = {0}$".format(Q_name), lw=1.5, color=c) K = sho_acf(Q, tau[:, None] - tau[None, :]) y = np.random.multivariate_normal(np.zeros(len(tau)), K, size=3) ax3.axhline(-5*i, color="k", lw=0.75) ax3.plot(tau, -5*i + (y - np.mean(y, axis=1)[:, None]).T, color=c,
# Keep a copy of the optimiser's solution and report the likelihood there.
# NOTE(review): ``r``, ``gp``, ``t``, ``y`` and ``yerr`` are defined outside
# this view.
ml_params = np.array(r.x)
max_ll_message = "Maximum log-likelihood: {0}".format(gp.log_likelihood(y))
print(max_ll_message)

# ---- Maximum-likelihood predictions ----
x = np.linspace(t.min(), t.max(), 5000)

# GP prediction at the observed times with the mean model's variation about
# ``mean_flux`` removed; this is what gets subtracted for de-trending.
trend = gp.predict(y, t, return_cov=False)
trend -= gp.mean.get_value(t) - gp.mean.mean_flux

# Dense-grid prediction with the mean model subtracted.
mu, var = gp.predict(y, x, return_var=True)
std = np.sqrt(var)
mean_mu = gp.mean.get_value(x)
mu -= mean_mu

# Error bars with the fitted white-noise term added to the squared errors.
wn = np.exp(gp.log_white_noise.value)
ml_yerr = np.sqrt(yerr**2 + wn)

# ---- Figure: raw data on top, de-trended data below ----
# Both panels share a time axis measured from the first observation.
t_start = t.min()
fig, (ax1, ax2) = plt.subplots(
    2, 1,
    sharex=True,
    figsize=get_figsize(1, 2),
)

ax1.errorbar(t - t_start, y, yerr=ml_yerr, fmt=".k", capsize=0)
ax1.plot(x - t_start, mu)
ax1.set_ylim(-0.72, 0.72)
ax1.yaxis.set_major_locator(plt.MaxNLocator(5))
ax1.set_ylabel("raw [ppt]")

ax2.errorbar(t - t_start, y - trend, yerr=ml_yerr, fmt=".k", capsize=0)
ax2.plot(x - t_start, mean_mu - gp.mean.mean_flux)
ax2.set_xlim(0, t.max() - t_start)
ax2.set_ylim(-0.41, 0.1)
ax2.yaxis.set_major_locator(plt.MaxNLocator(5))
ax2.set_ylabel("de-trended [ppt]")
ax2.set_xlabel("time [days]")

fig.savefig("transit-ml.pdf")
# Fragment (collapsed onto one line; original line breaks were lost).
# The opening loop writes eight d-row blocks into columns d..2d of ``full``
# (allocated outside this view), working upward from the bottom in steps of
# 2*d rows and assigning the values 8 down to 1.
# ``c`` lists ten RGB tuples (labelled "Seaborn set1" in the inline comment);
# the first and last entries are identical. The colormap prepends "white", so
# value 0 maps to white and values 1..len(c) map to the listed colours.
# The matrix is drawn with ``pcolor``, every non-zero cell gets a thin black
# outline, the y-axis is flipped (limits run from ``full_dim`` down to 0), and
# the frame and ticks are hidden.
# The line ends on the opening bracket of ``names = [``; the contents of that
# list are not visible here.
for i in range(8): full[-2 * d - 2 * d * i:-d - 2 * d * i, d:2 * d] = 8 - i # Seaborn set1 c = [(0.89411765336990356, 0.10196078568696976, 0.10980392247438431), (0.21602460800432691, 0.49487120380588606, 0.71987698697576341), (0.30426760128900115, 0.68329106055054012, 0.29293349969620797), (0.60083047361934883, 0.30814303335021526, 0.63169552298153153), (1.0, 0.50591311045721465, 0.0031372549487095253), (0.99315647868549117, 0.9870049982678657, 0.19915417450315812), (0.65845446095747107, 0.34122261685483596, 0.1707958535236471), (0.95850826852461868, 0.50846600392285513, 0.74492888871361229), (0.60000002384185791, 0.60000002384185791, 0.60000002384185791), (0.89411765336990356, 0.10196078568696976, 0.10980392247438431)] cmap = matplotlib.colors.ListedColormap(["white"] + list(c), name="cmap") fig, ax = plt.subplots(1, 1, figsize=get_figsize(2.3, 2.3)) ax.pcolor(full, cmap=cmap, vmin=0, vmax=len(c)) # Plot the edges for i, j in product(range(full_dim), range(full_dim)): if full[i, j] == 0: continue ax.plot((j, j, j + 1, j + 1, j), (i, i + 1, i + 1, i, i), "k", lw=0.5) ax.set_ylim(full_dim, 0) ax.set_xlim(0, full_dim) ax.set_frame_on(False) ax.set_xticks([]) ax.set_yticks([]) names = [