def optimize_mixture_model(index, init=None):

    # Select indices and get data.
    indices = finite_indices[npm.query_around_point(kdt, X[index], **kdt_kwds)]

    y = np.array([data[ln][indices] for ln in config["predictor_label_names"]]).T

    if init is None:
        init = npm.get_initialization_point(y)

    opt_kwds = dict(init=init, data=dict(y=y, N=y.shape[0], D=y.shape[1]))
    opt_kwds.update(default_opt_kwds)

    # Do optimization.
    with stan.suppress_output():
        try:
            p_opt = npm._check_params_dict(model.optimizing(**opt_kwds))
        except Exception:
            p_opt = None

    return (index, p_opt, indices)
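# The `model.optimizing(**opt_kwds)` call above assumes a pystan 2.x-style
# StanModel whose `optimizing()` method takes `data` and `init` dictionaries and
# returns point estimates. A minimal, self-contained sketch of that calling
# pattern, using a toy model rather than the mixture model fitted here:
import pystan

toy_model_code = """
data {
    int<lower=1> N;
    vector[N] y;
}
parameters {
    real mu;
    real<lower=0> sigma;
}
model {
    y ~ normal(mu, sigma);
}
"""

toy_model = pystan.StanModel(model_code=toy_model_code)
toy_opt = toy_model.optimizing(data=dict(N=3, y=[0.8, 1.0, 1.2]),
                               init=dict(mu=1.0, sigma=0.5))
print(toy_opt)  # dict of point estimates, e.g. {"mu": ..., "sigma": ...}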
def optimize_mixture_model(index, init=None):

    # Select indices and get data.
    indices = finite_indices[npm.query_around_point(kdt, X[index], **kdt_kwds)]

    y = np.array([data[ln][indices] for ln in config["predictor_label_names"]]).T

    if init is None:
        init = npm.get_initialization_point(y)

    data_dict = dict(y=y, N=y.shape[0], D=y.shape[1], max_log_y=np.log(np.max(y)))
    for k, v in bounds.items():
        data_dict["{}_bounds".format(k)] = v

    trial_results = []
    for j, init_dict in enumerate((init, npm.get_initialization_point(y), "random")):

        # Check that the initial parameters are within the bounds.
        if isinstance(init_dict, dict) and bounds is not None:
            for k, (lower, upper) in bounds.items():
                if not (upper > init_dict[k] > lower):
                    logging.info("Clipping initial value of {} from {} to within ({}, {})".format(
                        k, init_dict[k], lower, upper))
                    offset = 0.01 * (upper - lower)
                    init_dict[k] = np.clip(init_dict[k], lower + offset, upper - offset)

        opt_kwds = dict(init=init_dict, data=data_dict)
        opt_kwds.update(default_opt_kwds)

        # Do optimization.
        exception = None
        with stan.suppress_output() as sm:
            try:
                p_opt = model.optimizing(**opt_kwds)
            except Exception as exc:
                p_opt, exception = None, exc
            # TODO: Consider relaxing the optimization tolerances!

        if p_opt is None:
            # Capture stdout and stderr so we can read it later.
            stdout, stderr = sm.stdout, sm.stderr

        trial_results.append(p_opt)

        if p_opt is None:
            logging.warning("Exception when optimizing on index {} from "
                            "initial point {}:".format(index, init_dict))
            logging.warning(stdout)
            logging.warning(stderr)
            raise exception

        else:
            tolerance = 1e-2
            for k, v in p_opt.items():
                if k not in bounds or (k == "theta" and v >= 1 - tolerance):
                    continue

                lower, upper = bounds[k]
                if np.abs(v - lower) <= tolerance or np.abs(v - upper) <= tolerance:
                    logging.warning("Optimised {} at edge of grid ({} < {} < {})"
                                    " - ignoring".format(k, lower, v, upper))
                    break

            else:
                break

    else:
        # TODO: Consider relaxing optimization tolerances!
        logging.warning("Optimization did not converge from any initial point "
                        "trialled. Consider relaxing optimization tolerances!")

        import matplotlib.pyplot as plt

        fig, ax = plt.subplots()
        ax.hist(y, bins=100, facecolor="#cccccc", density=True)

        # Plot a model at some optimization point.
        init = npm.get_initialization_point(y)
        N, D = y.shape
        xi = np.linspace(0, np.max(y), 1000)

        ax.plot(xi, npm.norm_pdf(xi, init["mu_single"], init["sigma_single"], init["theta"]),
                c="r")
        ax.plot(xi, npm.lognorm_pdf(xi, init["mu_multiple"], init["sigma_multiple"], init["theta"]),
                c="r", linestyle=":")

        ax.plot(xi, npm.norm_pdf(xi, default_init["mu_single"], default_init["sigma_single"], default_init["theta"]),
                c="b")
        ax.plot(xi, npm.lognorm_pdf(xi, default_init["mu_multiple"], default_init["sigma_multiple"], default_init["theta"]),
                c="b", linestyle=":")

        raise RuntimeError("optimization did not converge from any initial point")

    p_opt = npm._check_params_dict(p_opt)

    return (index, p_opt, indices)
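# The `stan.suppress_output()` context manager used above is read back after the
# block exits (`sm.stdout`, `sm.stderr`, and later `sm.outputs`). A minimal
# sketch of that capture pattern, assuming only that the helper redirects the
# interpreter-level streams; the real helper may also redirect the C-level file
# descriptors that Stan writes to. Illustrative only, not the actual implementation.
import contextlib
import io


class SuppressOutputSketch:

    def __init__(self, suppress_output=True):
        self.suppress_output = suppress_output

    def __enter__(self):
        self._stdout_buffer, self._stderr_buffer = io.StringIO(), io.StringIO()
        self._redirects = []
        if self.suppress_output:
            self._redirects = [contextlib.redirect_stdout(self._stdout_buffer),
                               contextlib.redirect_stderr(self._stderr_buffer)]
            for redirect in self._redirects:
                redirect.__enter__()
        return self

    def __exit__(self, *exc_info):
        for redirect in reversed(self._redirects):
            redirect.__exit__(*exc_info)
        # Expose the captured text so callers can log it when optimization fails.
        self.stdout = self._stdout_buffer.getvalue()
        self.stderr = self._stderr_buffer.getvalue()
        self.outputs = (self.stdout, self.stderr)
        return False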
def optimize_mixture_model(index, inits=None, scalar=5):

    # Select indices and get data.
    d, nearby_idx, meta = npm.query_around_point(kdt, X[index], **kdt_kwds)

    y = Y[nearby_idx]
    ball = X[nearby_idx]

    if inits is None:
        inits = npm._get_1d_initialisation_point(
            y, scalar=scalar, bounds=model_config["bounds"])

    # Update meta dictionary with things about the data.
    meta = dict(max_log_y=np.log(np.max(y)),
                N=nearby_idx.size,
                y_percentiles=np.percentile(y, [16, 50, 84]),
                ball_ptps=np.ptp(ball, axis=0),
                ball_medians=np.median(ball, axis=0),
                init_points=inits,
                kdt_indices=nearby_idx)

    data_dict = dict(y=y, N=y.size, scalar=scalar)
    data_dict.update(bounds)
    #for k, v in model_config["parameter_bounds"].items():
    #    data_dict["{}_bounds".format(k)] = v

    p_opts = []
    ln_probs = []
    for j, init_dict in enumerate(inits):

        opt_kwds = dict(init=init_dict, data=data_dict)
        opt_kwds.update(default_opt_kwds)

        # Do optimization.
        # TODO: Suppressing output is always dangerous.
        with stan.suppress_output(config.get("suppress_stan_output", True)) as sm:
            try:
                p_opt = model.optimizing(**opt_kwds)
            except Exception:
                logging.exception(f"Exception occurred when optimizing index {index}"
                                  f" from {init_dict}:")
            else:
                if p_opt is not None:
                    p_opts.append(p_opt)
                    ln_probs.append(npm.ln_prob(y, 1, *npm._pack_params(**p_opt)))

        try:
            p_opt
        except UnboundLocalError:
            logging.warning("Stan failed. STDOUT & STDERR:")
            logging.warning("\n".join(sm.outputs))
        else:
            if p_opt is None:
                stdout, stderr = sm.outputs
                logging.warning(f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}")

    if len(p_opts) < 1:
        logging.warning("Optimization on index {} did not converge from any "
                        "initial point trialled. Consider relaxing the "
                        "optimization tolerances! If this occurs regularly "
                        "then something is very wrong!".format(index))

        return (index, None, meta)

    else:
        # Evaluate the best trial.
        idx = np.argmax(ln_probs)
        p_opt = p_opts[idx]
        meta["init_idx"] = idx

    """
    if sum(done) > 550 and sum(done) < 570:

        theta, mu_single, sigma_single, mu_multiple, sigma_multiple = npm._pack_params(**p_opt)

        fig, ax = plt.subplots()
        xi = np.linspace(0, 20, 1000)

        y_s = npm.norm_pdf(xi, mu_single, sigma_single, theta)
        y_m = npm.lognorm_pdf(xi, mu_multiple, sigma_multiple, theta)

        ax.plot(xi, y_s, c="tab:blue")
        ax.plot(xi, y_m, c="tab:red")

        p_single = np.exp(np.log(y_s) - logsumexp([np.log(y_s), np.log(y_m)], axis=0))

        ax.plot(xi, p_single, c="k")
        ax.set_title(f"{index}: {theta:.1e} {mu_single:.2f} {sigma_single:.2f} {sigma_multiple:.2f}")

        ax.hist(y, bins=np.linspace(0, 20, 20), alpha=0.5, facecolor="#666666", normed=True)

        if sum(done) > 570:
            raise a
    """

    return (index, p_opt, meta)
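# A sketch of the kind of quantity `npm.ln_prob` above could be returning in
# order to rank the trial optima: the total log-likelihood of a two-component
# mixture with a normal "single" component and a lognormal "multiple" component,
# mixed with weight theta. The name and exact form are assumptions for
# illustration; the actual npm implementation may also include priors or bounds.
import numpy as np
from scipy import stats


def sketch_ln_prob(y, theta, mu_single, sigma_single, mu_multiple, sigma_multiple):
    # Weighted log-density of each component at every data point.
    ln_s = np.log(theta) + stats.norm.logpdf(y, loc=mu_single, scale=sigma_single)
    ln_m = np.log(1 - theta) + stats.lognorm.logpdf(y, s=sigma_multiple,
                                                    scale=np.exp(mu_multiple))
    # Total log-likelihood of the data under the mixture.
    return np.sum(np.logaddexp(ln_s, ln_m))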
def optimize_mixture_model(index, inits=None, debug=False):

    suppress = config.get("suppress_stan_output", True)

    # Select indices and get data.
    d, nearby_idx, meta = npm.query_around_point(kdt, X[index], **kdt_kwds)

    y = Y[nearby_idx]
    ball = X[nearby_idx]

    if inits is None:
        inits = npm._get_1d_initialisation_point(
            y, scalar=mu_multiple_scalar, bounds=bounds)

    # Update meta dictionary with things about the data.
    meta = dict(max_log_y=np.log(np.max(y)),
                N=nearby_idx.size,
                y_percentiles=np.percentile(y, [16, 50, 84]),
                ball_ptps=np.ptp(ball, axis=0),
                ball_medians=np.median(ball, axis=0),
                init_points=inits,
                kdt_indices=nearby_idx)

    data_dict = dict(y=y, N=y.size, scalar=mu_multiple_scalar)
    data_dict.update(stan_bounds)

    p_opts = []
    ln_probs = []
    for j, init_dict in enumerate(inits):

        opt_kwds = dict(init=init_dict, data=data_dict, as_vector=False)
        opt_kwds.update(default_opt_kwds)

        # Do optimization.
        # TODO: Suppressing output is always dangerous.
        with stan.suppress_output(suppress) as sm:
            try:
                p_opt = model.optimizing(**opt_kwds)
            except Exception:
                logger.exception(f"Exception occurred when optimizing index {index}"
                                 f" from {init_dict}:")
            else:
                if p_opt is not None:
                    p_opts.append(p_opt["par"])
                    ln_probs.append(utils.ln_prob(
                        y, 1, *utils._pack_params(**p_opt["par"]), bounds=bounds))

                    assert abs(ln_probs[-1] - p_opt["value"]) < 1e-8

        try:
            p_opt
        except UnboundLocalError:
            logger.warning("Stan failed. STDOUT & STDERR:")
            logger.warning("\n".join(sm.outputs))
        else:
            if p_opt is None:
                stdout, stderr = sm.outputs
                logger.warning("Stan only returned p_opt = None")
                logger.warning(f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}")

    if len(p_opts) < 1:
        logger.warning(f"Optimization on index {index} did not converge "
                       "from any initial point trialled. Consider "
                       "relaxing the optimization tolerances! If this "
                       "occurs regularly then something is very wrong!")

        return (index, None, meta)

    else:
        # Evaluate the best trial.
        idx = np.argmax(ln_probs)
        p_opt = p_opts[idx]
        meta["init_idx"] = idx

    """
    # Calculate uncertainties.
    op_bounds = ()

    def nlp(p):
        w, mu_s, sigma_s, sigma_m = p
        mu_m = np.log(mu_s + mu_multiple_scalar * sigma_s) + sigma_m**2

        if not (bounds["theta"][1] >= w >= bounds["theta"][0]) \
        or not (bounds["mu_single"][1] >= mu_s >= bounds["mu_single"][0]) \
        or not (bounds["sigma_multiple"][1] >= sigma_m >= bounds["sigma_multiple"][0]):
            return np.inf

        return -utils.ln_likelihood(y, w, mu_s, sigma_s, mu_m, sigma_m)

    op_bounds = [bounds["theta"],
                 bounds["mu_single"],
                 bounds["sigma_single"],
                 bounds["sigma_multiple"]]

    #x0 = utils._pack_params(**p_opt)
    x0 = (p_opt["theta"], p_opt["mu_single"], p_opt["sigma_single"], p_opt["sigma_multiple"])
    p_opt2 = op.minimize(nlp, x0, bounds=op_bounds, method="L-BFGS-B")
    """

    # Create a three-panel figure showing:
    # (1) a log-density of the HRD + the selected ball points
    # (2) a log-density of colour vs apparent magnitude + the selected ball points
    # (3) the jitter + fitted parameters

    if sampling:

        chains = 2  # TODO: move to config file.
        sampling_kwds = dict(data=opt_kwds["data"], init=[p_opt] * chains, chains=chains)

        try:
            samples = model.sampling(**sampling_kwds)
        except Exception:
            pass
        else:
            extracted = samples.extract()
            chains = np.array([extracted[k] for k in samples.flatnames]).T

            latex_labels = dict(theta=r"$w$",
                                mu_single=r"$\mu_\mathrm{single}$",
                                sigma_single=r"$\sigma_\mathrm{single}$",
                                mu_multiple=r"$\mu_\mathrm{multiple}$",
                                sigma_multiple=r"$\sigma_\mathrm{multiple}$")

            corner_fig = corner.corner(
                chains, labels=[latex_labels[k] for k in samples.flatnames])

            source_id = S[index]

            figure_path = os.path.join(
                figures_dir, f"{model_name}-{source_id}-samples.png")
            corner_fig.savefig(figure_path, dpi=150)

            chains_path = os.path.join(
                figures_dir, f"{model_name}-{source_id}-chains.pkl")

            dump = dict(names=samples.flatnames, chains=chains, y=y, ball=ball, X=X[index])

            with open(chains_path, "wb") as fp:
                pickle.dump(dump, fp)

            plt.close("all")

    if plot_mixture_model_figures:

        source_id = S[index]
        figure_path = os.path.join(figures_dir, f"{model_name}-{source_id}.png")

        x_upper = 2 * config["models"][model_name]["bounds"]["mu_single"][1]

        bins = np.linspace(0, x_upper, 51)
        xi = np.linspace(0, x_upper, 1000)

        y_s = utils.norm_pdf(xi, p_opt["mu_single"], p_opt["sigma_single"], p_opt["theta"])
        y_m = utils.lognorm_pdf(xi, p_opt["mu_multiple"], p_opt["sigma_multiple"], p_opt["theta"])

        items_for_deletion = [
            axes[0].scatter(ball.T[0], ball.T[1], c="tab:blue", s=1, zorder=10, alpha=0.5),
            axes[1].scatter(ball.T[0], ball.T[2], c="tab:blue", s=1, zorder=10, alpha=0.5),
            axes[2].hist(y, bins=bins, facecolor="#cccccc", density=True, zorder=-1)[-1],
            axes[2].axvline(Y[index], c="#666666"),
            axes[2].plot(xi, y_s, c="tab:blue"),
            axes[2].fill_between(xi, np.zeros_like(y_s), y_s, facecolor="tab:blue", alpha=0.25),
            axes[2].plot(xi, y_m, c="tab:red"),
            axes[2].fill_between(xi, np.zeros_like(y_m), y_m, facecolor="tab:red", alpha=0.25),
        ]

        # Ax limits.
        axes[0].set_xlim(-0.5, 5)
        axes[0].set_ylim(10, -15)

        axes[1].set_xlim(-0.5, 5)
        axes[1].set_ylim(15, 3)

        axes[2].set_xlim(0, x_upper)
        axes[2].set_yticks([])

        fig.tight_layout()
        fig.savefig(figure_path, dpi=150)

        for item in items_for_deletion:
            try:
                item.set_visible(False)
            except AttributeError:
                for _ in item:
                    if hasattr(_, "set_visible"):
                        _.set_visible(False)

    if debug:
        # Create
        raise RuntimeError("stopping here for debugging (debug=True)")

    return (index, p_opt, meta)
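# The per-source chain dumps written above can be inspected later. A short,
# hedged sketch of reloading one and re-drawing the corner plot; `chains_path`
# is whichever path the block above wrote to, and the dict keys follow the
# pickle.dump call shown there.
import pickle
import corner


def replot_chains(chains_path):
    with open(chains_path, "rb") as fp:
        dump = pickle.load(fp)
    # dump contains: names, chains, y, ball, X.
    fig = corner.corner(dump["chains"], labels=dump["names"])
    return fig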
def optimize_mixture_model(index, inits=None, debug=False):

    # Select indices and get data.
    d, nearby_idx, meta = npm.query_around_point(kdt, X[index], **kdt_kwds)

    y = Y[nearby_idx]
    ball = X[nearby_idx]

    if y.size < kdt_kwds.get("minimum_points", np.inf):
        logger.warning(f"Danger: minimum number of points not found ({y.size})")

    if inits is None:
        inits = npm.get_1d_initialisation_point(y, scalar=mu_multiple_scalar, bounds=bounds)

    # Update meta dictionary with things about the data.
    meta = dict()
    if debug:
        meta.update(N=nearby_idx.size,
                    y_percentiles=np.percentile(y, [16, 50, 84]),
                    ball_ptps=np.ptp(ball, axis=0),
                    ball_medians=np.median(ball, axis=0),
                    init_points=inits,
                    kdt_indices=nearby_idx)

    data_dict = dict(y=y, N=y.size, mu_multiple_scalar=mu_multiple_scalar)
    data_dict.update(stan_bounds)

    p_opts = []
    ln_probs = []
    for j, init_dict in enumerate(inits):

        opt_kwds = dict(init=init_dict, data=data_dict, as_vector=False)
        opt_kwds.update(default_opt_kwds)

        with stan.suppress_output(suppress_output=(not debug)) as sm:
            try:
                p_opt = model.optimizing(**opt_kwds)
            except Exception:
                logger.exception(f"Exception occurred when optimizing index {index}"
                                 f" from {init_dict}:")
            else:
                if p_opt is not None:
                    p_opts.append(p_opt["par"])
                    ln_probs.append(p_opt["value"])

                    '''
                    s = np.argsort(y)

                    fig, ax = plt.subplots()
                    ax.plot(y[s], p_opt["par"]["ll_s"][s], c="tab:blue")
                    ax.plot(y[s], p_opt["par"]["ll_m"][s], c="tab:red")

                    if np.random.choice(200, 1)[0] == 42:
                        raise a
                    '''

        try:
            p_opt
        except UnboundLocalError:
            stdout, stderr = sm.outputs
            logger.warning("Stan failed. STDOUT:")
            logger.warning(stdout)
            logger.warning("STDERR:")
            logger.warning(stderr)
        else:
            if p_opt is None:
                stdout, stderr = sm.outputs
                logger.warning("Stan only returned p_opt = None")
                logger.warning(f"STDOUT:\n{stdout}\nSTDERR:\n{stderr}")

    if len(p_opts) < 1:
        logger.warning(f"Optimization on index {index} did not converge "
                       "from any initial point trialled. Consider "
                       "relaxing the optimization tolerances! If this "
                       "occurs regularly then something is very wrong!")

        return (index, None, meta)

    else:
        # Evaluate the best trial.
        idx = np.argmax(ln_probs)
        p_opt = p_opts[idx]
        meta["init_idx"] = idx

    return (index, p_opt, meta)
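# A sketch of how the final version above might be driven over many sources.
# `optimize_mixture_model` only needs an index and returns (index, p_opt, meta),
# so it maps cleanly onto a process pool on fork-based platforms where the
# module-level globals (kdt, X, Y, model, ...) are inherited by workers. The
# names `npoints`, `processes`, and `results` are hypothetical; the real driver
# script may iterate and checkpoint differently.
import multiprocessing as mp


def run_all(npoints, processes=4):
    results = dict()
    with mp.Pool(processes=processes) as pool:
        for index, p_opt, meta in pool.imap_unordered(optimize_mixture_model,
                                                      range(npoints)):
            # Keep whatever converged; failed indices come back with p_opt = None.
            results[index] = (p_opt, meta)
    return results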