def _update_convergence_plots(
    database,
    criterion_cds,
    param_cds,
    session_data,
    start_params,
    rollover,
    update_chunk,
    stride,
):
    """Callback to look up new entries in the database and plot them.

    Args:
        database (sqlalchemy.MetaData)
        session_data (dict): infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the ColumnDataSource
            - database_path
        start_params (pd.DataFrame)
        rollover (int): maximal number of points to show in the plot
        update_chunk (int): Number of values to add at each update.
        criterion_cds (bokeh.ColumnDataSource)
        param_cds (bokeh.ColumnDataSource)
        stride (int): Plot every stride_th database row in the dashboard. Note that
            some database rows only contain gradient evaluations, thus for some
            values of stride the convergence plot of the criterion function can be
            empty.

    """
    # bound used to clip criterion values so non-finite / extreme values cannot
    # break the bokeh plots
    clip_bound = np.finfo(float).max

    # fetch only the rows added since the last update, thinned out by `stride`
    data, new_last = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=session_data["last_retrieved"],
        return_type="dict_of_lists",
        limit=update_chunk,
        stride=stride,
    )

    # update the criterion plot
    # drop rows with a None criterion value (e.g. gradient-only evaluations)
    missing = [i for i, val in enumerate(data["value"]) if val is None]
    crit_data = {
        "iteration": [id_ for i, id_ in enumerate(data["rowid"]) if i not in missing],
        "criterion": [
            np.clip(val, -clip_bound, clip_bound)
            for i, val in enumerate(data["value"])
            if i not in missing
        ],
    }
    _stream_data(cds=criterion_cds, data=crit_data, rollover=rollover)

    # update the parameter plots
    # Note: we need **all** parameter ids to correctly map them to the parameter
    # entries in the database. Only after can we restrict them to the entries we
    # need.
    param_ids = start_params["id"].tolist()
    params_data = _create_params_data_for_update(data, param_ids, clip_bound)
    _stream_data(cds=param_cds, data=params_data, rollover=rollover)

    # remember how far we have read so the next callback only fetches new rows
    session_data["last_retrieved"] = new_last
def test_steps_table(tmp_path):
    """Rows appended to the steps table can be read back, skipping the first."""
    path = tmp_path / "test.db"
    database = load_database(path=path)
    make_steps_table(database)

    for status in ("scheduled", "running", "completed"):
        row = {
            "status": status,
            "n_iterations": 0,
            "type": "optimization",
            "name": "bla",
        }
        append_row(row, "steps", database, path, False)

    res, _ = read_new_rows(database, "steps", 1, "dict_of_lists")

    assert res == {
        "rowid": [2, 3],
        "status": ["running", "completed"],
        "type": ["optimization", "optimization"],
        "name": ["bla", "bla"],
        "n_iterations": [0, 0],
    }
def _read_optimization_history(database, params_treedef, registry):
    """Read the history of parameters, criterion values and runtimes.

    Rows without a criterion value (e.g. pure derivative evaluations) are
    skipped. Runtimes are normalized so the first entry is zero.
    """
    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    history = {"params": [], "criterion": [], "runtime": []}
    for row in raw_res:
        if row["value"] is None:
            continue
        unflat_params = tree_unflatten(
            params_treedef, row["params"], registry=registry
        )
        history["params"].append(unflat_params)
        history["criterion"].append(row["value"])
        history["runtime"].append(row["timestamp"])

    # express runtimes relative to the first recorded timestamp
    times = np.array(history["runtime"])
    history["runtime"] = times - times[0]

    return history
def test_optimization_status_table(tmp_path):
    """Status rows after the first can be retrieved from the status table."""
    path = tmp_path / "test.db"
    database = load_database(path=path)
    make_optimization_status_table(database)

    for status in ("scheduled", "running", "success"):
        append_row({"status": status}, "optimization_status", database, path, False)

    res, _ = read_new_rows(database, "optimization_status", 1, "dict_of_lists")

    assert res == {"rowid": [2, 3], "status": ["running", "success"]}
def test_read_new_rows_stride(tmp_path, iteration_data):
    """With stride=2, only every second row after last_retrieved is returned."""
    path = tmp_path / "test.db"
    database = load_database(path=path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy rowids start counting at 1
    for value in range(1, 11):
        iteration_data["value"] = value
        append_row(iteration_data, "optimization_iterations", database, path, False)

    data, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=1,
        return_type="dict_of_lists",
        stride=2,
    )

    assert data["value"] == [2.0, 4.0, 6.0, 8.0, 10.0]
def test_read_new_rows_with_step(tmp_path, iteration_data):
    """Filtering by step returns only the rows belonging to that step."""
    path = tmp_path / "test.db"
    database = load_database(path=path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy rowids start counting at 1; alternate step between 1 and 0
    for value in range(1, 11):
        iteration_data["value"] = value
        iteration_data["step"] = value % 2
        append_row(iteration_data, "optimization_iterations", database, path, False)

    res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
        step=0,
    )

    assert res["rowid"] == [2, 4, 6, 8, 10]
def test_update_row(tmp_path, iteration_data):
    """An updated row value is visible in subsequent reads."""
    path = tmp_path / "test.db"
    database = load_database(path=path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy rowids start counting at 1
    for value in range(1, 11):
        iteration_data["value"] = value
        append_row(iteration_data, "optimization_iterations", database, path, False)

    update_row({"value": 20}, 8, "optimization_iterations", database, path, False)

    data, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=3,
        return_type="dict_of_lists",
    )

    assert data["value"] == [4, 5, 6, 7, 20, 9, 10]
def read_optimization_histories(path_or_database):
    """Read the value, parameter and metadata histories of an optimization.

    Returns a dict with the keys "values" (pd.Series, rows with missing values
    dropped), "params" (pd.DataFrame, one column per start parameter),
    "metadata" (pd.DataFrame with timestamps and validity flags) and, when
    present in the database, "contributions".
    """
    database = load_database(**_process_path_or_database(path_or_database))
    start_params = read_start_params(path_or_database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    value_history = pd.Series(raw_res["value"])
    params_history = pd.DataFrame(raw_res["params"], columns=start_params.index)

    metadata = pd.DataFrame()
    metadata["timestamps"] = raw_res["timestamp"]
    metadata["valid"] = raw_res["valid"]
    metadata["has_value"] = value_history.notnull()
    metadata["has_derivative"] = [
        deriv is not None for deriv in raw_res["internal_derivative"]
    ]

    histories = {
        "values": value_history.dropna(),
        "params": params_history,
        "metadata": metadata,
    }

    if "contributions" in raw_res:
        first_contrib = raw_res["contributions"][0]
        # a Series carries the contribution labels; otherwise no columns known
        columns = (
            first_contrib.index if isinstance(first_contrib, pd.Series) else None
        )
        histories["contributions"] = pd.DataFrame(
            raw_res["contributions"], columns=columns
        ).dropna()

    return histories
def read_optimization_problem_table(path_or_database):
    """Load the optimization problem table as a DataFrame.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        problem_df (pandas.DataFrame): one row per entry of the
            ``optimization_problem`` table.

    """
    database = _load_database(path_or_database)
    # last_retrieved=0 means: read the table from the beginning
    problem_table, _ = read_new_rows(
        database=database,
        table_name="optimization_problem",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    problem_df = pd.DataFrame(problem_table)
    return problem_df
def read_steps_table(path_or_database):
    """Load the steps table.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        steps_df (pandas.DataFrame)

    """
    database = _load_database(path_or_database)
    # last_retrieved=0 means: read the table from the beginning
    rows, _ = read_new_rows(
        database=database,
        table_name="steps",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    return pd.DataFrame(rows)
def test_all_steps_occur_in_optimization_iterations_if_no_convergence(params):
    """Without convergence, iterations from every multistart step are logged."""
    minimize(
        criterion=sos_dict_criterion,
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
        multistart_options={"convergence_max_discoveries": np.inf},
        logging="logging.db",
    )

    database = load_database(path="logging.db")
    iterations, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    assert set(iterations["step"]) == {1, 2, 3, 4, 5}
def _read_multistart_optimization_history(
    database, params_treedef, registry, direction
):
    """Read multistart histories of values, parameters and other information.

    Args:
        database (sqlalchemy.MetaData)
        params_treedef: treedef used to unflatten the stored parameter vectors.
        registry: pybaum registry passed to ``tree_unflatten``.
        direction (str): either "minimize" or "maximize".

    Returns:
        tuple:
        - dict: history that led to lowest criterion
        - dict: all other histories
        - dict: exploration phase

    Raises:
        ValueError: if ``direction`` is neither "minimize" nor "maximize".

    """
    # ==================================================================================
    # Process raw data
    # ==================================================================================
    steps = read_steps_table(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="list_of_dicts",
    )

    # collect only rows that contain a criterion value (skip derivative-only rows)
    history = {"params": [], "criterion": [], "runtime": [], "step": []}
    for data in raw_res:
        if data["value"] is not None:
            params = tree_unflatten(params_treedef, data["params"], registry=registry)
            history["params"].append(params)
            history["criterion"].append(data["value"])
            history["runtime"].append(data["timestamp"])
            history["step"].append(data["step"])

    # express runtimes relative to the first recorded timestamp
    times = np.array(history["runtime"])
    times -= times[0]
    history["runtime"] = times

    # ==================================================================================
    # Format data as data frames
    # ==================================================================================
    df = pd.DataFrame(history)
    df = df.merge(steps[["rowid", "type"]], left_on="step", right_on="rowid")
    df = df.drop(columns="rowid")

    # ==================================================================================
    # Extract data from df
    # ==================================================================================
    exploration = df.query("type == 'exploration'").drop(columns=["step", "type"])

    histories = df.query("type == 'optimization'")
    histories = histories.drop(columns="type")
    histories = histories.set_index("step", append=True)

    # ==================================================================================
    # The best history is given by the history that attains the global minimum or
    # maximum. All other histories are defined as local histories.
    # ==================================================================================
    if direction == "minimize":
        best_idx = histories["criterion"].groupby(level="step").min().idxmin()
        exploration = exploration.sort_values(by="criterion", ascending=True)
    elif direction == "maximize":
        best_idx = histories["criterion"].groupby(level="step").max().idxmax()
        exploration = exploration.sort_values(by="criterion", ascending=False)
    else:
        raise ValueError(
            f"direction must be 'minimize' or 'maximize', got {direction!r}."
        )

    history = histories.xs(best_idx, level="step").to_dict(orient="list")

    exploration = None if len(exploration) == 0 else exploration
    if exploration is not None:
        exploration = exploration.to_dict(orient="list")

    # every optimization step except the best one yields a local history
    local_histories = []
    for idx in (
        histories.index.get_level_values("step").unique().difference([best_idx])
    ):
        _local_history = histories.xs(idx, level="step").to_dict(orient="list")
        local_histories.append(_local_history)

    local_histories = None if len(local_histories) == 0 else local_histories

    return history, local_histories, exploration