def test_load_database_after_pickling(tmp_path):
    """Check that ``load_database`` re-binds metadata after a pickle round trip.

    Pickling unsets ``database.bind``; loading again with the unpickled metadata
    and the path is expected to set it.
    """
    db_path = tmp_path / "test.db"
    original = load_database(path=db_path)

    serialized = pickle.dumps(original)
    unpickled = pickle.loads(serialized)

    rebound = load_database(metadata=unpickled, path=db_path)
    assert rebound.bind is not None
def test_steps_table(tmp_path):
    """Append three rows to the steps table and read those after the first."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_steps_table(database)

    statuses = ["scheduled", "running", "completed"]
    for status in statuses:
        row = {
            "status": status,
            "n_iterations": 0,
            "type": "optimization",
            "name": "bla",
        }
        append_row(row, "steps", database, db_path, False)

    # only rows after the first one are requested
    res, _ = read_new_rows(database, "steps", 1, "dict_of_lists")

    expected = {
        "rowid": [2, 3],
        "status": ["running", "completed"],
        "type": ["optimization", "optimization"],
        "name": ["bla", "bla"],
        "n_iterations": [0, 0],
    }
    assert res == expected
def test_read_optimization_iteration(tmp_path):
    """Read the first and the last iteration and compare rowid and params."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)

    # fill the optimization_iterations table with three rows
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    for value in range(3):
        row = {"external_params": np.array([value])}
        append_row(row, "optimization_iterations", database, db_path, False)

    # fill the optimization_problem table with the start params
    make_optimization_problem_table(database)
    problem_row = {"params": pd.DataFrame(data=[10], columns=["value"])}
    append_row(problem_row, "optimization_problem", database, db_path, False)

    # (requested iteration, expected rowid, expected parameter value)
    cases = [(0, 1, 0), (-1, 3, 2)]
    for iteration, expected_rowid, expected_value in cases:
        calculated = read_optimization_iteration(db_path, iteration)
        assert calculated["rowid"] == expected_rowid

        expected_params = pd.DataFrame(data=[expected_value], columns=["value"])
        assert_frame_equal(calculated["params"], expected_params, check_dtype=False)
def read_optimization_iteration(path_or_database, iteration, include_internals=False):
    """Get information about an optimization iteration.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)
        iteration (int): The index of the iteration that should be retrieved.
            The index behaves like a Python list index, i.e. ``0`` identifies
            the first iteration, ``-1`` the last one, etc.
        include_internals (bool): Whether internally used quantities like the
            internal parameter vector and the corresponding derivative etc. are
            included in the result. Default False. This should only be used by
            advanced users.

    Returns:
        dict: The logged information corresponding to the iteration. The keys
            correspond to database columns. The entry ``"params"`` is a copy of
            the start params whose ``"value"`` column holds the logged external
            parameters.

    Raises:
        IndexError: if the iteration is out of bounds, including the case that
            no iteration has been logged yet.

    """
    database = load_database(**_process_path_or_database(path_or_database))
    start_params = read_start_params(database)

    if iteration >= 0:
        # rowids start at 1 whereas iteration indices start at 0
        rowid = iteration + 1
    else:
        last_iteration = read_last_rows(
            database=database,
            table_name="optimization_iterations",
            n_rows=1,
            return_type="list_of_dicts",
        )
        if not last_iteration:
            # without any logged row, a negative index cannot be resolved
            raise IndexError(f"Invalid iteration requested: {iteration}")
        highest_rowid = last_iteration[0]["rowid"]
        rowid = highest_rowid + iteration + 1

    rows = read_specific_row(
        database=database,
        table_name="optimization_iterations",
        rowid=rowid,
        return_type="list_of_dicts",
    )
    if not rows:
        raise IndexError(f"Invalid iteration requested: {iteration}")
    data = rows[0]

    # replace the raw external parameter vector by a params DataFrame
    params = start_params.copy()
    params["value"] = data.pop("external_params")
    data["params"] = params

    to_remove = ["distance_origin", "distance_ones"]
    if not include_internals:
        to_remove += ["internal_params", "internal_derivative"]
    for key in to_remove:
        data.pop(key, None)

    return data
def test_optimization_iteration_table_vector_valued(tmp_path):
    """The ``contributions`` column of the iterations table is a PickleType."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)

    first_eval = {"output": {"contributions": np.ones(3), "value": 0.5}}
    make_optimization_iteration_table(database, first_eval=first_eval)

    iterations_table = database.tables["optimization_iterations"]
    contributions_type = iterations_table.columns["contributions"].type
    assert isinstance(contributions_type, PickleType)
def test_optimization_status_table(tmp_path):
    """Append three status rows and read those after the first one."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_status_table(database)

    for status in ["scheduled", "running", "success"]:
        row = {"status": status}
        append_row(row, "optimization_status", database, db_path, False)

    res, _ = read_new_rows(database, "optimization_status", 1, "dict_of_lists")

    assert res == {"rowid": [2, 3], "status": ["running", "success"]}
def _create_and_initialize_database(logging, log_options, first_eval, problem_data):
    """Create the logging database, its tables and store the problem data.

    Args:
        logging (str or pathlib.Path): Path of the database file.
        log_options (dict): Logging options. The entries read here are
            "fast_logging" (default False), "if_table_exists" (default
            "extend") and "if_database_exists" (default "extend"). If the old
            key "if_exists" is present without "if_table_exists", a warning
            about the renaming is issued.
        first_eval (dict): Passed on to ``make_optimization_iteration_table``.
        problem_data (dict): Description of the optimization problem. Entries
            that are functions, their kwargs or constraints are not stored.

    Returns:
        The database object returned by ``load_database``.

    Raises:
        FileExistsError: if the database file exists and "if_database_exists"
            is set to "raise".

    """
    # extract information
    path = Path(logging)
    fast_logging = log_options.get("fast_logging", False)
    if_table_exists = log_options.get("if_table_exists", "extend")
    if_database_exists = log_options.get("if_database_exists", "extend")

    if "if_exists" in log_options and "if_table_exists" not in log_options:
        warnings.warn("The log_option 'if_exists' was renamed to 'if_table_exists'.")

    # use the converted ``path`` so that plain strings work as well
    if path.exists():
        if if_database_exists == "raise":
            raise FileExistsError(
                f"The database {logging} already exists and the log_option "
                "'if_database_exists' is set to 'raise'"
            )
        elif if_database_exists == "replace":
            path.unlink()

    database = load_database(path=path, fast_logging=fast_logging)

    # create the optimization_iterations table
    make_optimization_iteration_table(
        database=database,
        first_eval=first_eval,
        if_exists=if_table_exists,
    )

    # create the steps table
    make_steps_table(database, if_exists=if_table_exists)

    # create and initialize the optimization_problem table
    make_optimization_problem_table(database, if_exists=if_table_exists)

    not_saved = [
        "criterion",
        "criterion_kwargs",
        "constraints",
        "derivative",
        "derivative_kwargs",
        "criterion_and_derivative",
        "criterion_and_derivative_kwargs",
    ]
    problem_data = {
        key: val for key, val in problem_data.items() if key not in not_saved
    }

    append_row(problem_data, "optimization_problem", database, path, fast_logging)

    return database
def test_optimization_iteration_table_scalar(tmp_path, iteration_data):
    """Write one iteration row and compare what is read back with the input."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    append_row(iteration_data, "optimization_iterations", database, db_path, False)

    rows = read_last_rows(database, "optimization_iterations", 1, "list_of_dicts")

    assert isinstance(rows, list)
    assert isinstance(rows[0], dict)

    row = rows[0]
    assert row["rowid"] == 1
    assert_array_equal(row["params"], iteration_data["params"])
    for key in ("value", "timestamp"):
        assert row[key] == iteration_data[key]
def test_optimization_problem_table(tmp_path, problem_data):
    """Write the problem data and compare each entry that is read back."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_problem_table(database)
    append_row(problem_data, "optimization_problem", database, db_path, False)

    rows = read_last_rows(database, "optimization_problem", 1, "list_of_dicts")
    stored = rows[0]

    assert stored["rowid"] == 1
    for key, expected in problem_data.items():
        actual = stored[key]
        if key == "criterion":
            # the stored criterion is compared by evaluating it
            assert actual(np.ones(3)) == 3
        elif isinstance(expected, np.ndarray):
            assert_array_equal(actual, expected)
        else:
            assert actual == expected
def test_optimization_iteration_table_dict_valued(tmp_path):
    """Check the column types created for a dict valued first evaluation."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)

    output = {"contributions": np.ones(3), "value": 5, "bla": pd.DataFrame()}
    make_optimization_iteration_table(database, first_eval={"output": output})

    for name in ("contributions", "bla"):
        column_type = database.tables["optimization_iterations"].columns[name].type
        assert isinstance(column_type, PickleType)

    value_type = database.tables["optimization_iterations"].columns["value"].type
    assert isinstance(value_type, Float)
def _load_database(path_or_database):
    """Get an sqlalchemy.MetaData object from a path or an existing database.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData): Either an
            already loaded database, which is returned unchanged, or the path
            to an existing database file.

    Returns:
        sqlalchemy.MetaData: The database that was passed in or loaded from the
            path via ``load_database``.

    Raises:
        FileNotFoundError: if a path is given but no file exists at it.
        ValueError: if the argument is neither a path nor a MetaData object.

    """
    if isinstance(path_or_database, MetaData):
        return path_or_database

    if isinstance(path_or_database, (Path, str)):
        path = Path(path_or_database)
        if not path.exists():
            raise FileNotFoundError(f"No such database file: {path}")
        return load_database(path=path)

    raise ValueError("path_or_database must be a path or sqlalchemy.MetaData object")
def test_read_table(tmp_path, iteration_data):
    """Write ten rows and read the complete table back."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    n_rows = 10
    # sqlalchemy starts counting at 1
    for counter in range(1, n_rows + 1):
        iteration_data.update({"value": counter, "step": counter % 2})
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    table = read_table(
        database=database,
        table_name="optimization_iterations",
        return_type="dict_of_lists",
    )

    assert table["rowid"] == list(range(1, 11))
    assert table["step"] == [1, 0] * 5
def test_read_last_rows_stride(tmp_path, iteration_data):
    """Read the last three rows with a stride of two."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for counter in range(1, 11):
        iteration_data["value"] = counter
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    last_rows = read_last_rows(
        database=database,
        table_name="optimization_iterations",
        n_rows=3,
        return_type="dict_of_lists",
        stride=2,
    )

    assert last_rows["value"] == [6.0, 8.0, 10.0]
def test_update_row(tmp_path, iteration_data):
    """Overwrite the value in row 8 and check that the change is read back."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for counter in range(1, 11):
        iteration_data["value"] = counter
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    update_row({"value": 20}, 8, "optimization_iterations", database, db_path, False)

    read_result = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=3,
        return_type="dict_of_lists",
    )
    values = read_result[0]["value"]

    assert values == [4, 5, 6, 7, 20, 9, 10]
def test_read_last_rows_with_step(tmp_path, iteration_data):
    """Read the last rows restricted to step 0."""
    db_path = tmp_path / "test.db"
    database = load_database(path=db_path)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for counter in range(1, 11):
        iteration_data.update({"value": counter, "step": counter % 2})
        append_row(iteration_data, "optimization_iterations", database, db_path, False)

    res = read_last_rows(
        database=database,
        table_name="optimization_iterations",
        n_rows=20,
        return_type="dict_of_lists",
        step=0,
    )

    # rows with an even counter were written with step 0
    assert res["rowid"] == [2, 4, 6, 8, 10]
def read_start_params(path_or_database):
    """Load the start parameters DataFrame.

    The params are taken from the last row of the ``optimization_problem``
    table.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        params (pd.DataFrame): see :ref:`params`.

    """
    load_kwargs = _process_path_or_database(path_or_database)
    database = load_database(**load_kwargs)

    problem_rows = read_last_rows(
        database=database,
        table_name="optimization_problem",
        n_rows=1,
        return_type="dict_of_lists",
    )
    return problem_rows["params"][0]
def _create_and_initialize_database(logging, log_options, first_eval, problem_data):
    """Set up the logging database with its tables and initial rows.

    The iterations, status and problem tables are created; a "running" row is
    appended to the status table and the problem data to the problem table.
    Unless the log option "save_all_arguments" is set, function valued entries,
    their kwargs and the constraints are dropped from the problem data.
    """
    # options with their defaults
    fast_logging = log_options.get("fast_logging", False)
    if_exists = log_options.get("if_exists", "extend")
    save_all_arguments = log_options.get("save_all_arguments", False)

    database = load_database(path=logging, fast_logging=fast_logging)

    # optimization_iterations table
    make_optimization_iteration_table(
        database=database,
        first_eval=first_eval,
        if_exists=if_exists,
    )

    # optimization_status table with its first entry
    make_optimization_status_table(database, if_exists)
    append_row(
        {"status": "running"}, "optimization_status", database, logging, fast_logging
    )

    # optimization_problem table with the (possibly reduced) problem data
    make_optimization_problem_table(database, if_exists, save_all_arguments)
    if save_all_arguments:
        to_store = problem_data
    else:
        excluded = {
            "criterion",
            "criterion_kwargs",
            "constraints",
            "derivative",
            "derivative_kwargs",
            "criterion_and_derivative",
            "criterion_and_derivative_kwargs",
        }
        to_store = {
            name: entry for name, entry in problem_data.items() if name not in excluded
        }
    append_row(to_store, "optimization_problem", database, logging, fast_logging)

    return database
def read_optimization_histories(path_or_database):
    """Read the histories of values, parameters and other information.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        dict: The entries are
            - "values" (pd.Series): criterion values, missing values dropped.
            - "params" (pd.DataFrame): one row per logged iteration, the
              columns are the index of the start params.
            - "metadata" (pd.DataFrame): columns "timestamps", "valid",
              "has_value" and "has_derivative".
            - "contributions" (pd.DataFrame): only present if the iterations
              table has a contributions column; rows with missing values are
              dropped.

    """
    database = load_database(**_process_path_or_database(path_or_database))

    # reuse the loaded database instead of processing the input a second time
    start_params = read_start_params(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    params_history = pd.DataFrame(raw_res["params"], columns=start_params.index)
    value_history = pd.Series(raw_res["value"])

    metadata = pd.DataFrame()
    metadata["timestamps"] = raw_res["timestamp"]
    metadata["valid"] = raw_res["valid"]
    metadata["has_value"] = value_history.notnull()
    metadata["has_derivative"] = [
        d is not None for d in raw_res["internal_derivative"]
    ]

    histories = {
        "values": value_history.dropna(),
        "params": params_history,
        "metadata": metadata,
    }

    if "contributions" in raw_res:
        contributions = raw_res["contributions"]
        # guard against a table without any logged iteration
        first_contrib = contributions[0] if len(contributions) > 0 else None
        if isinstance(first_contrib, pd.Series):
            columns = first_contrib.index
        else:
            columns = None
        histories["contributions"] = pd.DataFrame(
            contributions, columns=columns
        ).dropna()

    return histories
def read_steps_table(path_or_database):
    """Load the steps table.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        steps_df (pd.DataFrame): DataFrame built from all rows of the "steps"
            table.

    """
    database = load_database(**_process_path_or_database(path_or_database))

    # last_retrieved=0 requests every row of the table
    steps_table, _ = read_new_rows(
        database=database,
        table_name="steps",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    return pd.DataFrame(steps_table)
def test_all_steps_occur_in_optimization_iterations_if_no_convergence(params):
    """Steps 1 to 5 must all show up in the logged iterations."""
    log_path = "logging.db"
    multistart_options = {"convergence_max_discoveries": np.inf}

    minimize(
        criterion=sos_dict_criterion,
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
        multistart_options=multistart_options,
        logging=log_path,
    )

    database = load_database(path=log_path)
    iterations, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    assert set(iterations["step"]) == {1, 2, 3, 4, 5}
def dashboard_app(
    doc,
    session_data,
    updating_options,
):
    """Build the dashboard showing the criterion and parameter development.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:

            - last_retrieved (int): last iteration currently in the
              ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.

        updating_options (dict): Specification how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and
            stride.

    """
    # apply the html template that is located next to this module;
    # FileSystemLoader needs the folder as string, not as pathlib.Path
    template_dir = str(Path(__file__).resolve().parent)
    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    doc.template = jinja_env.get_template("index.html")

    # load the database and store where the plotting data starts
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options
    )

    # start params and the mappings from groups to parameters
    start_params = read_start_params(path_or_database=database)
    start_params["id"] = _create_id_column(start_params)
    group_to_param_ids = _map_group_to_other_column(start_params, "id")
    group_to_param_names = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_dashboard(group_to_param_ids)

    # page elements: title, plots and the restart button
    title_div = Div(
        text="""<h1 style="font-size:30px;">estimagic Dashboard</h1>""",
        sizing_mode="scale_width",
    )
    title = Row(children=[title_div], name="title", margin=(5, 5, -20, 5))

    plots = _create_initial_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=group_to_param_ids,
        group_to_param_names=group_to_param_names,
    )

    restart_button = _create_restart_button(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    button_row = Row(children=[restart_button], name="button_row")

    # put everything into the bokeh Document
    layout = Column(
        children=[title, button_row, *plots], sizing_mode="stretch_width"
    )
    doc.add_root(layout)

    # activate the button so that the convergence plot starts immediately
    restart_button.active = True
def test_update_monitoring_tab():
    """Update the monitoring tab once and compare the resulting plot data."""
    # note: this test database does not include None in the value column.
    # it has only 7 entries.
    database = load_database(metadata=None, path=Path(__file__).parent / "db1.db")

    crit_data = {"iteration": [3, 5], "criterion": [-10, -10]}
    criterion_cds = ColumnDataSource(crit_data)

    param_data = {f"p{i}": [i, i, i] for i in range(6)}
    param_data["iteration"] = [3, 4, 5]
    plotted_keys = ["p0", "p2", "p4", "iteration"]
    plotted_param_data = {
        key: column for key, column in param_data.items() if key in plotted_keys
    }
    param_cds = ColumnDataSource(plotted_param_data)

    start_params = pd.DataFrame()
    start_params["group"] = ["g1", "g1", None, None, "g2", "g2"]
    start_params["id"] = [f"p{i}" for i in range(6)]

    session_data = {"last_retrieved": 5}

    expected_crit_data = {
        "iteration": [3, 5, 6, 7],
        "criterion": [-10, -10] + [3.371916994681647e-18, 3.3306686770405823e-18],
    }

    # NOTE(review): the copy is shallow, so the lists extended below are the
    # same objects that plotted_param_data holds - confirm this is intended
    expected_param_data = plotted_param_data.copy()
    appended = {
        "iteration": [6, 7],
        "p0": [-7.82732387e-10, -7.45058016e-10],
        "p2": [-7.50570405e-10, -7.45058015e-10],
        "p4": [-7.44958198e-10, -7.45058015e-10],
    }
    for key, new_values in appended.items():
        expected_param_data[key].extend(new_values)

    _update_monitoring_tab(
        database=database,
        criterion_cds=criterion_cds,
        param_cds=param_cds,
        session_data=session_data,
        tables=[],  # not used
        rollover=500,
        start_params=start_params,
        update_chunk=5,
        stride=1,
    )

    assert session_data["last_retrieved"] == 7
    assert criterion_cds.data == expected_crit_data
    assert param_cds.data == expected_param_data
def test_load_database_with_bound_metadata(tmp_path):
    """Calling load_database with bound MetaData returns the same object."""
    db_path = tmp_path / "test.db"
    bound_database = load_database(path=db_path)

    reloaded = load_database(metadata=bound_database)

    assert reloaded is bound_database
def monitoring_app(
    doc,
    database_name,
    session_data,
    updating_options,
    start_immediately,
):
    """Build the monitoring page with criterion and parameter plots.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        database_name (str): Short and unique name of the database.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:

            - last_retrieved (int): last iteration currently in the
              ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.

        updating_options (dict): Specification how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and
            stride.
        start_immediately (bool): If true, the model named "activation_button"
            is set to active right after the document has been built.

    """
    # apply the html template that is located next to this module;
    # FileSystemLoader needs the folder as string, not as pathlib.Path
    template_dir = str(Path(__file__).resolve().parent)
    jinja_env = Environment(loader=FileSystemLoader(template_dir))
    doc.template = jinja_env.get_template("index.html")

    # load the database and store where the plotting data starts
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options
    )

    # start params and the mappings from groups to parameters
    start_params = read_start_params(path_or_database=database)
    start_params["id"] = _create_id_column(start_params)
    group_to_param_ids = _map_group_to_other_column(start_params, "id")
    group_to_param_names = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_monitoring_app(
        group_to_param_ids
    )

    # page elements: button row and convergence plots
    button_row = _create_button_row(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    monitoring_plots = _create_initial_convergence_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=group_to_param_ids,
        group_to_param_names=group_to_param_names,
    )

    # put everything into a single tab of the bokeh Document
    layout = Column(
        children=[button_row, *monitoring_plots], sizing_mode="stretch_width"
    )
    convergence_tab = Panel(child=layout, title="Convergence Tab")
    doc.add_root(Tabs(tabs=[convergence_tab]))

    if start_immediately:
        doc.get_model_by_name("activation_button").active = True
def test_load_database_from_path(tmp_path):
    """Loading from a path that does not exist yet creates a bound database."""
    db_path = tmp_path / "test.db"

    loaded = load_database(path=db_path)

    assert isinstance(loaded, sqlalchemy.MetaData)
    assert loaded.bind is not None