Python load_database示例，estimagic.logging.database_utilities.load_database Python示例

示例#1

0

显示文件

def test_load_database_after_pickling(tmp_path):
    """Check that load_database restores ``bind`` after a pickle round trip.

    Pickling unsets ``database.bind``; loading again with the unpickled
    metadata and the path must set it again.
    """
    db_file = tmp_path / "test.db"
    fresh = load_database(path=db_file)
    round_tripped = pickle.loads(pickle.dumps(fresh))
    rebound = load_database(metadata=round_tripped, path=db_file)
    assert rebound.bind is not None

示例#2

0

显示文件

def test_steps_table(tmp_path):
    """Append three rows to the steps table and read those after rowid 1."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_steps_table(database)

    rows = [
        {
            "status": status,
            "n_iterations": 0,
            "type": "optimization",
            "name": "bla",
        }
        for status in ("scheduled", "running", "completed")
    ]
    for row in rows:
        append_row(row, "steps", database, db_file, False)

    new_rows, _ = read_new_rows(database, "steps", 1, "dict_of_lists")

    assert new_rows == {
        "rowid": [2, 3],
        "status": ["running", "completed"],
        "type": ["optimization", "optimization"],
        "name": ["bla", "bla"],
        "n_iterations": [0, 0],
    }

示例#3

0

显示文件

文件： test_read_log.py 项目： yradeva93/estimagic

def test_read_optimization_iteration(tmp_path):
    """Read the first and the last logged iteration via list-style indices."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)

    # optimization_iterations table with three logged parameter vectors
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    for value in (0, 1, 2):
        row = {"external_params": np.array([value])}
        append_row(row, "optimization_iterations", database, db_file, False)

    # optimization_problem table holding the start params
    make_optimization_problem_table(database)
    problem_row = {"params": pd.DataFrame(data=[10], columns=["value"])}
    append_row(problem_row, "optimization_problem", database, db_file, False)

    # (requested index, expected rowid, expected parameter value)
    cases = [(0, 1, 0), (-1, 3, 2)]
    for index, wanted_rowid, wanted_value in cases:
        logged = read_optimization_iteration(db_file, index)
        assert logged["rowid"] == wanted_rowid
        wanted_params = pd.DataFrame(data=[wanted_value], columns=["value"])
        assert_frame_equal(logged["params"], wanted_params, check_dtype=False)

示例#4

0

显示文件

def read_optimization_iteration(path_or_database, iteration, include_internals=False):
    """Get information about an optimization iteration.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)
        iteration (int): The index of the iteration that should be retrieved.
            The index behaves like a Python list index, i.e. ``0`` identifies the
            first iteration, ``-1`` the last one, etc.
        include_internals (bool): Whether internally used quantities like the
            internal parameter vector and the corresponding derivative etc. are
            included in the result. Default False. This should only be used by
            advanced users.

    Returns:
        dict: The logged information corresponding to the iteration. The keys
            correspond to database columns, except that "external_params" is
            replaced by "params": a copy of the start params whose "value"
            column holds the logged external parameters.

    Raises:
        IndexError: if the iteration is out of bounds.

    """
    database = load_database(**_process_path_or_database(path_or_database))
    start_params = read_start_params(database)
    if iteration >= 0:
        # rowids start at 1, iteration indices at 0
        rowid = iteration + 1
    else:
        last_iteration = read_last_rows(
            database=database,
            table_name="optimization_iterations",
            n_rows=1,
            return_type="list_of_dicts",
        )
        if not last_iteration:
            # nothing logged yet: fail with the same message as any other
            # out-of-bounds request instead of "list index out of range"
            raise IndexError(f"Invalid iteration requested: {iteration}")
        highest_rowid = last_iteration[0]["rowid"]

        # translate the negative index into a rowid, counting from the end
        rowid = highest_rowid + iteration + 1

    data = read_specific_row(
        database=database,
        table_name="optimization_iterations",
        rowid=rowid,
        return_type="list_of_dicts",
    )

    if not data:
        raise IndexError(f"Invalid iteration requested: {iteration}")
    data = data[0]

    params = start_params.copy()
    params["value"] = data.pop("external_params")
    data["params"] = params

    to_remove = ["distance_origin", "distance_ones"]
    if not include_internals:
        to_remove += ["internal_params", "internal_derivative"]
    for key in to_remove:
        # not every column has to be present in the log
        data.pop(key, None)

    return data

示例#5

0

显示文件

def test_optimization_iteration_table_vector_valued(tmp_path):
    """The "contributions" column of a dict-valued output uses PickleType."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    first_eval = {"output": {"contributions": np.ones(3), "value": 0.5}}
    make_optimization_iteration_table(database, first_eval=first_eval)

    iterations_table = database.tables["optimization_iterations"]
    contributions_type = iterations_table.columns["contributions"].type
    assert isinstance(contributions_type, PickleType)

示例#6

0

显示文件

文件： test_database_utilities.py 项目： yradeva93/estimagic

def test_optimization_status_table(tmp_path):
    """Append three status rows and read back those after rowid 1."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_status_table(database)

    status_rows = [{"status": s} for s in ("scheduled", "running", "success")]
    for row in status_rows:
        append_row(row, "optimization_status", database, db_file, False)

    new_rows, _ = read_new_rows(database, "optimization_status", 1, "dict_of_lists")

    assert new_rows == {"rowid": [2, 3], "status": ["running", "success"]}

示例#7

0

显示文件

def _create_and_initialize_database(logging, log_options, first_eval,
                                    problem_data):
    """Create or open the log database and set up its tables.

    Args:
        logging (str or pathlib.Path): Location of the database file.
        log_options (dict): Logging options. The keys read here are
            "fast_logging" (default False), "if_table_exists" (default
            "extend") and "if_database_exists" (default "extend"). If only
            the old key "if_exists" is present, a warning about the rename
            is issued.
        first_eval: Passed on to ``make_optimization_iteration_table``.
        problem_data (dict): Description of the optimization problem. The
            criterion, the derivatives, their kwargs and the constraints are
            dropped before the remaining entries are written to the
            "optimization_problem" table.

    Returns:
        The database object returned by ``load_database``.

    Raises:
        FileExistsError: If the database file already exists and
            "if_database_exists" is "raise".

    """
    # extract information
    path = Path(logging)
    fast_logging = log_options.get("fast_logging", False)
    if_table_exists = log_options.get("if_table_exists", "extend")
    if_database_exists = log_options.get("if_database_exists", "extend")

    if "if_exists" in log_options and "if_table_exists" not in log_options:
        warnings.warn(
            "The log_option 'if_exists' was renamed to 'if_table_exists'.")

    # Work on the normalized ``path``: ``logging`` may be a plain string,
    # which has neither ``exists`` nor ``unlink``.
    if path.exists():
        if if_database_exists == "raise":
            raise FileExistsError(
                f"The database {logging} already exists and the log_option "
                "'if_database_exists' is set to 'raise'")
        elif if_database_exists == "replace":
            path.unlink()

    database = load_database(path=path, fast_logging=fast_logging)

    # create the optimization_iterations table
    make_optimization_iteration_table(
        database=database,
        first_eval=first_eval,
        if_exists=if_table_exists,
    )

    # create the steps table; an existing one is handled per if_table_exists
    make_steps_table(database, if_exists=if_table_exists)

    # create and initialize the optimization_problem table
    make_optimization_problem_table(database, if_exists=if_table_exists)

    not_saved = [
        "criterion",
        "criterion_kwargs",
        "constraints",
        "derivative",
        "derivative_kwargs",
        "criterion_and_derivative",
        "criterion_and_derivative_kwargs",
    ]
    problem_data = {
        key: val
        for key, val in problem_data.items() if key not in not_saved
    }

    append_row(problem_data, "optimization_problem", database, path,
               fast_logging)

    return database

示例#8

0

显示文件

def test_optimization_iteration_table_scalar(tmp_path, iteration_data):
    """Write one iteration row and check that it is read back unchanged."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})
    append_row(iteration_data, "optimization_iterations", database, db_file, False)

    rows = read_last_rows(database, "optimization_iterations", 1, "list_of_dicts")
    assert isinstance(rows, list)
    assert isinstance(rows[0], dict)

    row = rows[0]
    assert row["rowid"] == 1
    assert_array_equal(row["params"], iteration_data["params"])
    assert row["value"] == iteration_data["value"]
    assert row["timestamp"] == iteration_data["timestamp"]

示例#9

0

显示文件

def test_optimization_problem_table(tmp_path, problem_data):
    """Write the problem data and compare every entry after reading it back."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_problem_table(database)
    append_row(problem_data, "optimization_problem", database, db_file, False)

    rows = read_last_rows(database, "optimization_problem", 1, "list_of_dicts")
    stored = rows[0]
    assert stored["rowid"] == 1

    for key in problem_data:
        expected = problem_data[key]
        actual = stored[key]
        if key == "criterion":
            # the stored criterion is checked by calling it
            assert actual(np.ones(3)) == 3
            continue
        if isinstance(expected, np.ndarray):
            assert_array_equal(actual, expected)
            continue
        assert actual == expected

示例#10

0

显示文件

def test_optimization_iteration_table_dict_valued(tmp_path):
    """Check the column types created for a dict-valued first evaluation."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    output = {"contributions": np.ones(3), "value": 5, "bla": pd.DataFrame()}
    make_optimization_iteration_table(database, first_eval={"output": output})

    expected_types = {
        "contributions": PickleType,
        "bla": PickleType,
        "value": Float,
    }
    for column_name, column_type in expected_types.items():
        column = database.tables["optimization_iterations"].columns[column_name]
        assert isinstance(column.type, column_type)

示例#11

0

显示文件

文件： read_log.py 项目： OpenSourceEconomics/estimagic

def _load_database(path_or_database):
    """Get an sqlalchemy.MetaData object from path or database.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData): Either a
            MetaData object, which is returned unchanged, or the location of
            an existing database file.

    Returns:
        The MetaData object that was passed in, or the result of
        ``load_database(path=...)`` when a path was given.

    Raises:
        FileNotFoundError: If a path is given and no file exists there.
        ValueError: If the argument is neither a path nor a MetaData object.

    """
    if isinstance(path_or_database, MetaData):
        return path_or_database

    if isinstance(path_or_database, (Path, str)):
        path = Path(path_or_database)
        # refuse to silently create a new, empty database when reading a log
        if not path.exists():
            raise FileNotFoundError(f"No such database file: {path}")
        return load_database(path=path)

    raise ValueError(
        "path_or_database must be a path or sqlalchemy.MetaData object")

示例#12

0

显示文件

def test_read_table(tmp_path, iteration_data):
    """Append ten rows and read the complete table as a dict of lists."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for number in range(1, 11):
        iteration_data["value"] = number
        iteration_data["step"] = number % 2
        append_row(iteration_data, "optimization_iterations", database, db_file, False)

    whole_table = read_table(
        database=database,
        table_name="optimization_iterations",
        return_type="dict_of_lists",
    )

    assert whole_table["rowid"] == list(range(1, 11))
    assert whole_table["step"] == [1, 0] * 5

示例#13

0

显示文件

def test_read_last_rows_stride(tmp_path, iteration_data):
    """Read the last three rows with a stride of two."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for number in range(1, 11):
        iteration_data["value"] = number
        append_row(iteration_data, "optimization_iterations", database, db_file, False)

    last_rows = read_last_rows(
        database=database,
        table_name="optimization_iterations",
        n_rows=3,
        return_type="dict_of_lists",
        stride=2,
    )

    assert last_rows["value"] == [6.0, 8.0, 10.0]

示例#14

0

显示文件

def test_update_row(tmp_path, iteration_data):
    """Overwrite the value of row 8 and read the rows after rowid 3."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for number in range(1, 11):
        iteration_data["value"] = number
        append_row(iteration_data, "optimization_iterations", database, db_file, False)

    update_row({"value": 20}, 8, "optimization_iterations", database, db_file, False)

    new_rows = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=3,
        return_type="dict_of_lists",
    )[0]

    assert new_rows["value"] == [4, 5, 6, 7, 20, 9, 10]

示例#15

0

显示文件

def test_read_last_rows_with_step(tmp_path, iteration_data):
    """Request rows with ``step=0`` and expect the even rowids back."""
    db_file = tmp_path / "test.db"
    database = load_database(path=db_file)
    make_optimization_iteration_table(database, first_eval={"output": 0.5})

    # sqlalchemy starts counting at 1
    for number in range(1, 11):
        iteration_data["value"] = number
        iteration_data["step"] = number % 2
        append_row(iteration_data, "optimization_iterations", database, db_file, False)

    step_rows = read_last_rows(
        database=database,
        table_name="optimization_iterations",
        n_rows=20,
        return_type="dict_of_lists",
        step=0,
    )

    assert step_rows["rowid"] == [2, 4, 6, 8, 10]

示例#16

0

显示文件

def read_start_params(path_or_database):
    """Return the start parameters stored in the log.

    The params are taken from the last row of the "optimization_problem"
    table.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        params (pd.DataFrame): see :ref:`params`.

    """
    database = load_database(**_process_path_or_database(path_or_database))
    last_problem = read_last_rows(
        database=database,
        table_name="optimization_problem",
        n_rows=1,
        return_type="dict_of_lists",
    )
    return last_problem["params"][0]

示例#17

0

显示文件

文件： optimize.py 项目： yradeva93/estimagic

def _create_and_initialize_database(logging, log_options, first_eval,
                                    problem_data):
    """Load the log database, create its tables and write the first rows.

    Reads "fast_logging", "if_exists" and "save_all_arguments" from
    ``log_options``, creates the iterations, status and problem tables,
    marks the optimization as "running" and stores ``problem_data``. Unless
    "save_all_arguments" is set, the criterion, the derivatives, their
    kwargs and the constraints are left out. Returns the loaded database.
    """
    fast_logging = log_options.get("fast_logging", False)
    if_exists = log_options.get("if_exists", "extend")
    save_all_arguments = log_options.get("save_all_arguments", False)

    database = load_database(path=logging, fast_logging=fast_logging)

    # optimization_iterations table
    make_optimization_iteration_table(
        database=database,
        first_eval=first_eval,
        if_exists=if_exists,
    )

    # optimization_status table, initialized with a "running" row
    make_optimization_status_table(database, if_exists)
    running_row = {"status": "running"}
    append_row(running_row, "optimization_status", database, logging,
               fast_logging)

    # optimization_problem table
    make_optimization_problem_table(database, if_exists, save_all_arguments)
    if save_all_arguments:
        to_store = problem_data
    else:
        left_out = (
            "criterion",
            "criterion_kwargs",
            "constraints",
            "derivative",
            "derivative_kwargs",
            "criterion_and_derivative",
            "criterion_and_derivative_kwargs",
        )
        to_store = {}
        for name, entry in problem_data.items():
            if name not in left_out:
                to_store[name] = entry
    append_row(to_store, "optimization_problem", database, logging,
               fast_logging)

    return database

示例#18

0

显示文件

文件： read_log.py 项目： OpenSourceEconomics/estimagic

def read_optimization_histories(path_or_database):
    """Read the histories of values, parameters and other information.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        dict: With the entries
            - "values" (pd.Series): the logged values without missing entries.
            - "params" (pd.DataFrame): one row per logged iteration, columns
              are the index of the start params.
            - "metadata" (pd.DataFrame): columns "timestamps", "valid",
              "has_value" and "has_derivative".
            - "contributions" (pd.DataFrame): only present if the log has a
              "contributions" column; rows with missing entries are dropped.

    """
    database = load_database(**_process_path_or_database(path_or_database))

    # reuse the loaded database instead of resolving path_or_database again
    start_params = read_start_params(database)

    raw_res, _ = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )

    params_history = pd.DataFrame(raw_res["params"],
                                  columns=start_params.index)
    value_history = pd.Series(raw_res["value"])

    metadata = pd.DataFrame()
    metadata["timestamps"] = raw_res["timestamp"]
    metadata["valid"] = raw_res["valid"]
    metadata["has_value"] = value_history.notnull()
    metadata["has_derivative"] = [
        d is not None for d in raw_res["internal_derivative"]
    ]

    histories = {
        "values": value_history.dropna(),
        "params": params_history,
        "metadata": metadata,
    }

    if "contributions" in raw_res:
        contributions = raw_res["contributions"]
        # an empty log has no first entry to take the column labels from
        first_contrib = contributions[0] if len(contributions) > 0 else None
        if isinstance(first_contrib, pd.Series):
            columns = first_contrib.index
        else:
            columns = None
        contributions_history = pd.DataFrame(contributions,
                                             columns=columns).dropna()
        histories["contributions"] = contributions_history

    return histories

示例#19

0

显示文件

文件： read_log.py 项目： OpenSourceEconomics/estimagic

def read_steps_table(path_or_database):
    """Load the steps table as a DataFrame.

    Args:
        path_or_database (pathlib.Path, str or sqlalchemy.MetaData)

    Returns:
        pd.DataFrame: Built from all rows of the "steps" table, one row per
            entry.

    """
    database = load_database(**_process_path_or_database(path_or_database))
    # last_retrieved=0 requests every row of the table
    steps_table, _ = read_new_rows(
        database=database,
        table_name="steps",
        last_retrieved=0,
        return_type="list_of_dicts",
    )
    steps_df = pd.DataFrame(steps_table)

    return steps_df

示例#20

0

显示文件

def test_all_steps_occur_in_optimization_iterations_if_no_convergence(params):
    """Run a multistart optimization and expect steps 1 to 5 in the log."""
    minimize(
        criterion=sos_dict_criterion,
        params=params,
        algorithm="scipy_lbfgsb",
        multistart=True,
        multistart_options={"convergence_max_discoveries": np.inf},
        logging="logging.db",
    )

    database = load_database(path="logging.db")
    logged = read_new_rows(
        database=database,
        table_name="optimization_iterations",
        last_retrieved=0,
        return_type="dict_of_lists",
    )
    iterations = logged[0]

    assert set(iterations["step"]) == {1, 2, 3, 4, 5}

示例#21

0

显示文件

def dashboard_app(
    doc,
    session_data,
    updating_options,
):
    """Build the dashboard document with criterion and parameter plots.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.
        updating_options (dict): Specification how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and stride.

    """
    # style the Document; FileSystemLoader needs a string, not a pathlib Path
    here = Path(__file__).resolve().parent
    jinja_env = Environment(loader=FileSystemLoader(str(here)))
    doc.template = jinja_env.get_template("index.html")

    # process inputs
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options)
    start_params = read_start_params(path_or_database=database)
    start_params["id"] = _create_id_column(start_params)
    group_to_param_ids = _map_group_to_other_column(start_params, "id")
    group_to_param_names = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_dashboard(
        group_to_param_ids)

    # create elements
    headline = Div(
        text="""<h1 style="font-size:30px;">estimagic Dashboard</h1>""",
        sizing_mode="scale_width",
    )
    title = Row(
        children=[headline],
        name="title",
        margin=(5, 5, -20, 5),
    )
    plots = _create_initial_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=group_to_param_ids,
        group_to_param_names=group_to_param_names,
    )

    restart_button = _create_restart_button(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    button_row = Row(
        children=[restart_button],
        name="button_row",
    )

    # add elements to bokeh Document
    layout_children = [title, button_row, *plots]
    doc.add_root(Column(children=layout_children,
                        sizing_mode="stretch_width"))

    # switch the restart button on right away so that the convergence plot
    # starts immediately
    restart_button.active = True

示例#22

0

显示文件

文件： test_monitoring_callbacks.py 项目： yradeva93/estimagic

def test_update_monitoring_tab():
    """Check that iterations 6 and 7 are appended to both data sources."""
    # note: this test database does not include None in the value column.
    # it has only 7 entries.
    db_path = Path(__file__).parent / "db1.db"
    database = load_database(metadata=None, path=db_path)

    crit_data = {"iteration": [3, 5], "criterion": [-10, -10]}
    criterion_cds = ColumnDataSource(crit_data)

    param_data = {f"p{i}": [i, i, i] for i in range(6)}
    param_data["iteration"] = [3, 4, 5]
    plotted_param_data = {
        k: v
        for k, v in param_data.items() if k in ["p0", "p2", "p4", "iteration"]
    }
    param_cds = ColumnDataSource(plotted_param_data)

    start_params = pd.DataFrame()
    start_params["group"] = ["g1", "g1", None, None, "g2", "g2"]
    start_params["id"] = [f"p{i}" for i in range(6)]

    session_data = {"last_retrieved": 5}
    tables = []  # not used
    rollover = 500
    update_chunk = 5

    expected_crit_data = {
        "iteration": [3, 5, 6, 7],
        "criterion":
        [-10, -10] + [3.371916994681647e-18, 3.3306686770405823e-18],
    }

    # Copy every list: ``dict.copy()`` is shallow, so extending the expected
    # lists in place would also change the lists that were handed to
    # ``param_cds`` above.
    expected_param_data = {
        key: list(values) for key, values in plotted_param_data.items()
    }
    expected_param_data["iteration"] += [6, 7]
    expected_param_data["p0"] += [
        -7.82732387e-10,
        -7.45058016e-10,
    ]
    expected_param_data["p2"] += [
        -7.50570405e-10,
        -7.45058015e-10,
    ]
    expected_param_data["p4"] += [
        -7.44958198e-10,
        -7.45058015e-10,
    ]

    _update_monitoring_tab(
        database=database,
        criterion_cds=criterion_cds,
        param_cds=param_cds,
        session_data=session_data,
        tables=tables,
        rollover=rollover,
        start_params=start_params,
        update_chunk=update_chunk,
        stride=1,
    )

    assert session_data["last_retrieved"] == 7
    assert criterion_cds.data == expected_crit_data
    assert param_cds.data == expected_param_data

示例#23

0

显示文件

def test_load_database_with_bound_metadata(tmp_path):
    """Passing bound MetaData to load_database returns the same object."""
    db_file = tmp_path / "test.db"
    bound = load_database(path=db_file)
    reloaded = load_database(metadata=bound)
    assert reloaded is bound

示例#24

0

显示文件

文件： monitoring_app.py 项目： yradeva93/estimagic

def monitoring_app(
    doc,
    database_name,
    session_data,
    updating_options,
    start_immediately,
):
    """Build the monitoring document with criterion and parameter plots.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        database_name (str): Short and unique name of the database.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.
        updating_options (dict): Specification how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and stride.
        start_immediately (bool): If true, the model named "activation_button"
            is looked up in the document and set to active at the end.

    """
    # style the Document; FileSystemLoader needs a string, not a pathlib Path
    here = Path(__file__).resolve().parent
    jinja_env = Environment(loader=FileSystemLoader(str(here)))
    doc.template = jinja_env.get_template("index.html")

    # process inputs
    database = load_database(path=session_data["database_path"])
    session_data["last_retrieved"] = _calculate_start_point(
        database, updating_options)
    start_params = read_start_params(path_or_database=database)
    start_params["id"] = _create_id_column(start_params)
    group_to_param_ids = _map_group_to_other_column(start_params, "id")
    group_to_param_names = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_monitoring_app(
        group_to_param_ids)

    # create elements
    button_row = _create_button_row(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    monitoring_plots = _create_initial_convergence_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=group_to_param_ids,
        group_to_param_names=group_to_param_names,
    )

    # add elements to bokeh Document
    layout_children = [button_row, *monitoring_plots]
    grid = Column(children=layout_children, sizing_mode="stretch_width")
    convergence_tab = Panel(child=grid, title="Convergence Tab")
    doc.add_root(Tabs(tabs=[convergence_tab]))

    if not start_immediately:
        return
    activation_button = doc.get_model_by_name("activation_button")
    activation_button.active = True

示例#25

0

显示文件

def test_load_database_from_path(tmp_path):
    """Loading from a path that does not exist yet yields bound MetaData."""
    db_file = tmp_path / "test.db"
    loaded = load_database(path=db_file)
    assert isinstance(loaded, sqlalchemy.MetaData)
    assert loaded.bind is not None