Example #1
    def summary(self, ci_method="percentile", ci_level=0.95):
        """Create a summary of bootstrap results.

        Args:
            ci_method (str): Method of choice for confidence interval computation.
                The default is "percentile".
            ci_level (float): Confidence level for the calculation of confidence
                intervals. The default is 0.95.

        Returns:
            pd.DataFrame: The estimation summary as a DataFrame containing information
                on the mean, standard errors, as well as the confidence intervals.
                Soon this will be a pytree.
        """
        registry = get_registry(extended=True)
        names = leaf_names(self.base_outcome, registry=registry)
        summary_data = _calculate_summary_data_bootstrap(
            self, ci_method=ci_method, ci_level=ci_level
        )
        summary = calculate_estimation_summary(
            summary_data=summary_data,
            names=names,
            free_names=names,
        )
        return summary
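For context, a hedged standalone sketch of the get_registry / leaf_names pattern used above; the example pytree, the import path for get_registry, and the exact leaf-name format are assumptions for illustration.

# Sketch: flatten a small params pytree and label its leaves.
import numpy as np
from pybaum import leaf_names, tree_just_flatten

from estimagic.parameters.tree_registry import get_registry

params = {"mean": 1.0, "slopes": np.array([0.5, -0.2])}

registry = get_registry(extended=True)
flat = tree_just_flatten(params, registry=registry)
names = leaf_names(params, registry=registry)

# With the extended registry, array leaves are split into scalar entries, so
# `names` pairs one label with each flat value, e.g. roughly
# ["mean", "slopes_0", "slopes_1"].
print(dict(zip(names, flat)))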
Example #2
    def cov(self, return_type="pytree"):
        """Calculate the variance-covariance matrix of the estimated parameters.

        Args:
            return_type (str): One of "pytree", "array" or "dataframe". If "array",
                a 2d numpy array with the covariance is returned. If "dataframe",
                a pandas DataFrame with parameter names in the index and columns
                is returned. The default is "pytree".

        Returns:
            Any: The covariance matrix of the estimated parameters as a block-pytree,
                numpy.ndarray, or pandas.DataFrame.
        """
        cov = self._internal_cov

        if return_type == "dataframe":
            registry = get_registry(extended=True)
            names = np.array(leaf_names(self._base_outcome, registry=registry))
            cov = pd.DataFrame(cov, columns=names, index=names)
        elif return_type == "pytree":
            cov = matrix_to_block_tree(cov, self._base_outcome, self._base_outcome)
        elif return_type != "array":
            raise ValueError(
                "return_type must be one of pytree, array, or dataframe, "
                f"not {return_type}.")
        return cov
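To illustrate the "pytree" branch, a hedged sketch of matrix_to_block_tree on a tiny made-up base outcome; the import path and the exact block shapes are assumptions.

# Sketch: convert a 3x3 covariance matrix into a block pytree whose nesting
# mirrors the base outcome on both axes.
import numpy as np

from estimagic.parameters.block_trees import matrix_to_block_tree

base_outcome = {"a": 1.0, "b": np.array([2.0, 3.0])}  # three flat entries
cov = np.eye(3)

block_tree = matrix_to_block_tree(cov, base_outcome, base_outcome)

# Roughly expected structure (exact shapes depend on the leaf types):
# {"a": {"a": ..., "b": (2,) block}, "b": {"a": (2,) block, "b": (2, 2) block}}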
Example #3
def _update_bounds_and_flatten(nan_tree, bounds, direction):
    registry = get_registry(extended=True, data_col=direction)
    flat_nan_tree = tree_leaves(nan_tree, registry=registry)

    if bounds is not None:

        registry = get_registry(extended=True)
        flat_bounds = tree_leaves(bounds, registry=registry)

        separator = 10 * "$"
        params_names = leaf_names(nan_tree, registry=registry, separator=separator)
        bounds_names = leaf_names(bounds, registry=registry, separator=separator)

        flat_nan_dict = dict(zip(params_names, flat_nan_tree))

        invalid = {"names": [], "bounds": []}
        for bounds_name, bounds_leaf in zip(bounds_names, flat_bounds):

            # if a bounds leaf is None we treat it as saying that the corresponding
            # subtree of params has no bounds.
            if bounds_leaf is not None:
                if bounds_name in flat_nan_dict:
                    flat_nan_dict[bounds_name] = bounds_leaf
                else:
                    invalid["names"].append(bounds_name)
                    invalid["bounds"].append(bounds_leaf)

        if invalid["bounds"]:
            msg = (
                f"{direction} could not be matched to params pytree. The bounds "
                f"{invalid['bounds']} with names {invalid['names']} are not part of "
                "params.")
            raise InvalidBoundsError(msg)

        flat_nan_tree = list(flat_nan_dict.values())

    updated = np.array(flat_nan_tree, dtype=np.float64)
    return updated
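The matching above can be mimicked standalone: both trees are flattened with the same long separator, and bounds leaves overwrite NaN placeholders for matching names. A minimal sketch with a made-up params tree and a partial bounds tree; import paths are assumptions.

# Sketch of the name-based matching of a partial bounds pytree to params.
import numpy as np
from pybaum import leaf_names, tree_just_flatten

from estimagic.parameters.tree_registry import get_registry

params = {"utility": np.array([1.0, 2.0]), "shock_sd": 0.5}
lower_bounds = {"shock_sd": 0.0}  # only part of params is bounded

registry = get_registry(extended=True)
sep = 10 * "$"

flat_nan = dict.fromkeys(
    leaf_names(params, registry=registry, separator=sep), np.nan
)
for name, leaf in zip(
    leaf_names(lower_bounds, registry=registry, separator=sep),
    tree_just_flatten(lower_bounds, registry=registry),
):
    if name in flat_nan:
        flat_nan[name] = leaf

# Entries without a matching bound stay NaN, i.e. unbounded.
print(np.array(list(flat_nan.values()), dtype=np.float64))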
Example #4
def get_params_groups_and_short_names(params, free_mask, max_group_size=8):
    """Create parameter groups and short names.

    Args:
        params (pytree): parameters as supplied by the user.
        free_mask (np.array): 1d boolean array of same length as params, identifying
            the free parameters.
        max_group_size (int): maximal allowed size of a group. Groups that are larger
            than this will be split.

    Returns:
        groups (list): list of strings and None. For each entry in flat params the key
            of the group to which the parameter belongs. None if the parameter is not
            free.
        names (list): list of the parameter names to be displayed in the dashboard.

    """
    sep = "$$$+++"
    registry = get_registry(extended=True)
    paths = leaf_names(params, registry=registry, separator=sep)
    split_paths = [path.split(sep) for path in paths]

    groups = []
    names = []
    for path_list, is_free in zip(split_paths, free_mask):
        group, name = _get_group_and_name(path_list, is_free)
        groups.append(group)
        names.append(name)

    # if every parameter has its own group, they should all actually be in one group
    if len(pd.unique(groups)) == len(groups):
        groups = ["Parameters"] * len(groups)

    counts = pd.value_counts(groups)
    to_be_split = counts[counts > max_group_size]
    for group_name, n_occurrences in to_be_split.items():
        split_group_names = _split_long_group(
            group_name=group_name,
            n_occurrences=n_occurrences,
            max_group_size=max_group_size,
        )
        groups = _replace_too_common_groups(groups, group_name, split_group_names)

    return groups, names
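A hedged usage sketch; the import path is an assumption about where the function lives, and the params tree and free_mask are made up.

# Hypothetical usage of get_params_groups_and_short_names.
import numpy as np

from estimagic.parameters.parameter_groups import (
    get_params_groups_and_short_names,
)

params = {"utility": {"a": 1.0, "b": 2.0}, "fixed": 3.0}
free_mask = np.array([True, True, False])  # "fixed" is not a free parameter

groups, names = get_params_groups_and_short_names(params, free_mask)

# groups: one entry per flat parameter, None for non-free parameters;
# names: short display names for the dashboard. Exact values depend on the
# grouping heuristics (_get_group_and_name, _split_long_group).
print(groups)
print(names)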
Example #5
            )
        else:
            raise ValueError(f"Invalid kind: {kind}")

        if return_type == "array":
            out = raw
        elif return_type == "pytree":
            out = matrix_to_block_tree(
                raw,
                outer_tree=self._params,
                inner_tree=self._empirical_moments,
            )
        elif return_type == "dataframe":
            registry = get_registry(extended=True)
            row_names = self._internal_estimates.names
            col_names = leaf_names(self._empirical_moments, registry=registry)
            out = pd.DataFrame(
                data=raw,
                index=row_names,
                columns=col_names,
            )
        else:
            msg = (
                f"Invalid return type: {return_type}. Valid are 'pytree', 'array' "
                "and 'dataframe'")
            raise ValueError(msg)
        return out

    def to_pickle(self, path):
        """Save the MomentsResult object to pickle.
Example #6
def get_tree_converter(
    params,
    lower_bounds,
    upper_bounds,
    func_eval,
    primary_key,
    derivative_eval=None,
    soft_lower_bounds=None,
    soft_upper_bounds=None,
    add_soft_bounds=False,
):
    """Get flatten and unflatten functions for criterion and its derivative.

    The function creates a converter with methods to convert parameters, derivatives
    and the output of the criterion function between the user provided pytree structure
    and flat representations.

    The main motivation for bundling all of this together (as opposed to handling
    parameters, derivatives and function outputs separately) is that the derivative
    conversion needs to know about the structure of params and the criterion output.

    Args:
        params (pytree): The user provided parameters.
        lower_bounds (pytree): The user provided lower bounds.
        upper_bounds (pytree): The user provided upper bounds.
        func_eval (float, dict or pytree): An evaluation of ``func`` at ``params``.
            Used to determine how the function output has to be transformed for the
            optimizer.
        primary_key (str): One of "value", "contributions" and "root_contributions".
            Used to determine how the function and derivative output has to be
            transformed for the optimizer.
        derivative_eval (dict, pytree or None): Evaluation of the derivative of
            func at params. Used for consistency checks.
        soft_lower_bounds (pytree): As lower_bounds.
        soft_upper_bounds (pytree): As upper_bounds.
        add_soft_bounds (bool): Whether soft bounds should be added to the flat_params.

    Returns:
        TreeConverter: NamedTuple with flatten and unflatten methods.
        FlatParams: NamedTuple of 1d arrays with flattened bounds and param names.

    """
    _registry = get_registry(extended=True)
    _params_vec, _params_treedef = tree_flatten(params, registry=_registry)
    _params_vec = np.array(_params_vec).astype(float)
    _lower, _upper = get_bounds(
        params=params,
        lower_bounds=lower_bounds,
        upper_bounds=upper_bounds,
        registry=_registry,
    )

    if add_soft_bounds:
        _soft_lower, _soft_upper = get_bounds(
            params=params,
            lower_bounds=lower_bounds,
            upper_bounds=upper_bounds,
            registry=_registry,
            soft_lower_bounds=soft_lower_bounds,
            soft_upper_bounds=soft_upper_bounds,
            add_soft_bounds=add_soft_bounds,
        )
    else:
        _soft_lower, _soft_upper = None, None

    _param_names = leaf_names(params, registry=_registry)

    flat_params = FlatParams(
        values=_params_vec,
        lower_bounds=_lower,
        upper_bounds=_upper,
        names=_param_names,
        soft_lower_bounds=_soft_lower,
        soft_upper_bounds=_soft_upper,
    )

    _params_flatten = _get_params_flatten(registry=_registry)
    _params_unflatten = _get_params_unflatten(
        registry=_registry, treedef=_params_treedef
    )
    _func_flatten = _get_func_flatten(
        registry=_registry,
        func_eval=func_eval,
        primary_key=primary_key,
    )
    _derivative_flatten = _get_derivative_flatten(
        registry=_registry,
        primary_key=primary_key,
        params=params,
        func_eval=func_eval,
        derivative_eval=derivative_eval,
    )

    converter = TreeConverter(
        params_flatten=_params_flatten,
        params_unflatten=_params_unflatten,
        func_flatten=_func_flatten,
        derivative_flatten=_derivative_flatten,
    )

    return converter, flat_params
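The returned params_flatten / params_unflatten pair is essentially a registry-aware round trip between the params pytree and a flat vector. A minimal standalone sketch of that round trip using pybaum directly, with a made-up params tree.

# Sketch of the flatten/unflatten round trip performed by the TreeConverter.
import numpy as np
from pybaum import tree_flatten, tree_unflatten

from estimagic.parameters.tree_registry import get_registry

params = {"a": 1.0, "b": np.array([2.0, 3.0])}

registry = get_registry(extended=True)
flat, treedef = tree_flatten(params, registry=registry)
x = np.array(flat, dtype=float)  # the 1d vector an optimizer works on

x_new = x + 0.1  # e.g. a step proposed by the optimizer

# map the flat vector back into the original pytree structure
params_new = tree_unflatten(treedef, list(x_new), registry=registry)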
Example #7
def dashboard_app(
    doc,
    session_data,
    updating_options,
):
    """Create plots showing the development of the criterion and parameters.

    Args:
        doc (bokeh.Document): Argument required by bokeh.
        session_data (dict): Infos to be passed between and within apps.
            Keys of this app's entry are:
            - last_retrieved (int): last iteration currently in the ColumnDataSource.
            - database_path (str or pathlib.Path)
            - callbacks (dict): dictionary to be populated with callbacks.
        updating_options (dict): Specification of how to update the plotting data.
            It contains rollover, update_frequency, update_chunk, jump and stride.

    """
    # style the Document
    template_folder = Path(__file__).resolve().parent
    # conversion to string from pathlib Path is necessary for FileSystemLoader
    env = Environment(loader=FileSystemLoader(str(template_folder)))
    doc.template = env.get_template("index.html")

    # process inputs
    database = load_database(path=session_data["database_path"])
    start_point = _calculate_start_point(database, updating_options)
    session_data["last_retrieved"] = start_point

    # build start_params DataFrame
    registry = get_registry(extended=True)
    start_params_tree = read_start_params(path_or_database=database)
    internal_params = tree_just_flatten(tree=start_params_tree, registry=registry)
    full_names = leaf_names(start_params_tree, registry=registry)

    optimization_problem = read_last_rows(
        database=database,
        table_name="optimization_problem",
        n_rows=1,
        return_type="dict_of_lists",
    )
    free_mask = optimization_problem["free_mask"][0]
    params_groups, short_names = get_params_groups_and_short_names(
        params=start_params_tree, free_mask=free_mask
    )
    start_params = pd.DataFrame(
        {
            "full_name": full_names,
            "name": short_names,
            "group": params_groups,
            "value": internal_params,
        }
    )
    start_params["id"] = _create_id_column(start_params)

    group_to_param_ids = _map_group_to_other_column(start_params, "id")
    group_to_param_names = _map_group_to_other_column(start_params, "name")
    criterion_history, params_history = _create_cds_for_dashboard(group_to_param_ids)

    # create elements
    title_text = """<h1 style="font-size:30px;">estimagic Dashboard</h1>"""
    title = Row(
        children=[
            Div(
                text=title_text,
                sizing_mode="scale_width",
            )
        ],
        name="title",
        margin=(5, 5, -20, 5),
    )
    plots = _create_initial_plots(
        criterion_history=criterion_history,
        params_history=params_history,
        group_to_param_ids=group_to_param_ids,
        group_to_param_names=group_to_param_names,
    )

    restart_button = _create_restart_button(
        doc=doc,
        database=database,
        session_data=session_data,
        start_params=start_params,
        updating_options=updating_options,
    )
    button_row = Row(
        children=[restart_button],
        name="button_row",
    )

    # add elements to bokeh Document
    grid = Column(children=[title, button_row, *plots], sizing_mode="stretch_width")
    doc.add_root(grid)

    # start the convergence plot immediately
    restart_button.active = True
Example #8
def test_leaf_names_partially_numeric_df(other_df):
    registry = get_registry(extended=True)
    names = leaf_names(other_df, registry=registry)
    assert names == [
        "alpha_b", "alpha_c", "beta_b", "beta_c", "gamma_b", "gamma_c"
    ]
Example #9
def test_leaf_names_df_with_value_column(value_df):
    registry = get_registry(extended=True)
    names = leaf_names(value_df, registry=registry)
    assert names == ["alpha", "beta", "gamma"]
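Hypothetical fixtures consistent with the two tests above; the real fixtures are not part of this snippet, so column names and values are assumptions.

import pandas as pd

# other_df: no "value" column, so only the numeric columns are flattened and
# leaf names combine the index label with the column name.
other_df = pd.DataFrame(
    {"a": ["x", "y", "z"], "b": [1.0, 2.0, 3.0], "c": [4, 5, 6]},
    index=["alpha", "beta", "gamma"],
)

# value_df: has a "value" column, so leaf names are just the index labels.
value_df = pd.DataFrame(
    {"value": [1.0, 2.0, 3.0]},
    index=["alpha", "beta", "gamma"],
)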
Example #10
def params_plot(
    result,
    selector=None,
    max_evaluations=None,
    template=PLOTLY_TEMPLATE,
    show_exploration=False,
):
    """Plot the params history of an optimization.

    Args:
        result (Union[OptimizeResult, pathlib.Path, str]): An optimization result with
            collected history, or a path to a log file.
        selector (callable): A callable that takes params and returns a subset
            of params. If provided, only the selected subset of params is plotted.
        max_evaluations (int): Clip the params history after that many entries.
        template (str): The template for the figure. Default is "plotly_white".
        show_exploration (bool): If True, exploration samples of a multistart
            optimization are visualized. Default is False.

    Returns:
        plotly.graph_objs._figure.Figure: The figure.

    """
    # ==================================================================================
    # Process inputs
    # ==================================================================================

    if isinstance(result, OptimizeResult):
        data = _extract_plotting_data_from_results_object(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
            plot_name="params_plot",
        )
        start_params = result.start_params
    elif isinstance(result, (str, Path)):
        data = _extract_plotting_data_from_database(
            result,
            stack_multistart=True,
            show_exploration=show_exploration,
        )
        start_params = data["start_params"]
    else:
        raise ValueError("result must be an OptimizeResult or a path to a log file.")

    if data["stacked_local_histories"] is not None:
        history = data["stacked_local_histories"]["params"]
    else:
        history = data["history"]["params"]

    # ==================================================================================
    # Create figure
    # ==================================================================================

    fig = go.Figure()

    registry = get_registry(extended=True)

    hist_arr = np.array([tree_just_flatten(p, registry=registry) for p in history]).T
    names = leaf_names(start_params, registry=registry)

    if selector is not None:
        flat, treedef = tree_flatten(start_params, registry=registry)
        helper = tree_unflatten(treedef, list(range(len(flat))), registry=registry)
        selected = np.array(tree_just_flatten(selector(helper), registry=registry))
        names = [names[i] for i in selected]
        hist_arr = hist_arr[selected]

    for name, values in zip(names, hist_arr):
        if max_evaluations is not None and len(values) > max_evaluations:
            values = values[:max_evaluations]

        trace = go.Scatter(
            x=np.arange(len(values)),
            y=values,
            mode="lines",
            name=name,
        )
        fig.add_trace(trace)

    fig.update_layout(
        template=template,
        xaxis_title_text="No. of criterion evaluations",
        yaxis_title_text="Parameter value",
        legend={"yanchor": "top", "xanchor": "right", "y": 0.95, "x": 0.95},
    )

    return fig
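A hedged end-to-end sketch: run a small optimization and plot its params history. It assumes estimagic exposes minimize and params_plot at the top level; the toy criterion is made up.

# Toy optimization followed by a params history plot.
import numpy as np
import estimagic as em


def sphere(params):
    return params["x"] @ params["x"] + params["intercept"] ** 2


res = em.minimize(
    criterion=sphere,
    params={"x": np.arange(3.0), "intercept": 2.0},
    algorithm="scipy_lbfgsb",
)

fig = em.params_plot(res, max_evaluations=100)
fig.show()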
Example #11
def test_calculate_estimation_summary():
    # input data
    summary_data = {
        "value": {
            "a": pd.Series([0], index=["i"]),
            "b": pd.DataFrame({
                "c1": [1],
                "c2": [2]
            }),
        },
        "standard_error": {
            "a": pd.Series([0.1], index=["i"]),
            "b": pd.DataFrame({
                "c1": [0.2],
                "c2": [0.3]
            }),
        },
        "ci_lower": {
            "a": pd.Series([-0.2], index=["i"]),
            "b": pd.DataFrame({
                "c1": [-0.4],
                "c2": [-0.6]
            }),
        },
        "ci_upper": {
            "a": pd.Series([0.2], index=["i"]),
            "b": pd.DataFrame({
                "c1": [0.4],
                "c2": [0.6]
            }),
        },
        "p_value": {
            "a": pd.Series([0.001], index=["i"]),
            "b": pd.DataFrame({
                "c1": [0.2],
                "c2": [0.07]
            }),
        },
        "free": np.array([True, True, True]),
    }

    registry = get_registry(extended=True)
    names = leaf_names(summary_data["value"], registry=registry)
    free_names = names

    # function call
    summary = calculate_estimation_summary(summary_data, names, free_names)

    # expectations
    expectation = {
        "a": pd.DataFrame(
            {
                "value": 0,
                "standard_error": 0.1,
                "ci_lower": -0.2,
                "ci_upper": 0.2,
                "p_value": 0.001,
                "free": True,
                "stars": "***",
            },
            index=["i"],
        ),
        "b": pd.DataFrame(
            {
                "value": [1, 2],
                "standard_error": [0.2, 0.3],
                "ci_lower": [-0.4, -0.6],
                "ci_upper": [0.4, 0.6],
                "p_value": [0.2, 0.07],
                "free": [True, True],
                "stars": ["", "*"],
            },
            index=pd.MultiIndex.from_tuples([(0, "c1"), (0, "c2")]),
        ),
    }

    assert tree_equal(summary, expectation)
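The stars in the expectation follow conventional significance thresholds; the sketch below is an inference from the expected output, not the actual implementation inside calculate_estimation_summary.

# Star assignment consistent with the expected DataFrames above.
def stars(p):
    if p < 0.01:
        return "***"
    elif p < 0.05:
        return "**"
    elif p < 0.1:
        return "*"
    return ""


assert [stars(p) for p in [0.001, 0.2, 0.07]] == ["***", "", "*"]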