Exemplo n.º 1
0
def calculate_fundamental_sensitivity_to_removal(jac, moments_cov,
                                                 params_cov_opt):
    """Calculate the fundamental sensitivity to removal.

    The sensitivity measure is calculated for each parameter wrt each moment.

    It answers the following question: How much precision would be lost if the kth
        moment was excluded from the estimation, given that the optimal weighting
        matrix is used?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments with
            respect to params, evaluated at the point estimates.
        moments_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix of the
            empirical moments.
        params_cov_opt (numpy.ndarray or pandas.DataFrame): The covariance matrix of the
            parameter estimates. Note that this needs to be the parameter covariance
            matrix using the formula for asymptotically optimal MSM.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape (n_params, n_moments)

    """
    _jac, _moments_cov, _params_cov_opt, names = process_pandas_arguments(
        jac=jac,
        moments_cov=moments_cov,
        params_cov_opt=params_cov_opt,
    )
    m5 = []

    for k in range(len(_moments_cov)):
        # np.delete returns a new array, so the inputs are left untouched and
        # no explicit copy is required before dropping the kth moment.
        g_k = np.delete(_jac, k, axis=0)
        s_k = np.delete(np.delete(_moments_cov, k, axis=0), k, axis=1)

        # Parameter covariance obtained when the kth moment is left out.
        sigma_k = _sandwich(g_k, robust_inverse(s_k, INVALID_SENSITIVITY_MSG))
        sigma_k = robust_inverse(sigma_k, INVALID_SENSITIVITY_MSG)

        # Only the change in the parameter variances (the diagonal) is kept.
        m5.append((sigma_k - _params_cov_opt).diagonal())

    # One row per moment was collected; transpose to (n_params, n_moments).
    m5 = np.array(m5).T

    # Express the variance change relative to each parameter's variance.
    params_variances = np.diagonal(_params_cov_opt)
    e5 = m5 / params_variances.reshape(-1, 1)

    if names:
        e5 = pd.DataFrame(e5,
                          index=names.get("params"),
                          columns=names.get("moments"))

    return e5
Exemplo n.º 2
0
def cov_hessian(hess):
    """Estimate the parameter covariance from the hessian of loglike.

    The covariance is the inverse of the negative hessian. Compared to an
    estimate based on the outer product of gradients, this approach needs
    slightly weaker statistical assumptions but is numerically more fragile:

    - A hessian is harder to obtain numerically or with automatic
      differentiation than a gradient / jacobian.
    - The resulting hessian might not be positive definite and thus not invertible.

    Args:
        hess (numpy.ndarray): 2d array hessian matrix of dimension (nparams, nparams)

    Returns:
       numpy.ndarray: covariance matrix (nparams, nparams). A DataFrame labeled
           with the parameter names is returned when such names are available.


    Resources: Marno Verbeek - A guide to modern econometrics :cite:`Verbeek2008`

    """
    _hess, names = process_pandas_arguments(hess=hess)
    covariance = robust_inverse(-1 * _hess, msg=INVALID_INFERENCE_MSG)

    if "params" not in names:
        return covariance

    labels = names["params"]
    return pd.DataFrame(covariance, columns=labels, index=labels)
Exemplo n.º 3
0
def calculate_sensitivity_to_bias(jac, weights):
    """Calculate the sensitivity to bias.

    The measure is computed for every parameter with respect to every moment.

    It answers the following question: How strongly would the parameter estimates
        be biased if the kth moment was misspecified, i.e. not zero in expectation?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments with
            respect to params, evaluated at the point estimates.
        weights (np.ndarray or pandas.DataFrame): The weighting matrix used for
            msm estimation.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape (n_params, n_moments)

    """
    _jac, _weights, names = process_pandas_arguments(jac=jac, weights=weights)
    inverse_gwg = robust_inverse(_sandwich(_jac, _weights),
                                 INVALID_SENSITIVITY_MSG)
    sensitivity = -inverse_gwg @ _jac.T @ _weights

    if not names:
        return sensitivity

    return pd.DataFrame(sensitivity,
                        index=names.get("params"),
                        columns=names.get("moments"))
Exemplo n.º 4
0
def cov_robust(jac, weights, moments_cov):
    """Calculate the cov of msm estimates with asymptotically non-efficient weights.

    Note that asymptotically non-efficient weights are typically preferable because
    they lead to less finite sample bias.

    Args:
        jac (np.ndarray or pandas.DataFrame): Numpy array or DataFrame with the jacobian
            of simulate_moments with respect to params. The derivative needs to be taken
            at the estimated parameters. Has shape n_moments, n_params.
        weights (np.ndarray): The weighting matrix for msm estimation.
        moments_cov (np.ndarray): The covariance matrix of the empirical moments.

    Returns:
        numpy.ndarray or pandas.DataFrame: The covariance matrix of the parameters.
            A DataFrame is returned when names could be extracted from the inputs.

    """
    _jac, _weights, _moments_cov, names = process_pandas_arguments(
        jac=jac, weights=weights, moments_cov=moments_cov)

    # Shared left factor of both the outer and the inner part of the sandwich.
    jac_t_weights = _jac.T @ _weights

    outer = robust_inverse(jac_t_weights @ _jac, msg=INVALID_INFERENCE_MSG)
    inner = jac_t_weights @ _moments_cov @ _weights @ _jac

    covariance = outer @ inner @ outer

    if not names:
        return covariance

    return pd.DataFrame(covariance,
                        columns=names.get("params"),
                        index=names.get("params"))
Exemplo n.º 5
0
def cov_optimal(jac, weights):
    """Calculate the cov of msm estimates with asymptotically efficient weights.

    Note that asymptotically efficient weights have substantial finite sample
    bias and are typically not a good choice.

    Args:
        jac (np.ndarray or pandas.DataFrame): Numpy array or DataFrame with the jacobian
            of simulate_moments with respect to params. The derivative needs to be taken
            at the estimated parameters. Has shape n_moments, n_params.
        weights (np.ndarray): The weighting matrix for msm estimation.

    Returns:
        numpy.ndarray or pandas.DataFrame: The covariance matrix of the parameters.
            A DataFrame is returned when names could be extracted from the inputs.

    """
    _jac, _weights, names = process_pandas_arguments(jac=jac, weights=weights)

    weighted_gram = _jac.T @ _weights @ _jac
    covariance = robust_inverse(weighted_gram, msg=INVALID_INFERENCE_MSG)

    if not names:
        return covariance

    return pd.DataFrame(covariance,
                        columns=names.get("params"),
                        index=names.get("params"))
Exemplo n.º 6
0
def cov_robust(jac, hess):
    """Covariance of parameters based on the HJJH dot product.

    H stands for the Hessian of the log likelihood function and J for the
    Jacobian of the log likelihood per individual.

    Args:
        jac (numpy.ndarray): 2d array jacobian matrix of dimension (nobs, nparams)
        hess (numpy.ndarray): 2d array hessian matrix of dimension (nparams, nparams)


    Returns:
        numpy.ndarray: covariance HJJH matrix (nparams, nparams). A DataFrame
            labeled with the parameter names is returned when such names are
            available.

    Resources:
        https://tinyurl.com/yym5d4cw

    """
    _jac, _hess, names = process_pandas_arguments(jac=jac, hess=hess)

    # Inverse of the negative hessian; used on both sides of the J'J product.
    inverse_info = robust_inverse(-_hess, msg=INVALID_INFERENCE_MSG)

    covariance = inverse_info @ _jac.T @ _jac @ inverse_info

    if "params" not in names:
        return covariance

    labels = names["params"]
    return pd.DataFrame(covariance, columns=labels, index=labels)
Exemplo n.º 7
0
def get_weighting_matrix(moments_cov,
                         method,
                         empirical_moments,
                         clip_value=1e-6,
                         return_type="pytree"):
    """Calculate a weighting matrix from moments_cov.

    Args:
        moments_cov (pandas.DataFrame or numpy.ndarray): Square DataFrame or Array
            with the covariance matrix of the moment conditions for msm estimation.
        method (str): One of "optimal", "diagonal".
        empirical_moments (pytree): Pytree containing empirical moments. Used to get
            the tree structure. It is not used when moments_cov is a 2d numpy array.
        clip_value (float): Bound at which diagonal elements of the moments_cov are
            clipped to avoid dividing by zero.
        return_type (str): One of "pytree", "array" or "pytree_and_array"

    Returns:
        The weighting matrix. For return_type "array" a numpy array; for "pytree"
            a block tree (or the plain array if moments_cov was a 2d numpy array);
            for "pytree_and_array" a tuple with both representations (twice the
            same array if moments_cov was a 2d numpy array).

    Raises:
        ValueError: If method or return_type is not one of the supported values.

    """
    # Validate the options up front so that typos fail loudly before any
    # conversion work is done, instead of silently selecting a fallback branch.
    valid_return_types = ("pytree", "array", "pytree_and_array")
    if return_type not in valid_return_types:
        raise ValueError(
            f"Invalid return_type: {return_type}. "
            f"Must be one of {valid_return_types}.")
    if method not in ("optimal", "diagonal"):
        raise ValueError(f"Invalid method: {method}")

    # A plain 2d array carries no tree structure, so no conversion is needed.
    fast_path = isinstance(moments_cov, np.ndarray) and moments_cov.ndim == 2

    if fast_path:
        _internal_cov = moments_cov
    else:
        _internal_cov = block_tree_to_matrix(
            moments_cov,
            outer_tree=empirical_moments,
            inner_tree=empirical_moments,
        )

    if method == "optimal":
        array_weights = robust_inverse(_internal_cov)
    else:
        # Clip small (or non-positive) variances to avoid dividing by zero.
        clipped_variances = np.clip(np.diagonal(_internal_cov), clip_value,
                                    np.inf)
        array_weights = np.diag(1 / clipped_variances)

    if return_type == "array":
        return array_weights

    if fast_path:
        # Without a tree structure the array doubles as the "pytree" result.
        if return_type == "pytree":
            return array_weights
        return (array_weights, array_weights)

    tree_weights = matrix_to_block_tree(
        array_weights,
        outer_tree=empirical_moments,
        inner_tree=empirical_moments,
    )
    if return_type == "pytree":
        return tree_weights
    return (tree_weights, array_weights)
Exemplo n.º 8
0
def _sandwich_step(hess, meat):
    """Apply the sandwich formula used for robust variance estimation.

    The inverse of ``hess`` is multiplied onto ``meat`` from both sides. This
    is used in several robust covariance formulae.

    Args:
        hess (np.array): "hessian" - a k + 1 x k + 1-dimensional array of
            second derivatives of the pseudo-log-likelihood function w.r.t.
            the parameters
        meat (np.array): the variance of the total scores

    Returns:
        var (np.array): 2d variance-covariance matrix

    """
    bread = robust_inverse(hess, INVALID_INFERENCE_MSG)
    bread_times_meat = np.dot(bread, meat)
    return np.dot(bread_times_meat, bread)
Exemplo n.º 9
0
def cov_jacobian(jac):
    """Covariance based on the outer product of the jacobian of loglikeobs.

    The covariance is obtained by inverting ``jac.T @ jac``.

    Args:
        jac (numpy.ndarray): 2d array jacobian matrix of dimension (nobs, nparams)

    Returns:
        numpy.ndarray: covariance matrix of size (nparams, nparams). A DataFrame
            labeled with the parameter names is returned when such names are
            available.


    Resources: Marno Verbeek - A guide to modern econometrics.

    """
    _jac, names = process_pandas_arguments(jac=jac)

    outer_product = np.dot(_jac.T, _jac)
    covariance = robust_inverse(outer_product, msg=INVALID_INFERENCE_MSG)

    if "params" not in names:
        return covariance

    labels = names["params"]
    return pd.DataFrame(covariance, columns=labels, index=labels)
Exemplo n.º 10
0
def test_robust_inverse_singular():
    """robust_inverse warns about a LinAlgError and yields zeros for a zero matrix."""
    shape = (5, 5)
    singular = np.zeros(shape)
    all_zeros = np.zeros(shape)
    with pytest.warns(UserWarning, match="LinAlgError"):
        result = robust_inverse(singular)
    aaae(result, all_zeros)
Exemplo n.º 11
0
def test_robust_inverse_nonsingular():
    """robust_inverse agrees with np.linalg.inv on an invertible matrix."""
    invertible = np.eye(3) + 0.2
    reference = np.linalg.inv(invertible)
    aaae(robust_inverse(invertible), reference)
Exemplo n.º 12
0
def calculate_sensitivity_to_weighting(jac, weights, moments_cov, params_cov):
    """Calculate the sensitivity to weighting.

    The measure is computed for every parameter with respect to every moment.

    It answers the following question: How would the precision change if the weight
        of the kth moment is increased a little?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments with
            respect to params, evaluated at the point estimates.
        weights (np.ndarray or pandas.DataFrame): The weighting matrix used for
            msm estimation.
        moments_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix of the
            empirical moments.
        params_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix of the
            parameter estimates.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape (n_params, n_moments)

    """
    _jac, _weights, _moments_cov, _params_cov, names = process_pandas_arguments(
        jac=jac,
        weights=weights,
        moments_cov=moments_cov,
        params_cov=params_cov)
    gwg_inverse = robust_inverse(_sandwich(_jac, _weights),
                                 INVALID_SENSITIVITY_MSG)

    rows = []

    for k in range(len(_weights)):
        # Matrix that is zero everywhere except for a one at position (k, k),
        # i.e. it singles out the kth moment.
        selector = np.zeros(shape=_weights.shape)
        selector[k, k] = 1

        first = gwg_inverse @ _sandwich(_jac, selector) @ _params_cov
        second = (gwg_inverse @ _jac.T @ selector @ _moments_cov @ _weights
                  @ _jac @ gwg_inverse)
        third = (gwg_inverse @ _jac.T @ _weights @ _moments_cov @ selector
                 @ _jac @ gwg_inverse)
        fourth = _params_cov @ _sandwich(_jac, selector) @ gwg_inverse

        combined = -first + second + third - fourth
        # Only the entries belonging to the parameter variances are kept.
        rows.append(combined.diagonal())

    # One row per moment was collected; transpose to (n_params, n_moments).
    m6 = np.array(rows).T

    # Scale by the parameter variances (row-wise) and then by the diagonal
    # of the weighting matrix (column-wise).
    variances_column = np.diagonal(_params_cov).reshape(-1, 1)
    e6 = (m6 / variances_column) * np.diagonal(_weights)

    if not names:
        return e6

    return pd.DataFrame(e6,
                        index=names.get("params"),
                        columns=names.get("moments"))