def calculate_fundamental_sensitivity_to_removal(jac, moments_cov, params_cov_opt):
    """Calculate the fundamental sensitivity to removal.

    The sensitivity measure is calculated for each parameter wrt each moment.

    It answers the following question: How much precision would be lost if the
    kth moment was excluded from the estimation and the optimal weighting
    matrix is used?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments
            with respect to params, evaluated at the point estimates.
        moments_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix
            of the empirical moments.
        params_cov_opt (numpy.ndarray or pandas.DataFrame): The covariance
            matrix of the parameter estimates. Note that this needs to be the
            parameter covariance matrix based on the formula for asymptotically
            optimal MSM.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape
            (n_params, n_moments).

    """
    _jac, _moments_cov, _params_cov_opt, names = process_pandas_arguments(
        jac=jac,
        moments_cov=moments_cov,
        params_cov_opt=params_cov_opt,
    )

    m5 = []
    for k in range(len(_moments_cov)):
        # Remove the kth moment from the jacobian and from moments_cov.
        g_k = np.copy(_jac)
        g_k = np.delete(g_k, k, axis=0)

        s_k = np.copy(_moments_cov)
        s_k = np.delete(s_k, k, axis=0)
        s_k = np.delete(s_k, k, axis=1)

        # Optimal parameter covariance without the kth moment.
        sigma_k = _sandwich(g_k, robust_inverse(s_k, INVALID_SENSITIVITY_MSG))
        sigma_k = robust_inverse(sigma_k, INVALID_SENSITIVITY_MSG)

        m5k = sigma_k - _params_cov_opt
        m5k = m5k.diagonal()
        m5.append(m5k)

    m5 = np.array(m5).T

    params_variances = np.diagonal(_params_cov_opt)
    e5 = m5 / params_variances.reshape(-1, 1)

    if names:
        e5 = pd.DataFrame(e5, index=names.get("params"), columns=names.get("moments"))

    return e5
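A minimal usage sketch, assuming the helpers above (process_pandas_arguments, robust_inverse, _sandwich) are importable. The jacobian and moments_cov values are invented for illustration; params_cov_opt is computed with the optimal-weighting formula (G' S^{-1} G)^{-1}, as the docstring requires.

import numpy as np

jac = np.array([[1.0, 0.0], [0.5, 1.0], [0.2, 0.3]])  # (n_moments, n_params)
moments_cov = np.diag([1.0, 2.0, 3.0])  # invented covariance of empirical moments

# Parameter covariance under optimal weighting: (G' S^{-1} G)^{-1}.
params_cov_opt = np.linalg.inv(jac.T @ np.linalg.inv(moments_cov) @ jac)

e5 = calculate_fundamental_sensitivity_to_removal(jac, moments_cov, params_cov_opt)
# e5[i, k] is the relative increase in Var(param i) when moment k is dropped:
# (Var_without_k - Var_with_all) / Var_with_all.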
def cov_hessian(hess):
    """Covariance based on the negative inverse of the hessian of loglike.

    While this method makes slightly weaker statistical assumptions than a
    covariance estimate based on the outer product of gradients, it is
    numerically much more problematic for the following reasons:

    - It is much more difficult to estimate a hessian numerically or with
      automatic differentiation than it is to estimate the gradient / jacobian.
    - The resulting hessian might not be positive definite and thus not
      invertible.

    Args:
        hess (numpy.ndarray): 2d array hessian matrix of dimension
            (nparams, nparams).

    Returns:
        numpy.ndarray: covariance matrix (nparams, nparams)

    Resources:
        Marno Verbeek - A guide to modern econometrics :cite:`Verbeek2008`

    """
    _hess, names = process_pandas_arguments(hess=hess)

    info_matrix = -1 * _hess
    cov = robust_inverse(info_matrix, msg=INVALID_INFERENCE_MSG)

    if "params" in names:
        cov = pd.DataFrame(cov, columns=names["params"], index=names["params"])

    return cov
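A usage sketch with an invented hessian (any negative definite matrix works, since the covariance is inv(-hess)); it assumes cov_hessian and its helpers are importable.

import numpy as np

# Invented hessian of a log likelihood at the optimum (negative definite).
hess = np.array([[-10.0, 2.0], [2.0, -8.0]])

cov = cov_hessian(hess)  # equals np.linalg.inv(-hess)
standard_errors = np.sqrt(np.diagonal(cov))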
def calculate_sensitivity_to_bias(jac, weights):
    """Calculate the sensitivity to bias.

    The sensitivity measure is calculated for each parameter wrt each moment.

    It answers the following question: How strongly would the parameter
    estimates be biased if the kth moment was misspecified, i.e. not zero in
    expectation?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments
            with respect to params, evaluated at the point estimates.
        weights (np.ndarray or pandas.DataFrame): The weighting matrix used for
            msm estimation.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape
            (n_params, n_moments).

    """
    _jac, _weights, names = process_pandas_arguments(jac=jac, weights=weights)

    gwg = _sandwich(_jac, _weights)
    gwg_inverse = robust_inverse(gwg, INVALID_SENSITIVITY_MSG)

    m1 = -gwg_inverse @ _jac.T @ _weights

    if names:
        m1 = pd.DataFrame(m1, index=names.get("params"), columns=names.get("moments"))

    return m1
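The sensitivity and covariance functions in this section rely on a _sandwich helper that is not shown. Judging from how it is called (e.g. gwg = _sandwich(_jac, _weights) standing in for G' W G), it presumably computes a.T @ b @ a; the sketch below is a plausible reconstruction, not the library's actual implementation.

def _sandwich(a, b):
    """Compute the sandwich product a.T @ b @ a.

    Reconstructed from the call sites above; the real helper may differ.
    """
    return a.T @ b @ a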
def cov_robust(jac, weights, moments_cov):
    """Calculate the cov of msm estimates with asymptotically non-efficient weights.

    Note that asymptotically non-efficient weights are typically preferable
    because they lead to less finite sample bias.

    Args:
        jac (np.ndarray or pandas.DataFrame): Numpy array or DataFrame with the
            jacobian of simulate_moments with respect to params. The derivative
            needs to be taken at the estimated parameters. Has shape
            (n_moments, n_params).
        weights (np.ndarray): The weighting matrix for msm estimation.
        moments_cov (np.ndarray): The covariance matrix of the empirical
            moments.

    Returns:
        numpy.ndarray: numpy array with covariance matrix.

    """
    _jac, _weights, _moments_cov, names = process_pandas_arguments(
        jac=jac, weights=weights, moments_cov=moments_cov
    )

    bread = robust_inverse(
        _jac.T @ _weights @ _jac,
        msg=INVALID_INFERENCE_MSG,
    )
    butter = _jac.T @ _weights @ _moments_cov @ _weights @ _jac

    cov = bread @ butter @ bread

    if names:
        cov = pd.DataFrame(cov, columns=names.get("params"), index=names.get("params"))

    return cov
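A usage sketch with invented arrays, assuming the helpers are importable. Diagonal weights are a typical non-efficient choice; because W differs from inv(S) here, the full sandwich formula is needed.

import numpy as np

jac = np.array([[1.0, 0.0], [0.5, 1.0], [0.2, 0.3]])  # (n_moments, n_params)
moments_cov = np.array([[1.0, 0.3, 0.0], [0.3, 2.0, 0.1], [0.0, 0.1, 3.0]])
weights = np.diag(1 / np.diagonal(moments_cov))  # diagonal weights, W != inv(S)

cov = cov_robust(jac, weights, moments_cov)  # (n_params, n_params)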
def cov_optimal(jac, weights):
    """Calculate the cov of msm estimates with asymptotically efficient weights.

    Note that asymptotically efficient weights have substantial finite sample
    bias and are typically not a good choice.

    Args:
        jac (np.ndarray or pandas.DataFrame): Numpy array or DataFrame with the
            jacobian of simulate_moments with respect to params. The derivative
            needs to be taken at the estimated parameters. Has shape
            (n_moments, n_params).
        weights (np.ndarray): The weighting matrix for msm estimation.

    Returns:
        numpy.ndarray: numpy array with covariance matrix.

    """
    _jac, _weights, names = process_pandas_arguments(jac=jac, weights=weights)

    cov = robust_inverse(_jac.T @ _weights @ _jac, msg=INVALID_INFERENCE_MSG)

    if names:
        cov = pd.DataFrame(cov, columns=names.get("params"), index=names.get("params"))

    return cov
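A sanity-check sketch relating the two estimators above: with efficient weights W = inv(S), the butter term collapses, G'WSWG = G'WG, so cov_robust reduces to the simpler cov_optimal formula.

import numpy as np

jac = np.array([[1.0, 0.0], [0.5, 1.0], [0.2, 0.3]])
moments_cov = np.array([[1.0, 0.3, 0.0], [0.3, 2.0, 0.1], [0.0, 0.1, 3.0]])
weights = np.linalg.inv(moments_cov)  # asymptotically efficient weights

# Both formulas agree (up to floating point noise) when W = inv(S).
np.testing.assert_allclose(
    cov_robust(jac, weights, moments_cov),
    cov_optimal(jac, weights),
)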
def cov_robust(jac, hess):
    """Covariance of parameters based on the sandwich product HJJH.

    H stands for the Hessian of the log likelihood function and J for the
    Jacobian of the log likelihood per individual.

    Args:
        jac (numpy.ndarray): 2d array jacobian matrix of dimension
            (nobs, nparams).
        hess (numpy.ndarray): 2d array hessian matrix of dimension
            (nparams, nparams).

    Returns:
        numpy.ndarray: covariance HJJH matrix (nparams, nparams)

    Resources:
        https://tinyurl.com/yym5d4cw

    """
    _jac, _hess, names = process_pandas_arguments(jac=jac, hess=hess)

    info_matrix_hess = -_hess
    cov_hess = robust_inverse(info_matrix_hess, msg=INVALID_INFERENCE_MSG)

    cov = cov_hess @ _jac.T @ _jac @ cov_hess

    if "params" in names:
        cov = pd.DataFrame(cov, columns=names["params"], index=names["params"])

    return cov
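A sketch on invented inputs. Under the information equality (-H = J'J, i.e. a correctly specified likelihood) the sandwich collapses to the hessian-based covariance, so a large gap between this estimator and cov_hessian hints at misspecification.

import numpy as np

rng = np.random.default_rng(1)
jac = rng.normal(size=(200, 2))  # invented per-observation scores
hess = -jac.T @ jac  # invented hessian satisfying the information equality

cov = cov_robust(jac, hess)
# With hess = -J'J exactly, this reduces to inv(J'J), i.e. cov_jacobian(jac).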
def get_weighting_matrix(
    moments_cov, method, empirical_moments, clip_value=1e-6, return_type="pytree"
):
    """Calculate a weighting matrix from moments_cov.

    Args:
        moments_cov (pandas.DataFrame or numpy.ndarray): Square DataFrame or
            array with the covariance matrix of the moment conditions for msm
            estimation.
        method (str): One of "optimal", "diagonal".
        empirical_moments (pytree): Pytree containing empirical moments. Used
            to get the tree structure.
        clip_value (float): Bound at which diagonal elements of moments_cov are
            clipped to avoid dividing by zero.
        return_type (str): One of "pytree", "array" or "pytree_and_array".

    Returns:
        pandas.DataFrame or numpy.ndarray: Weighting matrix with the same shape
            as moments_cov.

    """
    fast_path = isinstance(moments_cov, np.ndarray) and moments_cov.ndim == 2

    if fast_path:
        _internal_cov = moments_cov
    else:
        _internal_cov = block_tree_to_matrix(
            moments_cov,
            outer_tree=empirical_moments,
            inner_tree=empirical_moments,
        )

    if method == "optimal":
        array_weights = robust_inverse(_internal_cov)
    elif method == "diagonal":
        diagonal_values = 1 / np.clip(np.diagonal(_internal_cov), clip_value, np.inf)
        array_weights = np.diag(diagonal_values)
    else:
        raise ValueError(f"Invalid method: {method}")

    if return_type == "array" or (fast_path and "_and_" not in return_type):
        out = array_weights
    elif fast_path:
        out = (array_weights, array_weights)
    else:
        tree_weights = matrix_to_block_tree(
            array_weights,
            outer_tree=empirical_moments,
            inner_tree=empirical_moments,
        )
        if return_type == "pytree":
            out = tree_weights
        else:
            out = (tree_weights, array_weights)

    return out
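A usage sketch for the numpy fast path. Because moments_cov is already a 2d array, empirical_moments is never touched and plain arrays are returned; the values are invented for illustration.

import numpy as np

moments_cov = np.array([[2.0, 0.5], [0.5, 1.0]])

w_optimal = get_weighting_matrix(moments_cov, "optimal", empirical_moments=None)
# approximately np.linalg.inv(moments_cov)

w_diagonal = get_weighting_matrix(moments_cov, "diagonal", empirical_moments=None)
# diag(1 / clipped diagonal of moments_cov) = diag([0.5, 1.0])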
def _sandwich_step(hess, meat):
    """The sandwich estimator for variance estimation.

    This is used in several robust covariance formulae.

    Args:
        hess (np.ndarray): "hessian" - a (k + 1, k + 1) array of second
            derivatives of the pseudo-log-likelihood function w.r.t. the
            parameters.
        meat (np.ndarray): the variance of the total scores.

    Returns:
        var (np.ndarray): 2d variance-covariance matrix.

    """
    invhessian = robust_inverse(hess, INVALID_INFERENCE_MSG)
    var = np.dot(np.dot(invhessian, meat), invhessian)
    return var
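A minimal sketch of the helper on invented inputs; note that, unlike cov_hessian above, this function inverts hess as passed, without negating it.

import numpy as np

hess = np.array([[-5.0, 1.0], [1.0, -4.0]])  # invented pseudo-log-likelihood hessian
meat = np.array([[4.5, -0.8], [-0.8, 3.9]])  # invented variance of the total scores

var = _sandwich_step(hess, meat)  # inv(hess) @ meat @ inv(hess)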
def cov_jacobian(jac):
    """Covariance based on outer product of jacobian of loglikeobs.

    Args:
        jac (numpy.ndarray): 2d array jacobian matrix of dimension
            (nobs, nparams).

    Returns:
        numpy.ndarray: covariance matrix of size (nparams, nparams)

    Resources:
        Marno Verbeek - A guide to modern econometrics.

    """
    _jac, names = process_pandas_arguments(jac=jac)

    info_matrix = np.dot(_jac.T, _jac)
    cov = robust_inverse(info_matrix, msg=INVALID_INFERENCE_MSG)

    if "params" in names:
        cov = pd.DataFrame(cov, columns=names["params"], index=names["params"])

    return cov
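A sketch of the outer-product-of-gradients estimator on invented per-observation scores. Since the information matrix J'J grows linearly with nobs, the resulting covariance shrinks at the usual 1/n rate.

import numpy as np

rng = np.random.default_rng(0)
jac = rng.normal(size=(500, 3))  # invented scores of loglikeobs, (nobs, nparams)

cov = cov_jacobian(jac)  # approximately np.linalg.inv(jac.T @ jac)
standard_errors = np.sqrt(np.diagonal(cov))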
def test_robust_inverse_singular():
    mat = np.zeros((5, 5))
    expected = np.zeros((5, 5))
    with pytest.warns(UserWarning, match="LinAlgError"):
        calculated = robust_inverse(mat)
    aaae(calculated, expected)
def test_robust_inverse_nonsingular():
    mat = np.eye(3) + 0.2
    expected = np.linalg.inv(mat)
    calculated = robust_inverse(mat)
    aaae(calculated, expected)
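These two tests pin down the behavior of robust_inverse: invert well-conditioned input exactly, and for singular input warn with a message mentioning the LinAlgError and fall back to a pseudo-inverse (the pseudo-inverse of the zero matrix is the zero matrix). The sketch below is a minimal implementation consistent with both tests; the library version may differ in details such as the exact warning text.

import warnings

import numpy as np


def robust_inverse(mat, msg=""):
    """Invert mat, falling back to the pseudo-inverse on LinAlgError.

    Reconstructed from the tests above; not the library's actual code.
    """
    try:
        out = np.linalg.inv(mat)
    except np.linalg.LinAlgError:
        warnings.warn(
            "Standard matrix inversion failed due to LinAlgError. "
            "A pseudo-inverse is used instead. " + msg
        )
        out = np.linalg.pinv(mat)
    return out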
def calculate_sensitivity_to_weighting(jac, weights, moments_cov, params_cov):
    """Calculate the sensitivity to weighting.

    The sensitivity measure is calculated for each parameter wrt each moment.

    It answers the following question: How would the precision change if the
    weight of the kth moment is increased a little?

    Args:
        jac (np.ndarray or pandas.DataFrame): The jacobian of simulate_moments
            with respect to params, evaluated at the point estimates.
        weights (np.ndarray or pandas.DataFrame): The weighting matrix used for
            msm estimation.
        moments_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix
            of the empirical moments.
        params_cov (numpy.ndarray or pandas.DataFrame): The covariance matrix
            of the parameter estimates.

    Returns:
        np.ndarray or pd.DataFrame: Sensitivity measure with shape
            (n_params, n_moments).

    """
    _jac, _weights, _moments_cov, _params_cov, names = process_pandas_arguments(
        jac=jac, weights=weights, moments_cov=moments_cov, params_cov=params_cov
    )

    gwg_inverse = _sandwich(_jac, _weights)
    gwg_inverse = robust_inverse(gwg_inverse, INVALID_SENSITIVITY_MSG)

    m6 = []
    for k in range(len(_weights)):
        # Selector matrix that singles out the kth diagonal weight.
        mask_matrix_o = np.zeros(shape=_weights.shape)
        mask_matrix_o[k, k] = 1

        m6k_1 = gwg_inverse @ _sandwich(_jac, mask_matrix_o) @ _params_cov
        m6k_2 = (
            gwg_inverse
            @ _jac.T
            @ mask_matrix_o
            @ _moments_cov
            @ _weights
            @ _jac
            @ gwg_inverse
        )
        m6k_3 = (
            gwg_inverse
            @ _jac.T
            @ _weights
            @ _moments_cov
            @ mask_matrix_o
            @ _jac
            @ gwg_inverse
        )
        m6k_4 = _params_cov @ _sandwich(_jac, mask_matrix_o) @ gwg_inverse

        m6k = -m6k_1 + m6k_2 + m6k_3 - m6k_4
        m6k = m6k.diagonal()
        m6.append(m6k)

    m6 = np.array(m6).T

    weights_diagonal = np.diagonal(_weights)
    params_variances = np.diagonal(_params_cov)

    e6 = m6 / params_variances.reshape(-1, 1)
    e6 = e6 * weights_diagonal

    if names:
        e6 = pd.DataFrame(e6, index=names.get("params"), columns=names.get("moments"))

    return e6
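A usage sketch tying the pieces together, with the same invented arrays as above. Because the diagonal weights are not efficient, params_cov is taken from the msm cov_robust defined earlier in this section.

import numpy as np

jac = np.array([[1.0, 0.0], [0.5, 1.0], [0.2, 0.3]])
moments_cov = np.array([[1.0, 0.3, 0.0], [0.3, 2.0, 0.1], [0.0, 0.1, 3.0]])
weights = np.diag(1 / np.diagonal(moments_cov))

params_cov = cov_robust(jac, weights, moments_cov)
e6 = calculate_sensitivity_to_weighting(jac, weights, moments_cov, params_cov)
# e6[i, k] is an elasticity: roughly the relative change in Var(param i) caused
# by a small proportional increase in the weight on moment k.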