def estimate_C2(Z, C1_inv=None):
    """Return ``cov2corr(Z @ W @ Z.T)`` for a weighting matrix ``W``.

    Parameters
    ----------
    Z : numpy.ndarray, shape (D, M)
        Two-dimensional data matrix; the result is D x D.
    C1_inv : numpy.ndarray, shape (M, M), optional
        When omitted, ``W`` is the centering matrix ``I - (1/M) 11'``.
        When given, ``W = C1_inv - (C1_inv 11' C1_inv) / sum(C1_inv)``.
        Presumably the inverse of a column correlation matrix -- confirm
        against the caller.

    Returns
    -------
    The output of ``cov2corr`` applied to ``Z @ W @ Z.T``.
    """
    _, M = Z.shape  # also enforces that Z is two-dimensional
    if C1_inv is None:
        weight = np.eye(M) - (1 / M) * np.ones((M, M))
    else:
        # C1_inv @ 11' @ C1_inv is the outer product of the row sums and the
        # column sums of C1_inv; forming it directly avoids two M x M matrix
        # products and the copy the original made through np.matrix.
        row_sums = np.sum(C1_inv, axis=1)
        col_sums = np.sum(C1_inv, axis=0)
        weight = C1_inv - np.outer(row_sums, col_sums) / np.sum(C1_inv)
    return cov2corr(Z @ weight @ Z.T)
def compute_GRM(self):
    """Populate ``K``, ``K_corr``, ``K_man`` and ``K_man_corr`` on ``self``.

    ``K`` comes from ``linear_kinship(self.G)``; ``K_man`` is built by hand
    from ``self.G`` standardised with ``self.f``.  Each matrix is also stored
    after rescaling with ``cov2corr``.
    """
    # Library estimate.
    self.K = linear_kinship(self.G, verbose=True)
    self.K_corr = cov2corr(self.K)

    # Manual estimate: centre by 2f, scale by sqrt(2f(1-f)), then average the
    # cross-products over self.num_snps.
    centred = self.G - 2 * self.f
    scale = np.sqrt(2 * self.f * (1 - self.f))
    standardised = centred / scale
    weighted = (1 / self.num_snps) * standardised
    self.K_man = weighted @ standardised.T
    self.K_man_corr = cov2corr(self.K_man)
def test_cov2corr():
    """Check ``cov2corr`` values and return types for ndarray and masked input."""
    cov_a = np.ones((3, 3)) + np.diag(np.arange(1, 4) ** 2 - 1)
    corr_a = np.array([[1, 1 / 2., 1 / 3.],
                       [1 / 2., 1, 1 / 2. / 3.],
                       [1 / 3., 1 / 2. / 3., 1]])

    # Plain ndarray input.
    corr = cov2corr(cov_a)
    assert_almost_equal(corr, corr_a, decimal=15)

    cov_mat = cov_a
    corr_mat = cov2corr(cov_mat)
    assert_(isinstance(corr_mat, np.ndarray))
    assert_equal(corr_mat, corr)

    # Masked array without any masked entry: the values must match the
    # ndarray result.  (The original re-checked corr_mat here, so corr_ma's
    # values were never verified.)
    cov_ma = np.ma.array(cov_a)
    corr_ma = cov2corr(cov_ma)
    assert_equal(corr_ma, corr)
    assert_(isinstance(corr_ma, np.ma.core.MaskedArray))

    # Masked array with masked off-diagonal entries: the unmasked values must
    # match and the mask must be carried over.  (The original compared
    # corr_ma instead of corr_ma2.)
    cov_ma2 = np.ma.array(cov_a, mask=[[False, True, False],
                                       [True, False, False],
                                       [False, False, False]])
    corr_ma2 = cov2corr(cov_ma2)
    assert_(np.ma.allclose(corr_ma2, corr, atol=1e-15))
    assert_equal(corr_ma2.mask, cov_ma2.mask)
def test_cov2corr():
    """Check ``cov2corr`` values and return types for ndarray, matrix and masked input."""
    cov_a = np.ones((3, 3)) + np.diag(np.arange(1, 4)**2 - 1)
    corr_a = np.array([[1, 1 / 2., 1 / 3.],
                       [1 / 2., 1, 1 / 2. / 3.],
                       [1 / 3., 1 / 2. / 3., 1]])

    # Plain ndarray input.
    corr = cov2corr(cov_a)
    assert_almost_equal(corr, corr_a, decimal=15)

    # np.matrix input: the subclass must be preserved.
    cov_mat = np.matrix(cov_a)
    corr_mat = cov2corr(cov_mat)
    assert_(isinstance(corr_mat, np.matrixlib.defmatrix.matrix))
    assert_equal(corr_mat, corr)

    # Masked array without any masked entry: the values must match the
    # ndarray result.  (The original re-checked corr_mat here, so corr_ma's
    # values were never verified.)
    cov_ma = np.ma.array(cov_a)
    corr_ma = cov2corr(cov_ma)
    assert_equal(corr_ma, corr)
    assert_(isinstance(corr_ma, np.ma.core.MaskedArray))

    # Masked array with masked off-diagonal entries: the unmasked values must
    # match and the mask must be carried over.  (The original compared
    # corr_ma instead of corr_ma2.)
    cov_ma2 = np.ma.array(cov_a, mask=[[False, True, False],
                                       [True, False, False],
                                       [False, False, False]])
    corr_ma2 = cov2corr(cov_ma2)
    assert_(np.ma.allclose(corr_ma2, corr, atol=1e-15))
    assert_equal(corr_ma2.mask, cov_ma2.mask)
def step0_whole(self):
    """Compute ``C1_raw``, ``C1`` and ``C1_`` from the whole of ``self.G``.

    ``C1_raw`` is ``G' H G`` with ``H`` the ``num_sample`` x ``num_sample``
    centering matrix.  ``C1`` is its ``cov2corr`` rescaling, and ``C1_`` is
    the rescaling after adding ``self.perturbation`` to the diagonal.

    The original body also ran ``np.unique`` over ``self.chromsome_list`` and
    took a timestamp; neither result was used, so both were removed.
    """
    # Reset first, as the original did -- presumably to release a previous
    # result before allocating the new one (TODO confirm).
    self.C1_raw = None

    n = self.num_sample
    centering = np.eye(n) - (1 / n) * np.ones((n, n))

    self.C1_raw = self.G.T @ centering @ self.G
    self.C1 = cov2corr(self.C1_raw)
    self.C1_ = cov2corr(self.C1_raw
                        + np.eye(self.num_snps) * self.perturbation)
def shrinkage_est(self, res, target):
    '''
    Estimate the covariance matrix using a shrinkage estimator.

    The sample covariance of ``res`` is shrunk towards ``target``:
    ``lamb * target + (1 - lamb) * covm``, with ``lamb`` clipped to [0, 1].

    Parameters
    ----------
    res : pandas.DataFrame or numpy.array
        Residuals, one row per observation and one column per variable.
        The covariance is computed as ``res' res / n`` without demeaning.
    target : pandas.DataFrame or numpy.array
        Square shrinkage target with one row/column per variable of ``res``.

    Returns
    -------
    shrink_cov : numpy.array
        Shrinkage covariance estimator.
    lamb : double
        Shrinkage parameter.
    '''
    # Make sure res and target are np arrays
    res = np.array(res)
    target = np.array(target)

    # Number of observations
    n = res.shape[0]

    # Sample covariance and correlation matrix of the residuals
    covm = res.T.dot(res) / n
    corm = moment_helpers.cov2corr(covm)

    # Give the residuals equal standard deviation (=1)
    res_scaled = res / np.diag(covm)**0.5
    res_scaled_sq = res_scaled**2  # reused below; was computed twice

    # Numerator of the shrinkage intensity; the diagonal is excluded
    v = (1 / (n * (n - 1))) * (
        res_scaled_sq.T.dot(res_scaled_sq)
        - (1 / n) * (res_scaled.T.dot(res_scaled))**2)
    np.fill_diagonal(v, 0)

    # Denominator: squared distance between sample and target correlation
    corapn = moment_helpers.cov2corr(target)
    d = (corm - corapn)**2

    # ndarray.sum() replaces the builtin sum(sum(...)), which iterated over
    # rows in Python.  A zero denominator still follows NumPy's float
    # division rules, exactly as before.
    lamb = v.sum() / d.sum()
    lamb = max(min(lamb, 1), 0)

    # Shrinkage estimator
    shrink_cov = lamb * target + (1 - lamb) * covm
    return shrink_cov, lamb
def create_dict(self):
    """Build the PMBEC correlation matrix as a nested ``{letter: {letter: value}}`` dict."""
    # read_coefficients prints to stdout, so silence it while loading.
    # TODO: Just fix pepdata.pmbec to not do this.
    with no_stdout():
        raw_coeffs = pmbec.read_coefficients()
    coeffs_df = pd.DataFrame(raw_coeffs)

    # Work with correlations rather than covariances, keeping the labels.
    corr_df = pd.DataFrame(cov2corr(coeffs_df))
    corr_df.index = coeffs_df.index
    corr_df.columns = coeffs_df.columns

    pmbec_dict = defaultdict(dict)
    pmbec_dict.update(corr_df.to_dict())

    # Smith-Waterman expects substitution values for invalid letters as well.
    valid_letters = set(pmbec_dict.keys())
    all_letters = valid_letters.union(INVALID_AMINO_ACID_LETTERS)
    for letter_i in all_letters:
        for letter_j in all_letters:
            if letter_i in valid_letters and letter_j in valid_letters:
                continue
            # Zero is enough: Smith-Waterman sets negative scores to 0.
            pmbec_dict[letter_i][letter_j] = 0
    return pmbec_dict
def test_eigenvalue_calculation(self):
    """With singleton groups, the group eigenvalue must equal the ungrouped one."""
    p = 100
    groups = np.arange(0, p, 1) + 1  # every feature is its own group

    # Equicorrelated matrices.
    for rho in [0, 0.3, 0.5, 0.7]:
        V = np.full((p, p), float(rho))
        np.fill_diagonal(V, 1)
        expected_gamma = min(1, 2 * (1 - rho))
        gamma = knockoffs.calc_min_group_eigenvalue(
            Sigma=V,
            groups=groups,
        )
        np.testing.assert_almost_equal(
            gamma,
            expected_gamma,
            decimal=3,
            err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue'
        )

    # Random, non-equicorrelated correlation matrix.
    noise = np.random.randn(p, p)
    V = cov2corr(np.dot(noise.T, noise) + 0.1 * np.eye(p))
    smallest_eig = np.linalg.eigh(V)[0].min()
    expected_gamma = min(1, 2 * smallest_eig)
    gamma = knockoffs.calc_min_group_eigenvalue(Sigma=V, groups=groups)
    np.testing.assert_almost_equal(
        gamma,
        expected_gamma,
        decimal=3,
        err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue')
def get_covar(self):
    '''
    Compute sample covariance and correlation matrices of ``self.returns``.

    Sets:
        self.covar, a dataframe of pairwise covariances labelled by
            ``self.assets`` (columns of ``self.returns`` are the variables).
        self.corrs, a dataframe of pairwise correlations derived from
            ``self.covar``, labelled the same way.

    returns:
        self, so calls can be chained.
    '''
    labels = self.assets

    # Covariances; each column of self.returns is one variable.
    sample_cov = np.cov(self.returns, rowvar=False)
    self.covar = pd.DataFrame(sample_cov, columns=labels, index=labels)

    # Correlations derived from the covariance frame.
    sample_corr = cov2corr(self.covar)
    self.corrs = pd.DataFrame(sample_corr, columns=labels, index=labels)

    return self
def create_dict(self):
    """Return the PMBEC correlations as a two-level dictionary keyed by letter."""
    # read_coefficients writes to stdout; keep it quiet.
    # TODO: Just fix pepdata.pmbec to not do this.
    with no_stdout():
        coefficients = pmbec.read_coefficients()
    source_df = pd.DataFrame(coefficients)

    # Use correlation rather than covariance; restore the original labels.
    pmbec_df = pd.DataFrame(cov2corr(source_df))
    pmbec_df.index = source_df.index
    pmbec_df.columns = source_df.columns

    pmbec_dict = defaultdict(dict)
    pmbec_dict.update(pmbec_df.to_dict())

    # Invalid letters need entries too, because Smith-Waterman looks up
    # substitution values for them.
    valid_letters = set(pmbec_dict.keys())
    all_letters = valid_letters.union(INVALID_AMINO_ACID_LETTERS)
    for letter_i in all_letters:
        i_is_valid = letter_i in valid_letters
        for letter_j in all_letters:
            if not (i_is_valid and letter_j in valid_letters):
                # Nothing below 0 is needed: Smith-Waterman sets negative
                # scores to 0.
                pmbec_dict[letter_i][letter_j] = 0
    return pmbec_dict
def mle_estimate_error(self):
    '''MLE-estimation from the POPRES analysis, on binned pairwise data.

    Builds an ``MLE_estim_error`` object (with ``error_model=False``) from the
    binned pairwise statistics, fits it, and stores the results on ``self``:

    * ``self.estimates`` -- fitted parameters (Param[0] always C;
      Param[1] always sigma, per the original author's note).
    * ``self.stds`` -- square roots of the diagonal of the negative inverse
      Hessian at the optimum.

    Prints the parameter correlation matrix, each parameter with its
    confidence interval, and the fit summary.
    '''
    # Full pairwise statistics, then binned.
    pw_dist, pw_IBD, pair_nr = self.give_pairwise_statistics()
    pw_dist, pw_IBD, pair_nr = self.bin_pairwise_statistics(
        pw_dist, pw_IBD, pair_nr)

    bl_shr_density = uniform_density
    start_params = [1.0, 2.0]  # 1: D 2: Sigma

    ml_estimator = MLE_estim_error(bl_shr_density, start_params, pw_dist,
                                   pw_IBD, pair_nr, error_model=False)
    # Best guess before fitting; used as start for the bootstrap.
    self.estimates = start_params

    print("Doing fit...")
    results = ml_estimator.fit()  # method="nelder-mead" / "BFGS" possible
    self.estimates = results.params

    # Negative inverse Hessian, computed once on a plain ndarray.  The
    # original inverted twice through np.matrix(...).I.
    hessian = np.asarray(ml_estimator.hessian(results.params))
    param_cov = -np.linalg.inv(hessian)

    corr_mat = cov2corr(param_cov)
    print(corr_mat)
    self.stds = np.sqrt(np.diag(param_cov))

    conf_ints = results.conf_int()  # hoisted out of the loop
    for i, estimate in enumerate(results.params):
        print("Parameter %i: %.6f" % (i, estimate))
        print("CI: " + str(conf_ints[i]))

    print(results.summary())
def estimate_C1(G, chromsome_list, Vmouse=None, Lambda=0.005, UseR=False):
    """Build a block-diagonal correlation matrix and its inverse, one block per chromosome.

    Parameters
    ----------
    G : numpy.ndarray, shape (N, M)
        Data matrix; columns are grouped by ``chromsome_list``.
    chromsome_list : numpy.ndarray, length M
        Label of each column of ``G``; compared element-wise with ``==``.
    Vmouse : numpy.ndarray, shape (N, N), optional
        When given, its inverse is used as the row weighting; otherwise the
        centering matrix ``I - (1/N) 11'`` is used.
    Lambda : float
        Each block is replaced by ``(1 - Lambda) * C + Lambda * I`` before
        inversion.
    UseR : bool
        Invert with ``find_inv_via_R`` instead of ``numpy.linalg.inv``.

    Returns
    -------
    C1, C1_inv
        Block-diagonal matrices with blocks in sorted label order, or
        ``(None, None)`` when ``chromsome_list`` is empty.
    """
    N, _ = G.shape
    invert = find_inv_via_R if UseR else np.linalg.inv

    if Vmouse is None:
        weight = np.eye(N) - (1 / N) * np.ones((N, N))
    else:
        weight = invert(Vmouse)

    blocks = []
    inv_blocks = []
    for chrom in np.unique(chromsome_list):
        columns = np.where(chromsome_list == chrom)[0]
        shifted = G[:, columns] - 1
        block = cov2corr(shifted.T @ weight @ shifted)
        block = (1 - Lambda) * block + Lambda * np.eye(block.shape[0])
        blocks.append(block)
        inv_blocks.append(invert(block))

    if not blocks:
        return None, None
    if len(blocks) == 1:
        return blocks[0], inv_blocks[0]
    # Assemble once; the original called block_diag inside the loop and so
    # re-copied the growing matrix for every chromosome.
    return block_diag(*blocks), block_diag(*inv_blocks)
def check_colCorr(self):
    """Store the column correlation matrix and save a heatmap of every 25th trait.

    Sets ``self.colCorr`` (``cov2corr`` of ``self.colCov``) and
    ``self.colCorr_subset``, then writes ``colCorr_heatmap.pdf``.
    """
    self.colCorr = cov2corr(self.colCov)

    # Keep every 25th trait on both axes.
    index = np.arange(0, self.num_trait, 25)
    rows_kept = self.colCorr[index, :]
    self.colCorr_subset = rows_kept[:, index]

    fig = plt.figure()
    ax = fig.add_subplot(1, 1, 1)
    ax = sns.heatmap(self.colCorr_subset, center=0)
    plt.savefig("colCorr_heatmap.pdf")
def generate(
    mu: pd.Series,
    Q: pd.DataFrame,
    nPaths: int = 100,
    repeat: int = 250,
    T: int = 6,
):
    """Average the asset weights of ``repeat`` simulated linear programs.

    Each repetition draws ``nPaths`` correlated one-step return scenarios and
    solves a linear program over ``nPaths + num_asset + 1`` variables.  The
    asset weights are constrained to sum to one, and the 0.95 confidence
    level in the objective suggests a CVaR-style formulation (TODO confirm).
    The weights are averaged over the repetitions.

    Parameters
    ----------
    mu : pd.Series
        Expected returns, one entry per asset.
    Q : pd.DataFrame
        Asset covariance matrix.
    nPaths : int
        Scenarios per repetition.
    repeat : int
        Number of repetitions to average over.
    T : int
        Length of the single simulated time step.

    Returns
    -------
    pd.Series
        Average weights, indexed like ``mu``.
    """
    num_asset = len(mu)
    total = np.zeros(num_asset)
    rho = cov2corr(Q)
    L = cholesky(rho, lower=True)
    dt = T
    confidence_level = 0.95
    variances = np.diag(Q)

    n_vars = nPaths + num_asset + 1
    asset_cols = slice(nPaths, nPaths + num_asset)
    scenario_rows = slice(nPaths, 2 * nPaths)
    unbounded = 1000000000000  # large right-hand side for the sign rows

    # Objective: scenario variables weighted by 1 / ((1 - level) * nPaths),
    # asset weights at 0, last variable at 1.
    f = np.zeros(n_vars)
    f[:nPaths] = 1 / ((1 - confidence_level) * nPaths)
    f[-1] = 1

    # Fixed part of the inequality matrix; the asset columns of the scenario
    # rows are filled in per repetition.
    A = np.zeros((2 * nPaths, n_vars))
    A[:nPaths, :nPaths] = -1 * np.eye(nPaths)
    A[scenario_rows, :nPaths] = -1 * np.eye(nPaths)
    A[scenario_rows, -1] = -1
    # Sign rows (-x <= b) are appended once instead of being re-concatenated
    # on every repetition.
    A_ub = np.concatenate((A, -1 * np.eye(n_vars)), axis=0)

    # Asset weights sum to one.
    Aeq = np.zeros((1, n_vars))
    Aeq[0, asset_cols] = 1
    beq = np.array([1])

    b = np.zeros(3 * nPaths + num_asset + 1)
    b[(2 * nPaths):(3 * nPaths)] = unbounded
    b[-1] = unbounded

    exp_term_1 = ((mu.to_numpy() - 0.5 * variances) * dt).reshape(-1, 1)
    exp_term_2 = np.sqrt(variances * dt).reshape(-1, 1)

    for _ in range(repeat):
        # One step from a start price of 100.
        start = np.zeros((num_asset, nPaths))
        start[:, :] = 100
        xi = np.dot(L, np.random.randn(num_asset, nPaths))
        end = start * np.exp(exp_term_1 + exp_term_2 * xi)
        returns_sample = end / start - 1  # num_asset x nPaths

        # One slice assignment replaces the per-scenario loop, which reused
        # the outer loop variable ``i``.
        A_ub[scenario_rows, asset_cols] = -returns_sample.T

        # NOTE(review): SciPy removed method="interior-point" in 1.11; it is
        # kept so results are unchanged -- confirm against the installed SciPy.
        res = linprog(
            c=f, A_ub=A_ub, b_ub=b, A_eq=Aeq, b_eq=beq, method="interior-point"
        )
        total = np.add(total, res.x[asset_cols])

    return pd.Series(total / repeat, index=mu.index)
def plot_black_litterman_results(ret_bl, covar_bl, market_prior, mu):
    """Plot Black-Litterman returns as bars, and covariance/correlation as heatmaps.

    ``covar_bl`` must expose ``index`` and ``columns``; they label the
    correlation frame.
    """
    # Prior, posterior and views side by side, one row per asset.
    return_rows = [market_prior, ret_bl, pd.Series(mu)]
    rets_df = pd.DataFrame(return_rows,
                           index=["Prior", "Posterior", "Views"]).T
    rets_df.plot.bar(figsize=(12, 8), title='Black-Litterman Expected Returns')

    plot_heatmap(covar_bl, 'Black-Litterman Covariance', '', '')

    corr_values = mh.cov2corr(covar_bl)
    corr_bl = pd.DataFrame(corr_values,
                           index=covar_bl.index,
                           columns=covar_bl.columns)
    plot_heatmap(corr_bl, 'Black-Litterman Correlation', '', '')
def generate_pheno(kinship, hsquared, N=300, P=15, rho=0.45):
    '''
    Draw phenotype data as the sum of two matrix-normal samples.

    N = n_samples, P = n_traits, and rho is the autocorrelation parameter
    passed to generate_B.  kinship must be NxN.

    The first draw uses kinship as row covariance and hsquared * corr(B) as
    column covariance; the second uses the identity and
    (1 - hsquared) * corr(E).

    RETURNS ndarray of size (N x P)
    '''
    assert kinship.shape == (N, N)

    B = generate_B(P, rho)
    E = generate_E(P)

    # The factor itself is unused; np.linalg.cholesky raises LinAlgError for
    # a matrix that is not positive definite, so this fails early.
    _ = np.linalg.cholesky(kinship)

    trait_cov_genetic = hsquared * cov2corr(B)
    U = matrix_normal.rvs(rowcov=kinship, colcov=trait_cov_genetic)

    trait_cov_noise = (1 - hsquared) * cov2corr(E)
    epsilon = matrix_normal.rvs(rowcov=np.eye(N), colcov=trait_cov_noise)

    return U + epsilon
def test_psd(self):
    """Check the equicorrelated S matrix for a random correlation matrix and random groups."""
    p = 100

    # Random correlation matrix.
    noise = np.random.randn(p, p)
    gram = np.dot(noise.T, noise) + 0.1 * np.eye(p)
    V = cov2corr(gram)

    # Random group labels, normalised by the project helper.
    raw_groups = np.random.randint(1, p, size=(p))
    groups = utilities.preprocess_groups(raw_groups)

    S = knockoffs.equicorrelated_block_matrix(Sigma=V, groups=groups)

    self.check_S_properties(V, S, groups)
def test_cov_diagonal(cov, nobs):
    r"""One sample hypothesis test that covariance matrix is diagonal matrix.

    The Null and alternative hypotheses are

    .. math::

       H0 &: \Sigma = diag(\sigma_i) \\
       H1 &: \Sigma \neq diag(\sigma_i)

    where :math:`\sigma_i` are the variances with unspecified values.

    Parameters
    ----------
    cov : array_like
        Covariance matrix of the data, estimated with denominator ``(N - 1)``,
        i.e. `ddof=1`.
    nobs : int
        number of observations used in the estimation of the covariance

    Returns
    -------
    res : instance of HolderTuple
        results with ``statistic, pvalue`` and other attributes like ``df``

    Notes
    -----
    The statistic is ``-(nobs - 1 - (2 k + 5) / 6) * logdet(R)`` with ``R``
    the correlation matrix of ``cov`` and ``k`` its dimension.  It is
    compared with a chi-square distribution with ``k (k - 1) / 2`` degrees
    of freedom.

    References
    ----------
    Rencher, Alvin C., and William F. Christensen. 2012. Methods of
    Multivariate Analysis: Rencher/Methods. Wiley Series in Probability and
    Statistics. Hoboken, NJ, USA: John Wiley & Sons, Inc.
    https://doi.org/10.1002/9781118391686.

    StataCorp, L. P. Stata Multivariate Statistics: Reference Manual.
    Stata Press Publication.
    """
    cov = np.asarray(cov)
    k = cov.shape[0]
    R = cov2corr(cov)

    scale = nobs - 1 - (2 * k + 5) / 6
    statistic = -scale * _logdet(R)
    df = k * (k - 1) / 2
    pvalue = stats.chi2.sf(statistic, df)

    return HolderTuple(statistic=statistic,
                       pvalue=pvalue,
                       df=df,
                       distr="chi2",
                       null="diagonal"
                       )
def step1(self):
    """Compute ``C2_raw`` and ``C2`` from ``self.Z`` whitened with ``self.V1``.

    ``self.Z`` is right-multiplied by the transposed inverse of the Cholesky
    factor of ``self.V1``, centred across ``self.num_snps`` columns, and the
    resulting cross-product is rescaled with ``cov2corr``.  Progress messages
    with the current time are printed before and after.
    """
    started = datetime.now().time()
    print('******************************* Start to obtain C2:',
          " now the time is ", started)

    # Lower Cholesky factor of V1.
    chol_factor = np.linalg.cholesky(self.V1)
    whitened = self.Z @ np.linalg.inv(chol_factor).T

    n = self.num_snps
    centering = np.eye(n) - (1 / n) * np.ones((n, n))

    self.C2_raw = whitened @ centering @ whitened.T
    self.C2 = cov2corr(self.C2_raw)

    finished = datetime.now().time()
    print('******************************* Finished obtainning C2.',
          "now the time is ", finished)
def hub_spoke_corr_mat(D=50, groups=5, v=0.3, u=0.1):
    """Build a hub-and-spoke precision/correlation pair of size ``D``.

    The first node of each block of ``D // groups`` consecutive nodes is
    linked to every other node in that block with weight ``v``.  The diagonal
    is set to ``|min eigenvalue| + 0.1 + u``, and the inverse is rescaled to a
    correlation matrix.

    Returns
    -------
    Omega, Sigma : the inverse of ``Sigma``, and ``Sigma`` itself.
    """
    G = D // groups  # group size

    # Adjacency: hub row and hub column of every group.
    Theta = np.zeros([D, D])
    for g in range(groups):
        hub = g * G
        members = slice(hub, hub + G)
        Theta[hub, members] = 1
        Theta[members, hub] = 1
    np.fill_diagonal(Theta, 0)

    Omega = Theta * v
    # The shift is computed from Omega while its diagonal is still zero.
    diag_value = np.abs(np.min(np.linalg.eigvals(Omega))) + 0.1 + u
    Omega[np.diag_indices(D)] = diag_value

    Sigma = cov2corr(np.linalg.inv(Omega))
    Omega = np.linalg.inv(Sigma)
    return Omega, Sigma
def hub_spoke_corr_mat(D=50, groups=5, v=0.3, u=0.1):
    """Port of data generation code from Wasserman's Huge package.

    Each block of ``D // groups`` consecutive nodes has its first node wired
    to the rest of the block with weight ``v``.  The diagonal is set to
    ``|min eigenvalue| + 0.1 + u`` before inverting and rescaling to a
    correlation matrix.  Returns ``(Omega, Sigma)`` with
    ``Omega = inv(Sigma)``.
    """
    from statsmodels.stats.moment_helpers import cov2corr

    G = D // groups  # group size
    Theta = np.zeros([D, D])
    for hub in range(0, groups * G, G):
        for offset in range(G):
            spoke = hub + offset
            Theta[hub, spoke] = 1
            Theta[spoke, hub] = 1
    Theta[np.diag_indices(D)] = 0

    Omega = Theta * v
    # Evaluated before the diagonal is written, as in the original.
    smallest = np.min(np.linalg.eigvals(Omega))
    Omega[np.diag_indices(D)] = np.abs(smallest) + 0.1 + u

    Sigma = cov2corr(np.linalg.inv(Omega))
    Omega = np.linalg.inv(Sigma)
    return Omega, Sigma
def mle_analysis_error(self):
    '''Does a maximum likelihood analysis with the full error model.

    Fits ``self.mle_object`` and stores on ``self``:

    * ``estimates`` -- fitted parameters (Param[0] always C; Param[1] always
      sigma, per the original author's note).
    * ``ci_s`` -- confidence intervals from ``results.conf_int()``.
    * ``stds`` -- square roots of the diagonal of the negative inverse
      Hessian at the optimum.

    Prints the parameter correlation matrix, each parameter, and the summary.
    '''
    ml_estimator = self.mle_object

    print("Doing fit...")
    results = ml_estimator.fit()  # method="nelder-mead" / "BFGS" possible

    self.estimates = results.params  # (0: c 1: sigma ...)
    self.ci_s = results.conf_int()

    # Negative inverse Hessian, computed once on a plain ndarray.  The
    # original inverted twice through np.matrix(...).I.
    hessian = np.asarray(ml_estimator.hessian(results.params))
    param_cov = -np.linalg.inv(hessian)

    corr_mat = cov2corr(param_cov)
    print(corr_mat)
    self.stds = np.sqrt(np.diag(param_cov))

    for i, estimate in enumerate(results.params):
        print("Parameter %i: %.6f" % (i, estimate))

    print(results.summary())
    self.mle_object = ml_estimator  # Remember the mle-estimation object.
def test_equicorrelated_construction(self):
    """With singleton groups, the equicorrelated S must equal ``gamma * I``."""
    p = 100
    groups = np.arange(0, p, 1) + 1  # every feature is its own group

    # Random correlation matrix.
    noise = np.random.randn(p, p)
    V = cov2corr(np.dot(noise.T, noise) + 0.1 * np.eye(p))

    # Expected construction.
    smallest_eig = np.linalg.eigh(V)[0].min()
    expected_gamma = min(1, 2 * smallest_eig)
    expected_S = expected_gamma * np.eye(p)

    S = knockoffs.equicorrelated_block_matrix(Sigma=V, groups=groups)

    np.testing.assert_almost_equal(
        S,
        expected_S,
        decimal=3,
        err_msg='calc_min_group_eigenvalue calculates wrong eigenvalue')
def generate_rho_u(corr_type, D, num_spike=10, scale=0.1, num_block=10):
    """Generate a correlation matrix of the requested structure.

    Parameters
    ----------
    corr_type : {"identity", "spiked", "geometric"}
        * ``"identity"`` -- the D x D identity.
        * ``"spiked"`` -- ``cov2corr(I + scale * S)`` where ``S`` is a sum of
          ``num_spike`` random rank-one terms weighted by ``2**(-i + 1)``.
        * ``"geometric"`` -- block diagonal with ``num_block`` Toeplitz blocks
          whose first row is ``scale**0, scale**1, ...``.
    D : int
        Dimension.  For ``"geometric"`` the result has
        ``num_block * int(D / num_block)`` rows, which is smaller than ``D``
        when ``D`` is not a multiple of ``num_block``.
    num_spike : int
        Number of rank-one terms (``"spiked"`` only).
    scale : float
        Spike strength, or the geometric decay base.
    num_block : int
        Number of diagonal blocks (``"geometric"`` only).

    Raises
    ------
    ValueError
        For an unknown ``corr_type``.  The original fell through and returned
        ``None``, which only failed later at the call site.
    """
    if corr_type == "identity":
        return np.eye(D)

    if corr_type == "spiked":
        spiked = 0
        for i in range(num_spike):
            v = np.random.rand(D, 1)
            v = v / np.linalg.norm(v, 2)
            spiked += (2**(-i + 1)) * (v @ v.T)
        return cov2corr(np.eye(D) + scale * spiked)

    if corr_type == "geometric":
        block_size = int(D / num_block)
        rho_u_block = toeplitz([(scale**i) for i in range(block_size)])
        if num_block == 1:
            return rho_u_block
        # Assemble all blocks in one call instead of growing the matrix
        # block by block.
        return block_diag(*([rho_u_block] * num_block))

    raise ValueError(f"unknown corr_type: {corr_type!r}")
def test_sdp_tolerance(self):
    """Check that the SDP solution respects the requested eigenvalue tolerance."""
    # Seeded Erdos-Renyi graph, turned into a correlation matrix.
    np.random.seed(110)
    Q = graphs.ErdosRenyi(p=50, tol=1e-1)
    V = cov2corr(utilities.chol2inv(Q))

    # Five groups of ten features each, labelled 1..5.
    group_blocks = [np.zeros(10) + j for j in range(5)]
    groups = (np.concatenate(group_blocks) + 1).astype('int32')

    for tol in [1e-3, 0.01, 0.02]:
        S = knockoffs.compute_S_matrix(Sigma=V,
                                       groups=groups,
                                       method='sdp',
                                       objective="pnorm",
                                       num_iter=10,
                                       tol=tol)
        # Joint matrix [[V, V - S], [V - S, V]].
        left = np.vstack([V, V - S])
        right = np.vstack([V - S, V])
        G = np.hstack([left, right])
        mineig = np.linalg.eig(G)[0].min()
        self.assertTrue(
            tol - mineig > -1 * tol / 10,
            f'sdp solver fails to control minimum eigenvalues: tol is {tol}, val is {mineig}'
        )
        self.check_S_properties(V, S, groups)
mu = mu.T #n_stock x n_stock asset covariance matrix Q = np.dot(np.dot(V.T,F),V) + D return [mu, Q] [mu, Q] = fama_french(returns, factors) ## ******************************************** ## stochastic MVO ## ******************************************** num_asset = len(mu) rho = cov2corr(Q) nPaths = 400 L = cholesky(rho, lower=True) T = 12 N = 3 dt = T/N # Because it is in a minimization problem, so minus means reward reward_per_dollor_surplus = -2 # Because it is in a minimization problem, so positive means punishment punishment_per_dollor_shortfall = 1 #risk adversion coefficient risk_weight_coefficient = 1000
def covariance2errors(covariances):
    """Split each covariance matrix into a correlation matrix and standard deviations.

    Returns two stacked arrays, ``(correlations, standard_deviations)``, in
    the order of ``covariances``.
    """
    # The second positional argument asks cov2corr for (corr, std).
    pairs = [cov2corr(matrix, True) for matrix in covariances]
    corr, std = zip(*pairs)
    return np.asarray(corr), np.asarray(std)
def test_short_panel():
    """Check basic statistical properties of ShortPanelGLS on a simulated AR(1) panel."""
    #this checks that some basic statistical properties are satisfied by the
    #results, not verified results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp: expected within-group covariance has entries 0.9**|i-j|
    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])
    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    #simulated noise should roughly reproduce the dgp correlation
    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)
    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)
    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)
    assert_almost_equal(res2.resid.std(),1, decimal=0)

    #fitted values must equal exog times params
    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)

    #compare with OLS
    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: a new instance is needed, otherwise it continues fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)

    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)

    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    #robust standard errors from the pooled OLS results
    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse - 1)

    #cov_hac_panel with uniform_kernel is the same as cov_cluster for balanced
    #panel with full length kernel
    #the default correction was fixed to be equal
    cov_uni = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction='c')
    assert_almost_equal(cov_uni, cov_clu, decimal=13)

    #without correction
    cov_clu2 = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int),
                              use_correction=False)
    cov_uni2 = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction=False)
    assert_almost_equal(cov_uni2, cov_clu2, decimal=13)

    #zero lags with 'hac' correction must match the simple White covariance
    cov_white = sw.cov_white_simple(mod2.res_pooled)
    cov_pnw0 = sw.cov_nw_panel(mod2.res_pooled, 0, mod2.group.groupidx,
                              use_correction='hac')
    assert_almost_equal(cov_pnw0, cov_white, decimal=13)
import numpy as np
from statsmodels.stats import moment_helpers as mh
from math import sqrt
from numpy import linalg as LA

# Covariance matrix and three direction vectors (as columns).
sigma = np.array([[4, 2, -3], [2, 9, 0], [-3, 0, 9]])
x1 = np.array([[1], [0], [0]])
x2 = np.array([[0], [-1], [0]])
x3 = np.array([[1], [1], [0]])
# x3 is not unit length, so normalise it first.
x3_hat = x3 / (x3**2).sum()**0.5

print('Original Matrix')
print(sigma)

# Variance along a direction x is the quadratic form x' sigma x.  The
# original used ``*``, which broadcasts element-wise on ndarrays and produced
# a 3x3 array for x1 and x2; ``@`` gives the intended 1x1 result.
x1_var = x1.T @ sigma @ x1
x2_var = x2.T @ sigma @ x2
x3_var = x3_hat.T @ sigma @ x3_hat
print('Variance (x1) = \n', x1_var)
print('Variance (x2) = \n', x2_var)
print('Variance (x3) =', x3_var.sum())
print()
print('Cov 2 Corr')
print(mh.cov2corr(sigma))
def cov(self, k_vars=None):
    """Return the covariance matrix implied by ``self.corr`` and ``self.sigma``.

    Parameters
    ----------
    k_vars : int, optional
        Forwarded to ``self.corr``.  The original always passed ``None`` and
        silently ignored the argument.

    Returns
    -------
    The correlation matrix from ``self.corr(k_vars=k_vars)`` scaled by
    ``self.sigma`` through ``corr2cov``.

    Notes
    -----
    The original called ``cov2corr(corr, self.sigma)``.  That put ``sigma``
    in the ``return_std`` position and returned a ``(corr, std)`` tuple for
    any truthy ``sigma``, not a covariance matrix.  ``corr2cov`` treats its
    second argument as standard deviation(s) -- NOTE(review): confirm that
    ``self.sigma`` is a standard deviation and not a variance.
    """
    # Local import so this fix does not depend on the module's import block.
    from statsmodels.stats.moment_helpers import corr2cov

    return corr2cov(self.corr(k_vars=k_vars), self.sigma)
kernel_mtx[t_ix, np.arange(t_ix + 1, len_t)] = kernel_vec[np.arange( 1, len_t - t_ix)] kernel_mtx = kernel_mtx + np.transpose(kernel_mtx) + np.eye(len_t) var_E = [] for i in range(n): var_i = [] for t in range(len_t): var_s = [] for s in range(len_t): var_s.append(kernel_mtx[t, s] * np.outer(Y[:, i, :][s, :], Y[:, i, :][s, :])) var_i.append(np.sum(np.array(var_s), 0) / np.sum(kernel_mtx[t, :])) var_E.append(var_i) alpha_1 = alpha_max(cov2corr(C0_hat)) alpha_0 = alpha_1 * 0.1 alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50) A0_hat_list = [cov.graph_lasso(cov2corr(C0_hat), alpha)[1] for alpha in alphas] alpha_1 = alpha_max(np.array(cov2corr(C0))) alpha_0 = alpha_1 * 0.1 alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50) A0_oracle_list = [ cov.graph_lasso(np.array(cov2corr(C0)), alpha)[1] for alpha in alphas ] alpha_1 = alpha_max(cov2corr(C0_X_hat)) alpha_0 = alpha_1 * 0.1 alphas = np.logspace(np.log10(alpha_1), np.log10(alpha_0), 50) A0_X_list = [cov.graph_lasso(cov2corr(C0_X_hat), alpha)[1] for alpha in alphas]
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):
    '''
    Find the nearest covariance matrix that is positive (semi-) definite

    This leaves the diagonal, i.e. the variance, unchanged

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : string
        if "clipped", then the faster but less accurate ``corr_clipped`` is
        used. if "nearest", then ``corr_nearest`` is used
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation

    Raises
    ------
    ValueError
        If ``method`` is neither "clipped" nor "nearest".  Previously this
        surfaced as an UnboundLocalError because ``corr_`` was never assigned.

    Notes
    -----
    This converts the covariance matrix to a correlation matrix. Then, finds
    the nearest correlation matrix that is positive semidefinite and converts
    it back to a covariance matrix using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
    '''
    if method not in ('clipped', 'nearest'):
        raise ValueError("method must be 'clipped' or 'nearest', got %r"
                         % (method,))

    from statsmodels.stats.moment_helpers import cov2corr, corr2cov

    # Split the input into a correlation matrix and standard deviations.
    corr_init, std_ = cov2corr(cov, return_std=True)
    if method == 'clipped':
        corr_ = corr_clipped(corr_init, threshold=threshold)
    else:  # method == 'nearest'
        corr_ = corr_nearest(corr_init, threshold=threshold, n_fact=n_fact)

    # Rescale with the original standard deviations so the variances are kept.
    cov_ = corr2cov(corr_, std_)

    if return_all:
        return cov_, corr_, std_
    else:
        return cov_
def cov_nearest(cov, method='clipped', threshold=1e-15, n_fact=100,
                return_all=False):
    """
    Find the nearest positive (semi-)definite covariance matrix.

    The diagonal, i.e. the variance, is left unchanged.

    Parameters
    ----------
    cov : ndarray, (k,k)
        initial covariance matrix
    method : string
        "clipped" selects the faster but less accurate ``corr_clipped``;
        any other value selects ``corr_nearest``.
    threshold : float
        clipping threshold for smallest eigen value, see Notes
    n_fact : int or float
        factor to determine the maximum number of iterations in
        ``corr_nearest``. See its doc string
    return_all : bool
        if False (default), then only the covariance matrix is returned.
        If True, then correlation matrix and standard deviation are
        additionally returned.

    Returns
    -------
    cov_ : ndarray
        corrected covariance matrix
    corr_ : ndarray, (optional)
        corrected correlation matrix
    std_ : ndarray, (optional)
        standard deviation

    Notes
    -----
    The covariance matrix is converted to a correlation matrix, the nearest
    positive semidefinite correlation matrix is found, and the result is
    converted back using the initial standard deviation.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to the ``threshold``.
    If the threshold=0, then the smallest eigenvalue of the correlation matrix
    might be negative, but zero within a numerical error, for example in the
    range of -1e-16.

    Assumes input covariance matrix is symmetric.

    See Also
    --------
    corr_nearest
    corr_clipped
    """
    from statsmodels.stats.moment_helpers import cov2corr, corr2cov

    # Correlation matrix and standard deviations of the input.
    corr_init, std_ = cov2corr(cov, return_std=True)

    use_clipping = method == 'clipped'
    if use_clipping:
        corr_ = corr_clipped(corr_init, threshold=threshold)
    else:
        # method == 'nearest'
        corr_ = corr_nearest(corr_init, threshold=threshold, n_fact=n_fact)

    cov_ = corr2cov(corr_, std_)

    if not return_all:
        return cov_
    return cov_, corr_, std_
def test_short_panel():
    """Check basic statistical properties of ShortPanelGLS on a simulated AR(1) panel."""
    #this checks that some basic statistical properties are satisfied by the
    #results, not verified results against other packages
    #Note: the ranking of robust bse is different if within=True
    #I added within keyword to PanelSample to be able to use old example
    #if within is False, then there is no within group variation in exog.
    nobs = 100
    nobs_i = 5
    n_groups = nobs // nobs_i
    k_vars = 3

    dgp = PanelSample(nobs, k_vars, n_groups, corr_structure=cs.corr_arma,
                      corr_args=([1], [1., -0.9],), seed=377769, within=False)
    #print 'seed', dgp.seed
    y = dgp.generate_panel()
    noise = y - dgp.y_true

    #test dgp: expected within-group covariance has entries 0.9**|i-j|
    dgp_cov_e = np.array(
              [[ 1.    ,  0.9   ,  0.81  ,  0.729 ,  0.6561],
               [ 0.9   ,  1.    ,  0.9   ,  0.81  ,  0.729 ],
               [ 0.81  ,  0.9   ,  1.    ,  0.9   ,  0.81  ],
               [ 0.729 ,  0.81  ,  0.9   ,  1.    ,  0.9   ],
               [ 0.6561,  0.729 ,  0.81  ,  0.9   ,  1.    ]])
    npt.assert_almost_equal(dgp.cov, dgp_cov_e, 13)

    #simulated noise should roughly reproduce the dgp correlation
    cov_noise = np.cov(noise.reshape(-1,n_groups, order='F'))
    corr_noise = cov2corr(cov_noise)
    npt.assert_almost_equal(corr_noise, dgp.cov, 1)

    #estimate panel model
    mod2 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res2 = mod2.fit_iterative(2)
    #whitened residual should be uncorrelated
    corr_wresid = np.corrcoef(res2.wresid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_wresid, np.eye(5), 0.1)
    #residual should have same correlation as dgp
    corr_resid = np.corrcoef(res2.resid.reshape(-1,n_groups, order='F'))
    assert_maxabs(corr_resid, dgp.cov, 0.1)
    assert_almost_equal(res2.resid.std(),1, decimal=0)

    #fitted values must equal exog times params
    y_pred = np.dot(mod2.exog, res2.params)
    assert_almost_equal(res2.fittedvalues, y_pred, 13)

    #compare with OLS
    res2_ols = mod2._fit_ols()
    npt.assert_(mod2.res_pooled is res2_ols)

    res2_ols = mod2.res_pooled  #TODO: BUG: requires call to _fit_ols

    #fitting once is the same as OLS
    #note: a new instance is needed, otherwise it continues fitting
    mod1 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res1 = mod1.fit_iterative(1)

    assert_almost_equal(res1.params, res2_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res2_ols.bse, decimal=13)

    res_ols = OLS(y, dgp.exog).fit()
    assert_almost_equal(res1.params, res_ols.params, decimal=13)
    assert_almost_equal(res1.bse, res_ols.bse, decimal=13)

    #compare with old version
    mod_old = ShortPanelGLS2(y, dgp.exog, dgp.groups)
    res_old = mod_old.fit()

    assert_almost_equal(res2.params, res_old.params, decimal=13)
    assert_almost_equal(res2.bse, res_old.bse, decimal=13)

    mod5 = ShortPanelGLS(y, dgp.exog, dgp.groups)
    res5 = mod5.fit_iterative(5)

    #make sure it's different
    #npt.assert_array_less(0.009, em.maxabs(res5.bse, res2.bse))

    #robust standard errors from the pooled OLS results
    cov_clu = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int))
    clubse = se_cov(cov_clu)
    pnwbse = se_cov(sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx))
    bser = np.vstack((res2.bse, res5.bse, clubse, pnwbse))
    bser_mean = np.mean(bser, axis=0)

    #cov_cluster close to robust and PanelGLS
    #is up to 24% larger than mean of bser
    #npt.assert_array_less(0, clubse / bser_mean - 1)
    npt.assert_array_less(clubse / bser_mean - 1, 0.25)
    #cov_nw_panel close to robust and PanelGLS
    npt.assert_array_less(pnwbse / bser_mean - 1, 0.1)
    #OLS underestimates bse, robust at least 60% larger
    npt.assert_array_less(0.6, bser_mean / res_ols.bse - 1)

    #cov_hac_panel with uniform_kernel is the same as cov_cluster for balanced
    #panel with full length kernel
    #the default correction was fixed to be equal
    cov_uni = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction='c')
    assert_almost_equal(cov_uni, cov_clu, decimal=13)

    #without correction
    cov_clu2 = sw.cov_cluster(mod2.res_pooled, dgp.groups.astype(int),
                              use_correction=False)
    cov_uni2 = sw.cov_nw_panel(mod2.res_pooled, 4, mod2.group.groupidx,
                              weights_func=sw.weights_uniform,
                              use_correction=False)
    assert_almost_equal(cov_uni2, cov_clu2, decimal=13)

    #zero lags with 'hac' correction must match the simple White covariance
    cov_white = sw.cov_white_simple(mod2.res_pooled)
    cov_pnw0 = sw.cov_nw_panel(mod2.res_pooled, 0, mod2.group.groupidx,
                              use_correction='hac')
    assert_almost_equal(cov_pnw0, cov_white, decimal=13)