# assumes: numpy as np, warnings, cov_nearest from statsmodels.stats.correlation_tools,
# and a Cholesky routine `chol` that raises LinAlgError (e.g. numpy.linalg.cholesky)
def repair_covariance(M2, reg_cov):
    '''Suppress numeric errors by keeping the covariance matrix positive semidefinite.'''
    K = M2.shape[0]
    strength = reg_cov + max(0, -np.min(np.diag(M2)))
    # symmetrize and add a small ridge before attempting to factorize
    M2 = 0.5 * (M2 + M2.T) + strength * np.eye(K)
    for retry in range(20):
        try:
            ch = chol(M2)
            break
        except LinAlgError:
            M2 += strength * np.eye(K)
            strength *= 2
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')
            M2 = cov_nearest(M2, method="clipped")
    except (KeyboardInterrupt, SystemExit):
        raise
    except Exception:
        # covariance repair failed; fall back to the slower "nearest" method
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore')
                M2 = cov_nearest(M2, method="nearest")
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception:
            # covariance repair failed entirely; return the ridged matrix as-is
            pass
    return M2
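# A minimal smoke test for repair_covariance above (hypothetical inputs, assuming
# the imports noted in the snippet): the matrix below has smallest eigenvalue
# -0.8, so plain Cholesky fails on it until it is repaired.
import numpy as np

bad = np.array([[1.0, -0.9, -0.9],
                [-0.9, 1.0, -0.9],
                [-0.9, -0.9, 1.0]])
fixed = repair_covariance(bad, reg_cov=1e-10)
print(np.linalg.eigvalsh(fixed))  # all eigenvalues now (near) non-negative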
def test_corrpsd_threshold():
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])
    #print(np.linalg.eigvalsh(x))

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        y = corr_nearest(x, n_fact=100, threshold=threshold)
        evals = np.linalg.eigvalsh(y)
        #print('evals', evals, threshold)
        assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)

        y = corr_clipped(x, threshold=threshold)
        evals = np.linalg.eigvalsh(y)
        #print('evals', evals, threshold)
        assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)

        y = cov_nearest(x, method='nearest', n_fact=100, threshold=threshold)
        evals = np.linalg.eigvalsh(y)
        #print('evals', evals, threshold)
        #print(evals[0] / threshold - 1)
        assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)

        y = cov_nearest(x, n_fact=100, threshold=threshold)
        evals = np.linalg.eigvalsh(y)
        #print('evals', evals, threshold)
        #print(evals[0] / threshold - 1)
        assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
def test_cov_nearest(self):
    x = self.x
    res_r = self.res
    y = cov_nearest(x, method='nearest')
    #print(np.max(np.abs(x - y)))
    assert_almost_equal(y, res_r.mat, decimal=3)
    d = norm_f(x, y)
    assert_allclose(d, res_r.normF, rtol=0.001)

    y = cov_nearest(x, method='clipped')
    #print(np.max(np.abs(x - y)))
    assert_almost_equal(y, res_r.mat, decimal=2)
    d = norm_f(x, y)
    assert_allclose(d, res_r.normF, rtol=0.15)
def _generate_cov_matrix(nexog, nendog, ninstruments, collinearity,
                         endogeneity, instr_strength):
    exog_names, endog_names, instr_names = _variable_names(
        nexog, nendog, ninstruments)
    cols = exog_names + endog_names + instr_names + ["epsilon"]
    cov = np.zeros((len(cols), len(cols)))
    upper_indices = np.triu_indices(len(cols), k=1)
    nupper = len(upper_indices[0])
    cov[upper_indices] = np.random.uniform(low=0.0, high=0.1, size=nupper)
    cov_df = pd.DataFrame(data=cov, columns=cols, index=cols)
    cov_df.loc["exog_0", "exog_1"] = collinearity
    higher_weight = 0.5 + 0.5 * (1 - collinearity)
    lower_weight = 0.5 - 0.5 * (1 - collinearity)
    cov_df.loc["exog_0", cols[2:]] = (
        higher_weight * cov_df.loc["exog_0", cols[2:]]
        + lower_weight * cov_df.loc["exog_1", cols[2:]])
    cov_df.loc["exog_1", cols[2:]] = (
        higher_weight * cov_df.loc["exog_1", cols[2:]]
        + lower_weight * cov_df.loc["exog_0", cols[2:]])
    cov_df.loc[exog_names + instr_names, "epsilon"] = 0
    cov_df.loc[endog_names, instr_names] = instr_strength
    cov_df.loc[endog_names, "epsilon"] = endogeneity
    cov = cov_df.values
    cov += cov.T
    cov[np.diag_indices(len(cols))] = 1
    cov = cov_nearest(cov, method="nearest", threshold=1e-10, n_fact=10)
    return cov
def optimize(t, sigma, pi):
    nonlocal last_solution
    nr_of_assets = len(sigma)

    # only optimize if we have a re-balance trigger (early exit)
    if last_solution is not None and last_solution.sum() > 0.99:
        # we had at least one valid solution in the past; we can early-exit if
        # there is no signal at all, or no signal for any currently held asset
        if len(t.shape) > 1 and t.shape[1] == nr_of_assets:
            if t[:, last_solution >= 0.01].sum().any() < 1:
                return keep_solution
        else:
            if t.sum().any() < 1:
                return keep_solution

    # make sure the covariance matrix is positive definite
    sigma = cov_nearest(sigma)

    # we perform the optimization except when all expected returns are < 0;
    # then we early-exit with an un-invest command
    if len(pi[:, pi[0] < 0]) == pi.shape[1]:
        return uninvest
    else:
        try:
            sol = solve_qp(risk_aversion * sigma, -pi.T,
                           G=G, h=h, A=A, b=b, solver=solver)
            if sol is None:
                _log.error("no solution found")
                return uninvest
            else:
                return sol
        except Exception:
            _log.error(traceback.format_exc())
            return uninvest
def do_normalized_pca(df, df_dist, dist_func=lambda x: 1 / x**2,
                      do_build_and_clean=True, supervised=False,
                      supervised_t=0, labels=None):
    '''
    df - a data frame of patient data
    df_dist - a square, symmetric distance matrix (a DataFrame), in the format
        saved by `build_distance_matrix`
    do_build_and_clean - if False, skip centering and normalizing. The caller
        must at least center `df` first in that case, otherwise the results
        don't make sense
    '''
    # the distance matrix must cover exactly the samples in df.index
    # df_dist = pd.read_pickle(fname_dist_matrix)
    assert df.index.difference(df_dist.index).size == 0 \
        and df_dist.index.difference(df.index).size == 0

    L_weight = dist_func(df_dist)
    L_weight[L_weight == np.inf] = 0  # send infs to zero

    # do the supervised PCA adjustment if applicable
    if supervised:
        if labels is None:
            raise ValueError(
                "If running supervised=True, must supply labels lookup table")
        L_weight = get_supervised_t_weights(L_weight, labels, t=supervised_t)

    # build the graph Laplacian from the weights
    L_weight = symmetrize(L_weight)
    np.fill_diagonal(L_weight.values, 0)
    np.fill_diagonal(L_weight.values, -L_weight.sum())
    L_weight = -L_weight

    L_weight = cov_nearest(L_weight)
    # Handle the case where cov_nearest returns a matrix with one very small
    # negative eigenvalue, which makes the matrix not PSD. The fix is to add
    # epsilon * I, where epsilon is a multiple of the magnitude of the
    # smallest eigenvalue, but only if the perturbation this causes is very
    # small relative to the smallest diagonal entry. If it would cause a big
    # perturbation, raise an error.
    eigs = np.linalg.eigvalsh(L_weight)  # ascending, so eigs[0] is the smallest
    smallest_eig = min(eigs[0], 0)
    smallest_diag = np.min(np.diag(L_weight))
    rel_perturbation = abs(smallest_eig / smallest_diag)
    if rel_perturbation > 1e-5:
        raise ValueError("L_weight is non-negligibly far from the PSD cone")
    L_weight = L_weight + abs(smallest_eig) * 10 * np.identity(len(L_weight))
    L = np.linalg.cholesky(L_weight)

    # clean non-variant alleles from df and build the matrix
    if do_build_and_clean:
        M = build_matrix(clean(df))
    else:
        M = df.values
    A = L.dot(M)
    ret_pca = do_pca(A, n_components=10)
    return ret_pca
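# Standalone sketch (toy matrix, not project data) of the epsilon * I shift
# used above: lift the spectrum by a multiple of the most negative eigenvalue
# until np.linalg.cholesky succeeds. In do_normalized_pca the shift is only
# applied when it amounts to a negligible perturbation.
import numpy as np

W = np.array([[1.0, -0.9, -0.9],
              [-0.9, 1.0, -0.9],
              [-0.9, -0.9, 1.0]])             # smallest eigenvalue is -0.8
eigs = np.linalg.eigvalsh(W)                  # ascending order
smallest_eig = min(eigs[0], 0)
W_psd = W + abs(smallest_eig) * 10 * np.identity(len(W))
np.linalg.cholesky(W_psd)                     # succeeds: W_psd is now PD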
def test_corr_psd():
    # test that a positive definite matrix is unchanged
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    y = corr_nearest(x, n_fact=100)
    #print(np.max(np.abs(x - y)))
    assert_almost_equal(x, y, decimal=14)

    y = corr_clipped(x)
    assert_almost_equal(x, y, decimal=14)

    y = cov_nearest(x, n_fact=100)
    assert_almost_equal(x, y, decimal=14)

    x2 = x + 0.001 * np.eye(3)
    y = cov_nearest(x2, n_fact=100)
    assert_almost_equal(x2, y, decimal=14)
# parametrize decorator assumed; the thresholds match the looped variant above
@pytest.mark.parametrize('threshold', [0, 1e-15, 1e-10, 1e-6])
def test_corrpsd_threshold(threshold):
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    y = corr_nearest(x, n_fact=100, threshold=threshold)
    evals = np.linalg.eigvalsh(y)
    assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)

    y = corr_clipped(x, threshold=threshold)
    evals = np.linalg.eigvalsh(y)
    assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)

    y = cov_nearest(x, method='nearest', n_fact=100, threshold=threshold)
    evals = np.linalg.eigvalsh(y)
    assert_allclose(evals[0], threshold, rtol=1e-6, atol=1e-15)

    y = cov_nearest(x, n_fact=100, threshold=threshold)
    evals = np.linalg.eigvalsh(y)
    assert_allclose(evals[0], threshold, rtol=0.25, atol=1e-15)
def cov_matrix(symbols, start, end, interval='m'):
    if interval == 'y':
        data = [numpy.array(get_yr_returns(s, start, end)) for s in symbols]
    else:
        data = [numpy.array(get_returns(s, start, end, interval))
                for s in symbols]
    x = numpy.array(data)
    return cov_nearest(numpy.cov(x))
def test_cov_nearest(self):
    x = self.x
    res_r = self.res
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        y = cov_nearest(x, method='nearest', threshold=1e-7)
    #print(np.max(np.abs(x - y)))
    assert_almost_equal(y, res_r.mat, decimal=2)
    d = norm_f(x, y)
    assert_allclose(d, res_r.normF, rtol=0.0015)
def fix_numerical_problem(k: np.ndarray, tolerance: float) -> np.ndarray:
    """
    :param k: a (possibly numerically indefinite) covariance/kernel matrix
    :param tolerance: clipping threshold passed to ``cov_nearest``
    :return: the upper-triangular Cholesky factor of the repaired matrix
    """
    k = cov_nearest(k, threshold=tolerance)
    cholesky_k = np.linalg.cholesky(k).T
    return cholesky_k
def add_noise(theta, p, alpha, threshold=0.1):
    noise_mat = make_sparse_spd_matrix(dim=p, alpha=alpha, norm_diag=False,
                                       smallest_coef=-threshold,
                                       largest_coef=threshold)
    np.fill_diagonal(theta, 0.0)
    theta_star = cov_nearest(noise_mat + theta, method="clipped",
                             threshold=0.1)
    return theta_star
def _gista(self, theta0, S, _lambdas, verbose=False):
    """
    G-ISTA algorithm
    https://papers.nips.cc/paper/4574-iterative-thresholding-algorithm-for-sparse-inverse-covariance-estimation.pdf
    """
    theta = theta0
    t = min(np.linalg.eigvals(theta0))**2
    p = len(theta)

    if verbose:
        print(f'f(X,S) = {self.sfunc.eval(theta0, S)}')
        print(f'g(X,rho) = {self.nsfunc.eval(theta0, _lambdas)}')
        print(f'Initial Objective: {self._pgm_objective(theta0, S, _lambdas)}')

    if self._pgm_objective(theta0, S, _lambdas) > 10000:
        # bad starting point: restart from a random sparse SPD matrix
        theta = make_sparse_spd_matrix(p, alpha=0.5, norm_diag=False,
                                       smallest_coef=-1.0, largest_coef=1.0)

    for i in range(self.max_iters):
        if not _is_pos_def(theta):
            print('Clipped precision matrix')
            theta = cov_nearest(theta, method="clipped", threshold=0.1)

        if self.ss_type == 'backtracking':
            t = self._step_size(theta, S, _lambdas, t)

        delta = self._duality_gap(p, theta, S, _lambdas)
        if verbose:
            print(f'Duality Gap: {delta}.')
        if delta < self.epsilon and self.dual_gap:
            print(f'iterations: {i}')
            print(f'Duality Gap: {delta} < {self.epsilon}. Exiting.')
            break

        # proximal gradient step
        theta_k1 = self.nsfunc.prox(
            theta - t * self.sfunc.gradient(theta, S), _lambdas)
        if self.ss_type == 'backtracking':
            t = _set_next_inital_step_size(theta_k1, theta)
        theta = theta_k1

    return theta
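# Hedged sketch of the proximal step that `self.nsfunc.prox` is assumed to
# implement for an l1 penalty (elementwise soft-thresholding); for the
# Gaussian likelihood f(theta) = -logdet(theta) + tr(S theta), the smooth
# gradient is S - inv(theta).
import numpy as np

def soft_threshold(X, tau):
    # prox of tau * ||X||_1, applied elementwise
    return np.sign(X) * np.maximum(np.abs(X) - tau, 0.0)

def ista_step(theta, S, lam, t):
    grad = S - np.linalg.inv(theta)           # gradient of the smooth part
    return soft_threshold(theta - t * grad, t * lam)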
def r_fit_ellipse(X, confidence=0.9, n=100):
    from statsmodels.stats.correlation_tools import cov_nearest
    from scipy.stats import f

    cov = cov_nearest(np.cov(X.T), method='clipped', threshold=1e-5)
    center = X.mean(axis=0).reshape(-1, 1)
    chol_decomp = np.linalg.cholesky(cov)

    radius = np.sqrt(2 * f.ppf(confidence, 2, X.shape[0] - 1))
    angles = np.arange(n) * 2 * np.pi / n
    unit_circle = np.vstack((np.cos(angles), np.sin(angles))).T

    ellipse = (center + radius * (unit_circle.dot(chol_decomp)).T).T
    return ellipse
def near_psd(cov, method='clipped', threshold=1e-15, n_fact=100) -> np.ndarray:
    """
    Finds the nearest covariance matrix that is positive (semi-) definite.

    This converts the covariance matrix to a correlation matrix, finds the
    nearest correlation matrix that is positive semi-definite, and converts
    it back to a covariance matrix using the initial standard deviations.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately equal to ``threshold``. If ``threshold=0``, then the
    smallest eigenvalue of the correlation matrix might be negative, but zero
    within numerical error, for example in the range of -1e-16.

    The input covariance matrix must be symmetric.

    Parameters
    ----------
    cov: (N, N) array like
        Initial covariance matrix
    method: { 'clipped', 'nearest' }, optional
        If "clipped", this function clips the eigenvalues, replacing
        eigenvalues smaller than the threshold by the threshold. The new
        matrix is normalized, so that the diagonal elements are one. Compared
        to "nearest", the distance between the original correlation matrix
        and the positive definite correlation matrix is larger. However, it
        is much faster since it only computes eigenvalues once.
        If "nearest", the function iteratively adjusts the correlation matrix
        by clipping the eigenvalues of a difference matrix. The diagonal
        elements are set to one.
    threshold: float
        Clipping threshold for the smallest eigenvalue
    n_fact: int
        Factor to determine the maximum number of iterations if method is
        set to "nearest"

    Returns
    -------
    ndarray
        positive semi-definite matrix
    """
    cov = np.asarray(cov)
    if not is_symmetric(cov):
        raise ValueError('covariance matrix must be symmetric')
    if is_psd(cov):
        return cov
    return cov_nearest(cov, method, threshold, n_fact, False)
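# Usage sketch for near_psd (toy matrix, assuming the module's helper
# predicates is_symmetric/is_psd are available): both methods push the
# smallest eigenvalue up to roughly the threshold; "nearest" typically stays
# closer to the input in Frobenius norm, as the docstring describes.
import numpy as np

x = np.array([[1.0, -0.9, -0.9],
              [-0.9, 1.0, -0.9],
              [-0.9, -0.9, 1.0]])             # indefinite input
y_clip = near_psd(x, method='clipped')
y_near = near_psd(x, method='nearest')
print(np.linalg.eigvalsh(y_clip)[0], np.linalg.eigvalsh(y_near)[0])
print(np.linalg.norm(x - y_near, 'fro'), np.linalg.norm(x - y_clip, 'fro'))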
def test_psd_norm(self):
    data = {
        'AAXJ': [66.029999000000004, 63.0, 59.270000000000003, 53.340000000000003, 52.75],
        'UBU': [20.079999999999998, 20.079999999999998, 21.550000000000001, 20.559999999999999, 20.18],
        'ALD': [45.939999, 45.330002, 44.490001999999997, 42.729999999999997, 42.409999999999997],
        'VSO': [47.399999999999999, 42.899999999999999, 43.340000000000003, 41.719999999999999, 40.950000000000003],
        'VAS': [73.700000000000003, 69.989999999999995, 72.099999999999994, 66.569999999999993, 64.549999999999997],
        'BTWJPNF_AU': [0.66000000000000003, 0.66000000000000003, 0.68999999999999995, 0.67000000000000004, 0.63],
        'VGS': [59.75, 58.439999999999998, 61.0, 58.25, 56.780000000000001],
        'EMB': [112.370003, 109.91999800000001, 109.660004, 108.010002, 106.400002],
        'FTAL': [41.854999999999997, 39.329999999999998, 40.390000000000001, 38.32, 37.229999999999997],
        'UBP': [20.150717539569801, 19.1999999999999, 19.050000000000001, 17.990000000000101, 17.240000000000101],
        'BTWASHF_AU': [1.8799999999999999, 1.8400000000000001, 1.8799999999999999, 1.8400000000000001, 1.8400000000000001],
        'VLC': [64.719999999999999, 61.219999999999999, 63.530000000000001, 57.469999999999999, 55.170000000000002],
        'MCHI': [59.849997999999999, 56.040000999999997, 50.040000999999997, 44.099997999999999, 43.810001],
        'UBE': [20.983828369806702, 20.140000000000001, 21.510000000000002, 20.1099999999999, 19.75],
        'BTA0420_AU': [1.1799999999999999, 1.1299999999999999, 1.0700000000000001, 1.02, 1.0],
        'SLXX': [136.13999999999999, 131.22, 134.57499999999999, 130.71000000000001, 131.46000000000001],
        'VTS': [143.81, 139.49000000000001, 149.49000000000001, 143.16, 139.47],
        'RGB': [21.379999999999999, 21.0, 21.280000000000001, 21.399999999999999, 21.52],
        'IJP': [17.239999999999998, 16.710000000000001, 17.68, 16.98, 16.09],
        'HOW0062_AU': [1.05, 1.05, 1.0, 1.01, 1.02],
        'DSUM': [24.91, 24.739999999999998, 24.510000000000002, 23.040001, 23.559999000000001],
        'ILB': [115.41, 113.8, 114.0, 114.56, 114.31999999999999],
        'PEBIX_US': [9.9499999999999993, 10.529999999999999, 10.19, 10.1, 9.7400000000000002],
        'BTWFAUS_AU': [1.74, 1.6499999999999999, 1.73, 1.5900000000000001, 1.5600000000000001],
        'BTWEUSH_AU': [1.3200000000000001, 1.29, 1.3799999999999999, 1.3500000000000001, 1.3200000000000001],
        'IEAG': [87.209999999999994, 83.355000000000004, 84.674999999999997, 87.055000000000007, 87.405000000000001],
        'RSM': [20.789999999999999, 20.550000000000001, 20.77, 20.850000000000001, 20.629999999999999],
        'ROTHWSE_AU': [2.6400000000000001, 2.4700000000000002, 2.3999999999999999, 2.3300000000000001, 2.3900000000000001],
        'UBA': [19.886842423199901, 18.6400000000001, 19.129999999999999, 17.440000000000001, 16.879999999999999],
        'IUSB': [101.769997, 100.519997, 100.459999, 100.389999, 100.25],
        'ROTHFXD_AU': [1.23, 1.21, 1.1899999999999999, 1.21, 1.21],
        'UBJ': [20.763995359855802, 20.479000000000099, 21.379999999999999, 20.549999999999901, 19.469999999999999],
        'IEU': [61.130000000000003, 57.57, 61.130000000000003, 58.340000000000003, 56.100000000000001],
        'VGE': [62.549999999999997, 60.229999999999997, 58.549999999999997, 53.600000000000001, 52.880000000000003],
        'RIGS': [25.25, 24.940000999999999, 24.940000999999999, 24.549999, 24.100000000000001],
        'VHY': [69.030000000000001, 65.040000000000006, 64.150000000000006, 59.219999999999999, 57.100000000000001],
        'UBW': [21.244103679132198, 20.510000000000002, 21.620000000000001, 19.779999999999902, 20.079999999999998],
        'BOND': [26.280000000000001, 25.800000000000001, 26.030000000000001, 26.23, 26.02],
        'BTWAMSH_AU': [1.23, 1.21, 1.24, 1.21, 1.1799999999999999],
    }
    df = pd.DataFrame(data)
    df_cov = df.cov()
    #print(df_cov)
    p1 = cov_nearest(df_cov)
def gauss_low_filter(self):
    noise_var = 1  # guard against conditioning problems
    K_ry = self.kernel_conv(flag=2)
    K_ry = K_ry.T
    K_y = K_SE(self.x, self.x, self.gamma, self.sigma) \
        + self.noise * np.eye(self.Nx)
    self.filtered = np.matmul(K_ry, np.linalg.inv(K_y)).dot(self.y)
    # we add noise_var to the diagonal as K_y may have conditioning problems
    # (K_y is Nx x Nx)
    M = K_ry @ np.linalg.solve(K_y + noise_var * np.eye(self.Nx), K_ry.T)
    K_rr = self.kernel_conv(flag=1)
    # print(np.linalg.eigvals(K_rr).min())
    K_rr_parche = cov_nearest(K_rr) + 1e-8 * np.eye(len(self.time))
    self.Covariance = K_rr_parche - M
    self.error_bar = 2 * np.sqrt(np.diag(self.Covariance))
    self.filt_spect = 2.0 / self.grid_num * np.abs(
        fft(self.filtered)[:int(self.grid_num / 2)])
def cov_fix(cov, method="clipped", **kwargs):
    r"""
    Fix a covariance matrix to a positive definite matrix.

    Parameters
    ----------
    cov : nd-array of shape (n_features, n_features)
        Features covariance matrix, where n_features is the number of
        features.
    method : str
        The default value is 'clipped', see more in `cov_nearest
        <https://www.statsmodels.org/stable/generated/statsmodels.stats.correlation_tools.cov_nearest.html>`_.
    **kwargs
        Other parameters from `cov_nearest
        <https://www.statsmodels.org/stable/generated/statsmodels.stats.correlation_tools.cov_nearest.html>`_.

    Returns
    -------
    cov_ : nd-array or pd.DataFrame
        A positive definite covariance matrix; a DataFrame is returned when
        a DataFrame was passed in.

    Raises
    ------
    ValueError
        When the value cannot be calculated.
    """
    flag = False
    if isinstance(cov, pd.DataFrame):
        cols = cov.columns.tolist()
        flag = True
    cov_ = np.array(cov, ndmin=2)
    cov_ = cov_nearest(cov_, method=method, **kwargs)
    cov_ = np.array(cov_, ndmin=2)
    if flag:
        cov_ = pd.DataFrame(cov_, index=cols, columns=cols)
    return cov_
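# Round-trip check for cov_fix (hypothetical labels): a DataFrame goes in,
# a DataFrame with the same labels comes out, now positive (semi-)definite.
import numpy as np
import pandas as pd

cov = pd.DataFrame([[1.0, 2.0], [2.0, 1.0]],     # "correlation" > 1: indefinite
                   index=['A', 'B'], columns=['A', 'B'])
fixed = cov_fix(cov, method='clipped', threshold=1e-8)
print(fixed.index.tolist(), np.linalg.eigvalsh(fixed.values)[0])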
def preproc_sm(sm, confounders, final_deconfound=True, feature_names=None,
               hcp_data_dict_correct_pct_to_t=True,
               nearest_psd_threshold=1e-6):
    """Preprocessing of subject measures.

    Parameters
    ----------
    sm : pd.DataFrame (n_samples, n_Y_features)
        behavioral and demographic data matrix. Names of features to include,
        and confounds, must be column names
    confounders : np.ndarray (n_samples, n_features)
        confounder data matrix
    final_deconfound : bool
        if ``True`` the final scores are deconfounded once more before they
        are returned
    feature_names : None, slice or list-like
        names of features to use; names must be columns in ``sm``. If
        ``None``, default feature names are used (i.e. from Smith et al.
        2015, applicable to HCP data)
    hcp_data_dict_correct_pct_to_t : bool
        whether to correct HCP data dict names, see :func:`_check_features`
    nearest_psd_threshold : float
        threshold for finding an acceptable nearest positive definite matrix

    Returns
    -------
    uu2 : np.ndarray (n_samples, n_Y_features)
        processed dataset Y
    uu2_white : np.ndarray (n_samples, n_Y_features)
        whitened processed dataset Y
    S4_raw : np.ndarray (n_samples, n_Y_features)
        unprocessed Y data comprising only the selected features
    feature_names : list
        ordered list of feature names corresponding to the columns of Y
    """
    S4_raw, S4_deconfounded, feature_names = prepare_sm(
        sm, confounders, feature_names, hcp_data_dict_correct_pct_to_t)

    # estimate the covariance matrix, ignoring missing values
    # NOTE: this is the n_subjects x n_subjects covariance matrix across
    # features!
    S_cov = np.nan * np.empty((S4_raw.shape[0], S4_raw.shape[0]))
    for i in trange(len(S_cov), desc='subject', leave=False):
        for j in range(i + 1):
            mask = np.isfinite(S4_deconfounded[i]) \
                & np.isfinite(S4_deconfounded[j])
            S_cov[i, j] = S_cov[j, i] = np.cov(S4_deconfounded[i, mask],
                                               S4_deconfounded[j, mask])[0, 1]
    assert np.isfinite(S_cov).all()

    S_cov_psd = cov_nearest(S_cov, threshold=nearest_psd_threshold)
    assert np.isfinite(S_cov_psd).all()
    assert np.allclose(S_cov_psd, S_cov_psd.T)
    assert np.linalg.matrix_rank(S_cov_psd) == len(S_cov_psd)

    print('smallest sval S_cov =',
          np.linalg.svd(S_cov, compute_uv=False, hermitian=True).min())
    print('smallest sval S_cov_psd =',
          np.linalg.svd(S_cov_psd, compute_uv=False, hermitian=True).min())
    print('rank S_cov =', np.linalg.matrix_rank(S_cov))
    print('rank S_cov_psd =', np.linalg.matrix_rank(S_cov_psd))

    # --- PCA ---
    dd2, uu2 = np.linalg.eigh(S_cov_psd)
    assert np.allclose((uu2**2).sum(0), 1)
    order = np.argsort(dd2)[::-1]
    dd2 = dd2[order]
    uu2 = uu2[:, order]
    assert np.all(np.diff(dd2) <= 0)

    uu2_white = uu2 / uu2.std(0)
    uu2 = uu2 * (np.sqrt(dd2) / uu2.std(0)).reshape(1, -1)
    # uu2 doesn't have mean 0, probably because of the way it's computed,
    # i.e. with cov_nearest, ...
    # assert np.allclose(uu2.mean(0), 0)
    assert np.allclose(uu2_white.var(0), 1)
    assert np.allclose(uu2.var(0), dd2)

    if final_deconfound:
        # deconfound again, just to be safe
        uu2_white = deconfound(uu2_white, confounders)
        uu2 = deconfound(uu2, confounders)

    return uu2, uu2_white, S4_raw, feature_names
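# Toy illustration (random data, hypothetical shapes) of why the
# pairwise-complete covariance assembled above needs cov_nearest: each entry
# is estimated on a different subset of observations, so the assembled matrix
# can have a slightly negative smallest eigenvalue.
import numpy as np
from statsmodels.stats.correlation_tools import cov_nearest

rng = np.random.default_rng(0)
X = rng.normal(size=(6, 30))
X[rng.random(X.shape) < 0.3] = np.nan            # scatter missing values
S = np.empty((6, 6))
for i in range(6):
    for j in range(i + 1):
        m = np.isfinite(X[i]) & np.isfinite(X[j])
        S[i, j] = S[j, i] = np.cov(X[i, m], X[j, m])[0, 1]
S_psd = cov_nearest(S, threshold=1e-6)
print(np.linalg.eigvalsh(S)[0], np.linalg.eigvalsh(S_psd)[0])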
def nearestPSD(P):
    # Other options (not ideal, but sometimes necessary for robust solutions):
    # 1) P <- 1/2 P + 1/2 P' to even out the off-diagonal terms (symmetry)
    # 2) P <- P + eps * I_{n x n}, where eps is a small scalar, to make sure
    #    the matrix is not ill-conditioned
    # 3) use 64-bit floating-point arithmetic
    return cov_nearest(P)
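# Quick comparison of nearestPSD with the cheaper fallbacks listed above
# (toy matrix; the jitter size eps is an assumption). Note a tiny jitter is
# not always enough, while the cov_nearest projection guarantees PSD.
import numpy as np

P = np.array([[1.0, 0.9], [0.9, 0.8]])       # indefinite: det < 0
P_sym = 0.5 * (P + P.T)                      # option 1: symmetrize
P_jit = P_sym + 1e-8 * np.eye(2)             # option 2: eps * I jitter
P_psd = nearestPSD(P)                        # full projection via cov_nearest
print(np.linalg.eigvalsh(P_jit)[0], np.linalg.eigvalsh(P_psd)[0])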
def update_row_col(subproblem1, subproblem2, partition_list, attributes):
    """Updates the last two rows and columns of COV with the exponential
    kernel values of the string distances of the corresponding subproblem
    constraints, and finds the nearest covariance matrix.
    """
    global COV
    sigma = attributes['KG_sigma']
    l = attributes['KG_l']
    s1_const = subproblem1['constraints']
    s2_const = subproblem2['constraints']
    const_vec = [partition_list[i]['constraints']
                 for i in range(len(partition_list) - 2)]

    # Obtain constraints as strings
    s1_const_str = ''
    for k in s1_const.keys():
        s1_const_str += str(s1_const[k])
    s2_const_str = ''
    for k in s2_const.keys():
        s2_const_str += str(s2_const[k])
    const_str = []
    for d in const_vec:
        d_str = ''
        for k in d.keys():
            d_str += str(d[k])
        const_str.append(d_str)
    const_str += [s1_const_str, s2_const_str]

    # Calculate the 1st column to fill
    dist1 = np.array([l_dist(s1_const_str, string) for string in const_str])
    col1 = sigma**2 * np.exp(-np.square(dist1) / (2 * l**2))
    #col1[len(col1)-2] += 2*l**2
    #col1[len(col1)-2] += sigma**2  # positive-definite adjustment: max variance

    # Calculate the 2nd column to fill
    dist2 = np.array([l_dist(s2_const_str, string) for string in const_str])
    col2 = sigma**2 * np.exp(-np.square(dist2) / (2 * l**2))
    #col2[len(col2)-1] += 2*l**2
    #col2[len(col2)-1] += sigma**2  # positive-definite adjustment: max variance

    # Update the last two rows and columns of COV
    M = COV.to_numpy()
    k = M.shape[0] - 1
    M[:, k - 1] = col1
    M[k - 1, :] = col1
    M[:, k] = col2
    M[k, :] = col2
    M = M.astype(float)
    M = cov_nearest(M, threshold=1e-6, n_fact=10000, return_all=False)
    S = COV.to_numpy()
    S[:, :] = M

    #DEBUG
    #if utilities.is_pos_def(M) != True:
    #    #print("Non positive definite matrix M")
    #    raise Exception("Non positive definite matrix M")
    #if utilities.is_pos_semi_def(M) != True:
    #    #print("Non positive semidefinite matrix M")
    #    raise Exception("Non positive semidefinite matrix M")
    return
def covariance_matrix_solve(self, expval, index, stdev, rhs):
    """
    Solves matrix equations of the form `covmat * soln = rhs` and returns
    the values of `soln`, where `covmat` is the covariance matrix
    represented by this class.

    Parameters
    ----------
    expval : array-like
        The expected value of endog for each observed value in the group.
    index : integer
        The group index.
    stdev : array-like
        The standard deviation of endog for each observation in the group.
    rhs : list/tuple of array-like
        A set of right-hand sides; each defines a matrix equation to be
        solved.

    Returns
    -------
    soln : list/tuple of array-like
        The solutions to the matrix equations.

    Notes
    -----
    Returns None if the solver fails.

    Some dependence structures do not use `expval` and/or `index` to
    determine the correlation matrix.  Some families (e.g. binomial) do
    not use the `stdev` parameter when forming the covariance matrix.

    If the covariance matrix is singular or not SPD, it is projected to
    the nearest such matrix.  These projection events are recorded in the
    fit_history member of the GEE model.

    Systems of linear equations with the covariance matrix as the left
    hand side (LHS) are solved for different right hand sides (RHS); the
    LHS is only factorized once to save time.

    This is a default implementation; it can be reimplemented in
    subclasses to optimize the linear algebra according to the structure
    of the covariance matrix.
    """
    vmat, is_cor = self.covariance_matrix(expval, index)
    if is_cor:
        vmat *= np.outer(stdev, stdev)

    # Factor the covariance matrix.  If the factorization fails, attempt
    # to condition it into a factorizable matrix.
    threshold = 1e-2
    success = False
    cov_adjust = 0
    for itr in range(20):
        try:
            vco = spl.cho_factor(vmat)
            success = True
            break
        except np.linalg.LinAlgError:
            vmat = cov_nearest(vmat, method=self.cov_nearest_method,
                               threshold=threshold)
            threshold *= 2
            cov_adjust += 1
    self.cov_adjust.append(cov_adjust)

    # Last resort if we still can't factor the covariance matrix.
    if not success:
        warnings.warn(
            "Unable to condition covariance matrix to an SPD "
            "matrix using cov_nearest", ConvergenceWarning)
        vmat = np.diag(np.diag(vmat))
        vco = spl.cho_factor(vmat)

    soln = [spl.cho_solve(vco, x) for x in rhs]
    return soln
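# The factor-once / solve-many pattern from covariance_matrix_solve, in
# isolation (toy SPD matrix): one Cholesky factorization is reused across all
# right-hand sides instead of re-solving from scratch.
import numpy as np
import scipy.linalg as spl

vmat = np.array([[2.0, 0.5],
                 [0.5, 1.0]])                     # SPD left-hand side
rhs = [np.array([1.0, 0.0]), np.eye(2)]           # multiple right-hand sides
vco = spl.cho_factor(vmat)                        # factor once
soln = [spl.cho_solve(vco, x) for x in rhs]       # cheap triangular solves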
def bl_model(sigma, w_tilde, p, v, n, c=1.0, lambda_bar=1.2):
    """
    This is an implementation of the Black-Litterman model based on Meucci's
    article: http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1117574

    Argument Definitions:
    Required:
    :param sigma: nxn numpy array covariance matrix of the asset return time
        series
    :param w_tilde: nx1 numpy array market cap portfolio weights
    :param p: mxn numpy array corresponding to investor views on future asset
        movements
    :param v: mx1 numpy array of expected returns of portfolios corresponding
        to views
    :param n: length of the time series of returns used to compute the
        covariance matrix

    Optional:
    :param c: constant representing overall confidence in the views return
        estimator
    :param lambda_bar: risk-aversion level, which Black and Litterman set
        to 1.2

    Argument Constraints:
    Required:
    sigma -- positive definite symmetric matrix
    w_tilde -- vector with positive entries that sum to one
    p -- matrix of positive or negative floats
    v -- matrix of positive or negative floats

    Optional:
    c -- any positive float, defaults to 1 (as in the example on page 5)
    lambda_bar -- positive float, defaults to 1.2 as mentioned after
        equation (5)
    """
    logger.debug("Running BL with "
                 "sigma:\n{}\nw_tilde:\n{}\np:\n{}\nv:\n{}\nn:{}".format(
                     sigma, w_tilde, p, v, n))

    pi = 2.0 * lambda_bar * np.dot(sigma, w_tilde)  # equation (5)
    tau = 1.0 / float(n)  # equation (8)
    omega = np.dot(np.dot(p, sigma), p.T) / c  # equation (12)

    # Main model, equations (20) and (21)
    m1 = np.dot(tau * np.dot(sigma, p.T),
                inv(tau * np.dot(p, np.dot(sigma, p.T)) + omega))
    m2 = v - np.dot(p, pi)
    m3 = np.dot(p, sigma)
    mu_bl = pi + np.dot(m1, m2)
    sig_bl = (1.0 + tau) * sigma - tau * np.dot(m1, m3)

    # Make the matrix symmetric
    sym_bl = (sig_bl + sig_bl.T) / 2

    # The covariance matrix may not be strictly positive semidefinite due to
    # rounding etc.  Make sure it is.
    psd_bl = cov_nearest(sym_bl)

    return mu_bl, psd_bl
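# Toy call (made-up numbers) exercising bl_model end to end; assumes numpy as
# np, numpy.linalg.inv, cov_nearest, and a `logger` are in scope as in the
# snippet's module.
import numpy as np

sigma = np.array([[0.04, 0.02],
                  [0.02, 0.03]])      # 2-asset return covariance
w_tilde = np.array([0.6, 0.4])        # market-cap weights, positive, sum to 1
p = np.array([[1.0, -1.0]])           # one relative view: long 1, short 2
v = np.array([0.02])                  # expected return of the view portfolio
mu_bl, sig_bl = bl_model(sigma, w_tilde, p, v, n=120)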
def simulate(A, T, P, K, J, D):
    """
    Simulates a model from framework 2.
    Returns the regressor object, F0, G0, Gamma and epsilon.
    """
    ## F is autoregressive with matrix A
    e = np.random.randn(K, T)
    for i in range(1, T):
        e[:, i:i + 1] += np.matmul(A, e[:, i - 1:i])
    F = e.T

    ## X is standard normal, 1 covariate only
    X = np.random.randn(P, D)

    ## Phi is generated from three functions
    Phi = np.zeros((P, J))
    Phi[:, 0:1] = X
    Phi[:, 1:2] = X**2 - 1
    Phi[:, 2:3] = X**3 - 2 * X

    ## Random loadings
    B = np.random.randn(J, K)
    G = np.matmul(Phi, B)
    H = co_diagonalize(np.matmul(F.T, F),
                       np.linalg.pinv(np.matmul(G.T, G)))
    H = np.sqrt(T) * H

    ## Rotate loadings and factors
    F0 = np.matmul(F, H)
    G0 = np.matmul(G, np.linalg.pinv(H.T))

    ## Simulate the noise
    alpha = 7.06
    beta = 536.93
    mu = -0.0019
    sig = 0.1499
    Diag = np.diag(np.random.gamma(alpha, 1 / beta, size=P))
    sigma0 = np.random.normal(mu, sig, size=(P, P))
    for i in range(P):
        sigma0[i, i] = 1.0
    for i in range(P):
        for j in range(i, P):
            sigma0[i, j] = sigma0[j, i]
    sigma0 = sigma0 * (np.abs(sigma0) > 0.03)
    sigma0 = cov_nearest(sigma0, threshold=1e-10)
    covariance = np.matmul(Diag, np.matmul(sigma0, Diag))
    noise = np.random.multivariate_normal(np.zeros(P), covariance, T).T

    ## Finalize the model
    Y = np.matmul(G, F.T) + noise

    reg_PPCA = Regressor()
    # reg_PPCA.add_line([1, 0])
    # reg_PPCA.add_square([1, 0, -1])
    # reg_PPCA.add_cube([1, 0, -2, 0])
    reg_PPCA.add_cubic_splines(X.min(), X.max(),
                               int(3 * (P * min(P, T))**(0.25)))
    reg_PPCA.fit_PPCA(Y, X, 3, find_K=False)

    reg_PC = Regressor()
    reg_PC.add_line([1, 0])
    reg_PC.add_square([1, 0, -1])
    reg_PC.add_cube([1, 0, -2, 0])
    reg_PC.fit_PC(Y, 3, find_K=False)

    return reg_PPCA, reg_PC, F0, G0, np.zeros((P, K)), noise
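# Hedged aside (toy matrix): hard-thresholding small entries, as done to
# sigma0 above, can push a correlation matrix outside the PSD cone, which is
# why the cov_nearest call follows it.
import numpy as np
from statsmodels.stats.correlation_tools import cov_nearest

rng = np.random.default_rng(0)
A = rng.normal(size=(10, 12))
C = np.corrcoef(A)                               # sample correlation matrix
C_sparse = C * (np.abs(C) > 0.2)                 # zero out weak correlations
C_fixed = cov_nearest(C_sparse, threshold=1e-10)
print(np.linalg.eigvalsh(C_sparse)[0], np.linalg.eigvalsh(C_fixed)[0])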
# print(slope, intercept, r_value, p_value, std_err)
yy = f(x, slope, intercept)
pol_raw0, polcov_raw0 = sciopt.curve_fit(f, x, y, sigma=y_err,
                                         p0=[intercept, slope],
                                         maxfev=1200)
pol_raw_err0 = np.sqrt(np.diag(polcov_raw0))

# cov1 = lftools._get_covariance_matrix_from_raw(y_raw)
# cov1 = cov_nearest(np.cov(y_raw), method="nearest", threshold=9e-16, n_fact=500)
# cov1 = near_psd(np.cov(y_raw))
# cov1 = nearPSD(np.cov(y_raw))
cov1 = cov_nearest(np.cov(y_raw), method="nearest")
# print(np.linalg.eigvals(cov1).min())
pol_raw1, polcov_raw1 = sciopt.curve_fit(f, x, y, sigma=cov1,
                                         p0=[intercept, slope],
                                         maxfev=2000, ftol=1e-16,
                                         epsfcn=1e-10, xtol=1e-10)
pol_raw_err1 = np.sqrt(np.diag(polcov_raw1))

# cov2 = lftools._get_covariance_matrix_from_raw(V, iscov=True)
cov2 = cov_nearest(V, method="nearest", threshold=9e-16, n_fact=500)
pol_raw2, polcov_raw2 = sciopt.curve_fit(f,