def factor_cov(N, K0=4, seed=23945):
    """Build a conventional factor-model covariance matrix.

    Returns the N x N covariance matrix together with its eigenvalues
    (no secondary factor groups: K1 = K2 = 0).
    """
    model = fm_(N=N, K0=K0, K1=0, K2=0, seed=seed)
    Sigma = model.covariance()
    eigenvalues = eig(Sigma, return_eigenvectors=False)
    return Sigma, eigenvalues
def minvar_nls_loo(sim):
    """Leave-one-out cross-validated eigenvalues for MinVar nonlinear shrinkage.

    For each held-out observation, the sample covariance of the remaining
    rows is eigendecomposed and the normal-equation terms are accumulated;
    a single linear system is then solved for z, and the shrunk eigenvalues
    are returned as 1 / z.
    """
    T, N = sim.shape
    X = sim.X
    lhs = np.zeros((N, N))
    rhs = np.zeros(N)
    for k in range(T):
        keep = [t for t in range(T) if t != k]
        _, U_k = eig(cov(X[keep, :]))
        x_k = X[k].reshape(N, 1)
        C_k = U_k.T @ x_k @ x_k.T @ U_k
        alpha_k = U_k.T @ np.ones(N)
        A_k = np.diag(alpha_k)
        lhs += A_k @ C_k.T @ C_k @ A_k
        rhs -= A_k @ C_k.T @ alpha_k
    # NOTE(review): rhs is accumulated with a minus sign and the solve negates
    # it again, so z = lhs^{-1} (sum_k A_k C_k^T alpha_k). The K-fold joint
    # variant accumulates q with the opposite sign before solving with -q;
    # confirm the intended sign convention before changing either.
    z = np.linalg.solve(lhs, -rhs)
    return 1 / z
def SLR_cov(N, K0=4, K1=32, K2=16, seed=23945):
    """SLR covariance matrix from a factor model with K0/K1/K2 factor groups.

    Returns the N x N covariance matrix and its eigenvalues.
    """
    model = fm_(N=N, K0=K0, K1=K1, K2=K2, seed=seed)
    Sigma = model.covariance()
    tau = eig(Sigma, return_eigenvectors=False)
    return Sigma, tau
def __init__(self, Sigma, T):
    """Simulation with a given population covariance matrix.

    Stores the population eigenvalues and eigenvectors (tau and V), the
    number of observations (T) and the number of features (N). The RNG
    seed starts unset.
    """
    self.T = T
    self.N = Sigma.shape[0]
    self.Sigma = Sigma
    self.tau, self.V = eig(Sigma)
    self.seed = None
def pca_whitening(x, k=0):
    """Principal component analysis of the data matrix x, with whitening.

    Input:
        x: N x M matrix in which each row is a sample with M attributes.
        k: number of principal components to keep (0, or any value larger
           than N, means keep all N).
    Returns:
        omega: the data projected onto the k whitened principal components.
        vl: k x k diagonal whitening matrix (eigenvalues to the power -1/2).
        m: matrix whose columns are the k leading eigenvectors.
    """
    import numpy as np
    import utils

    n_samples = x.shape[0]
    if k == 0 or k > n_samples:
        k = n_samples

    # Correlation matrix and its eigendecomposition, sorted in
    # descending eigenvalue order.
    corr = utils.correlacao(x)
    w, v = utils.eig(corr)
    order = np.argsort(w)[::-1]
    w, v = w[order], v[:, order]
    wl, m = w[:k], v[:, :k]

    # Standardize the data (zero mean, unit variance) before projecting.
    mu, var = utils.calcula_media_variancia(x)
    standardized = (x - mu) / np.sqrt(var)

    # Whitening matrix: scale each component by lambda^{-1/2}.
    scale = np.zeros((k, k))
    for i in range(k):
        scale[i, i] = np.power(wl[i], -0.5)
    vl = np.transpose(scale)

    omega = np.real(np.dot(np.dot(standardized, m), vl))
    return omega, vl, m
def _nls_cv(sim, K):
    """K-fold cross-validated eigenvalues for nonlinear shrinkage (helper)."""
    T, N = sim.shape
    X, S = sim.X, sim.S
    fold = int(T / K)
    d = np.zeros(N)
    for k in range(K):
        rows = list(range(k * fold, (k + 1) * fold))
        X_k = X[rows, :]
        # Held-out sample covariance: subtract fold k from the total.
        S_k = (T * S - X_k.T @ X_k) / (T - fold)
        _, U_k = eig(S_k)
        d += ((U_k.T @ X_k.T) ** 2).sum(axis=1) / T
    return d
def nls_loo(X, S, U, progress=False):
    """Leave-one-out cross-validated eigenvalues for LW nonlinear shrinkage."""
    T, N = X.shape
    d = np.zeros(N)
    pbar = tqdm(total=T) if progress else None
    for k in range(T):
        x_k = X[k, :]
        # Sample covariance with observation k removed.
        S_k = (T * S - np.outer(x_k, x_k)) / (T - 1)
        _, U_k = eig(S_k)
        d += (U_k.T @ x_k) ** 2 / T
        if pbar is not None:
            pbar.update()
    return U, d
def minvar_nls_kfold_oracle(sim, K=10, progress=False, trace=False,
                            upper_bound=True):
    """Oracle K-fold cross-validated eigenvalues for the new MinVar
    nonlinear shrinkage.

    Oracle variant: the population covariance Sigma is rotated into each
    held-out eigenbasis when building the C matrices.
    """
    T, N = sim.shape
    S, Sigma, X, lam = sim.S, sim.Sigma, sim.X, sim.lam
    fold = int(T / K)
    C_list, alpha_list = [], []
    pbar = tqdm(total=K) if progress else None
    for k in range(K):
        rows = list(range(k * fold, (k + 1) * fold))
        X_k = X[rows, :]
        # Held-out sample covariance for fold k.
        S_k = (T * S - X_k.T @ X_k) / (T - fold)
        _, U_k = eig(S_k)
        C_list.append(U_k.T @ Sigma @ U_k)
        alpha_list.append(U_k.T @ np.ones(N))
        if pbar is not None:
            pbar.update()
    d_min, d_max = lam[-1], lam[0]
    d_isokfold = isotonic_regression(nls_kfold(sim, K))
    if trace:
        d = minvar_nls_nlsq_multi_transformed(
            C_list, alpha_list, np.sum(d_isokfold), d_isokfold,
            d_min, d_max, upper_bound)
    else:
        d = minvar_nls_nlsq_multi(
            C_list, alpha_list, None, d_isokfold,
            d_min, d_max, upper_bound)
    return d
def nls_kfold(X, S, U, K=10, progress=False):
    """K-fold cross-validated eigenvalues for LW nonlinear shrinkage."""
    T, N = X.shape
    fold = int(T / K)
    d = np.zeros(N)
    pbar = tqdm(total=K) if progress else None
    for k in range(K):
        rows = list(range(k * fold, (k + 1) * fold))
        X_k = X[rows, :]
        # Sample covariance with fold k held out.
        S_k = (T * S - X_k.T @ X_k) / (T - fold)
        _, U_k = eig(S_k)
        d += ((U_k.T @ X_k.T) ** 2).sum(axis=1) / T
        if pbar is not None:
            pbar.update()
    return U, d
def minvar_joint_kfold_isotonic(sim, K, smoothing='average', nonnegative=False,
                                regularization=None):
    """Base Estimator 4: MinVar K-fold joint cross-validation with isotonic
    regression.

    Parameters
    ----------
    sim : simulation object exposing .shape (T, N), .X (T x N returns) and
        .S (sample covariance).
    K : number of folds.
    smoothing : 'average' or 'median' variant selector — currently unused in
        this implementation (TODO confirm intended use).
    nonnegative : if True, solve the joint system with a nonnegativity
        constraint and interpolate any zero entries of the solution.
    regularization : 'l2' envisioned; currently unused here.

    Returns
    -------
    d : isotonically-regressed shrunk eigenvalues, d = isotonic(1 / z).
    """
    # Fix: removed dead per-iteration work `_k = np.delete(range(T), k_set)`
    # — the complement index set was computed on every fold but never used.
    T, N = sim.shape
    m = int(T / K)
    X, S = sim.X, sim.S
    P = np.zeros((N, N))
    q = np.zeros(N)
    for k in range(K):
        k_set = list(range(k * m, (k + 1) * m))
        X_k = X[k_set, :]
        # Held-out covariance 1/(T-m) * X[~fold].T @ X[~fold], computed by
        # subtracting the fold's contribution from the full sample covariance.
        S_k = (T * S - X_k.T @ X_k) / (T - m)
        _, U_k = eig(S_k)
        alpha_k = U_k.T @ np.ones(N)
        C_k = U_k.T @ (1 / m * X_k.T @ X_k) @ U_k
        A_k = np.diag(alpha_k)
        # Accumulate the joint normal equations across all folds.
        P = P + (A_k @ C_k.T @ C_k @ A_k)
        q = q + (A_k @ C_k.T @ alpha_k)
    if nonnegative:
        z = nnlsq_regularized(P, -q, lmbda=0)
        interpolate_zeros(z)
    else:
        z = np.linalg.solve(P, -q)
    d = 1 / z
    d = isotonic_regression(d)
    return d
def __init__(
    self, T, kernel="RBF", length_scale=1, num_eig=10, N=100, interp="cubic"
):
    """Truncated eigen-expansion of a GP kernel on [0, T].

    Only T = 1 is supported. The leading `num_eig` eigenfunctions are
    scaled by sqrt(eigenvalue) and wrapped as 1-D interpolants over a
    uniform grid of N points.
    """
    if not np.isclose(T, 1):
        raise ValueError("Only support T = 1.")
    self.num_eig = num_eig
    if kernel == "RBF":
        kern = gp.kernels.RBF(length_scale=length_scale)
    elif kernel == "AE":
        # Matern with nu = 0.5 is the absolute-exponential kernel.
        kern = gp.kernels.Matern(length_scale=length_scale, nu=0.5)
    else:
        # Anything else is assumed to already be a kernel object.
        kern = kernel
    eigval, eigvec = eig(kern, num_eig, N, eigenfunction=True)
    eigvec *= eigval ** 0.5
    grid = np.linspace(0, T, num=N)
    self.eigfun = [
        interpolate.interp1d(grid, col, kind=interp, copy=False,
                             assume_sorted=True)
        for col in eigvec.T
    ]
def minvar_nls_kfold(sim, K, progress=False, trace=False, upper_bound=True):
    """K-fold cross-validated eigenvalues for the new MinVar nonlinear
    shrinkage.

    Joint version: C matrices and alpha vectors from every fold are
    collected and a single z vector is solved for (no per-fold averaging).
    Bona-fide estimator: C is built from the held-out sample data, not the
    population covariance.
    """
    T, N = sim.shape
    fold = int(T / K)
    X, S, lam = sim.X, sim.S, sim.lam
    C_list, alpha_list = [], []
    pbar = tqdm(total=K) if progress else None
    for k in range(K):
        rows = list(range(k * fold, (k + 1) * fold))
        X_k = X[rows, :]
        # Held-out sample covariance for fold k.
        S_k = (T * S - X_k.T @ X_k) / (T - fold)
        _, U_k = eig(S_k)
        C_list.append(U_k.T @ (X_k.T @ X_k) @ U_k)
        alpha_list.append(U_k.T @ np.ones(N))
        if pbar is not None:
            pbar.update()
    d_min, d_max = lam[-1], lam[0]
    d_isokfold = isotonic_regression(nls_kfold(sim, K))
    if trace:
        d = minvar_nls_nlsq_multi_transformed(
            C_list, alpha_list, np.sum(d_isokfold), d_isokfold,
            d_min, d_max, upper_bound)
    else:
        d = minvar_nls_nlsq_multi(
            C_list, alpha_list, None, d_isokfold,
            d_min, d_max, upper_bound)
    return d
def _minvar_nlsq(sim, K, mono=True, upper_bound=True, trace=True, type='kfold'):
    """Dispatch helper for the MinVar NLSQ estimators.

    `type` may contain 'kfold' (build per-fold C/alpha) and 'oracle' (use the
    population Sigma inside each fold); any other value solves one
    full-sample system in the basis U.
    """
    T, N = sim.shape
    X, Sigma, S, U = sim.X, sim.Sigma, sim.S, sim.U
    d_min, d_max = sim.lam_N, sim.lam_1
    d0 = nls_kfold(sim, 10, isotonic=mono)
    if 'kfold' in type:
        fold = int(T / K)
        C, alpha = [], []
        for k in range(K):
            rows = list(range(k * fold, (k + 1) * fold))
            X_k = X[rows, :]
            # Held-out sample covariance for fold k.
            S_k = (T * S - X_k.T @ X_k) / (T - fold)
            _, U_k = eig(S_k)
            if 'oracle' in type:
                C_k = U_k.T @ Sigma @ U_k
            else:
                C_k = U_k.T @ (X_k.T @ X_k) @ U_k
            C.append(C_k)
            alpha.append(U_k.T @ np.ones(N))
    else:
        C = [U.T @ Sigma @ U]
        alpha = [U.T @ np.ones(N)]
    if mono and trace:
        solver = minvar_nlsq_multi_transformed
        args = (C, alpha, np.sum(d0), d0, d_min, d_max, upper_bound)
    else:
        solver = minvar_nlsq_multi
        args = (C, alpha, np.sum(d0) if trace else None, d0,
                d_min, d_max, mono, upper_bound)
    return solver(*args)
def KL():
    """Inspect the eigen-expansion of an RBF kernel.

    Prints the leading eigenvalues, reports how many components are needed
    to exceed 99.9% cumulative eigenvalue mass, and plots the first
    eigenfunction plus the two around that cutoff.
    """
    length_scale = 0.2
    N = 1000
    kernel = gp.kernels.RBF(length_scale=length_scale)
    # Alternatives: Matern(nu=0.5) (absolute exponential), Matern(nu=2.5).
    eigval, eigfun = eig(kernel, 10, N, eigenfunction=True)
    print(eigval)
    variance = 0.999
    cumulative = np.cumsum(eigval)
    idx = np.nonzero(cumulative > variance)[0][1]
    print(idx + 1)
    grid = np.linspace(0, 1, num=N)
    for col in (0, idx - 1, idx):
        plt.plot(grid, eigfun[:, col])
    plt.show()
def pca(x, k=0):
    """Principal component analysis of the data matrix x.

    Input:
        x: N x M matrix in which each row is a sample with M attributes.
        k: number of principal components to keep (0, or any value larger
           than N, means keep all N).
    Returns:
        xhat: the data expressed in the new basis using k components.
        wl: the k leading eigenvalues.
        vl: the k eigenvectors that map the data in x to xhat.
    """
    import numpy as np
    import utils

    n_samples = x.shape[0]
    if k == 0 or k > n_samples:
        k = n_samples

    # Correlation matrix and its eigendecomposition, sorted in
    # descending eigenvalue order.
    corr = utils.correlacao(x)
    w, v = utils.eig(corr)
    order = np.argsort(w)[::-1]
    w, v = w[order], v[:, order]
    wl, vl = w[:k], v[:, :k]

    # Center the data and project it onto the leading eigenvectors.
    mu, var = utils.calcula_media_variancia(x)
    xhat = np.real(np.dot(x - mu, vl))
    return xhat, wl, vl
def nls_asymptotic(X, S, U):
    """Ledoit-Wolf analytic (asymptotic) nonlinear shrinkage eigenvalues.

    Returns the (unchanged) eigenvector basis U together with the
    eigenvalues of the nonlinearly shrunk covariance estimate.
    """
    shrunk = nlshrink_covariance(X, centered=True)
    return U, eig(shrunk, return_eigenvectors=False)
def nls_asymptotic(sim, isotonic=False):
    """Eigenvalues of the LW analytic nonlinear shrinkage covariance
    estimate, optionally passed through isotonic regression."""
    shrunk = nlshrink_covariance(sim.X, centered=True)
    d = eig(shrunk, return_eigenvectors=False)
    if isotonic:
        return isotonic_regression(d)
    return d
def cov_est(self):
    """Compute and cache the sample covariance of self.X (as self.S)
    together with its eigenvalues (self.lam) and eigenvectors (self.U)."""
    sample_cov = cov(self.X)
    self.S = sample_cov
    self.lam, self.U = eig(sample_cov)