def _gmm_from_memberships(data, memberships, covariance_type):
    """Build a GMM whose parameters are derived from a hard cluster assignment.

    Cluster means are the per-cluster sample means.  Covariances are
    placeholders (all-ones, see TODO below) except for ``'full'``, which uses
    a per-cluster OAS-shrunk covariance estimate.

    NOTE(review): cluster labels are used directly as row indices into
    ``means_``/``covars_``, so this assumes memberships take the values
    0 .. n_clusters-1 — TODO confirm with callers.
    NOTE(review): relies on the long-deprecated ``sklearn.mixture.GMM`` API
    (``params=``, ``covars_``); presumably pinned to an old sklearn — verify.
    """
    clusters = set(memberships)
    n_clusters = len(clusters)
    # params='m' restricts any subsequent EM updates to the means only.
    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters])/n_clusters
    gmm.means_ = np.zeros([n_clusters, data.shape[1]])
    # The shape of covars_ depends on the covariance type.
    if covariance_type == 'diag':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1]])
    if covariance_type == 'spherical':
        gmm.covars_ = np.zeros([n_clusters])
    if covariance_type == 'full':
        gmm.covars_ = np.zeros([n_clusters, data.shape[1], data.shape[1]])
    for cluster in clusters:
        cluster = int(cluster)
        indices = (memberships == cluster)
        gmm.means_[cluster, :] = data[indices, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            #TODO Fix covariance calculation, for now, return cov=1
            #D = np.diag(np.cov(data[indices, :].T))
            D = np.ones([data.shape[1]])
            if covariance_type == 'spherical':
                gmm.covars_[cluster] = D.mean()
            else:
                gmm.covars_[cluster] = D
        if covariance_type == 'full':
            # Shrinkage (OAS) estimate of the full per-cluster covariance.
            cov_estimator = OAS()
            cov_estimator.fit(data[indices, :])
            gmm.covars_[cluster] = cov_estimator.covariance_
    return gmm
def fit_base(self, X, y):
    """
    Fit the SLDA model to the base data.

    Computes per-class positive means/counts (posW/posT) for classes present
    in ``y``, complement ("negative") means/counts (negW/negT) for every
    class index, a single shared covariance via OAS shrinkage on class-mean
    centered features, and finally the initial OOD threshold(s).

    :param X: an Nxd torch tensor of base initialization data
    :param y: an Nx1-dimensional torch tensor of the associated labels for X
    :return: None
    """
    print('\nFitting Base...')
    X = X.to(self.device)
    y = y.squeeze()

    # update positive and negative means
    cls_ix = torch.arange(self.num_classes)
    # posW/posT only touch classes that actually occur in y ...
    for k in torch.unique(y):
        self.posW[k] = X[y == k].mean(0)
        self.posT[k] = X[y == k].shape[0]
    # ... while negW/negT are filled for every class index.
    # NOTE(review): for a class absent from y, X[y != k] is all of X.
    for k in cls_ix:
        self.negW[k] = X[y != k].mean(0)
        self.negT[k] = X[y != k].shape[0]
    self.num_updates = X.shape[0]

    print('\nEstimating initial covariance matrix...')
    from sklearn.covariance import OAS
    # Residuals X - posW[y] are centered per class, hence assume_centered=True.
    cov_estimator = OAS(assume_centered=True)
    cov_estimator.fit((X - self.posW[y]).cpu().numpy())
    self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
        self.device)

    print('\nBuilding initial OOD threshold(s)...')
    self.ood_predict(X, y)
    print('')
def _gmm_from_memberships(data, memberships, covariance_type):
    """Construct a GMM initialised from hard cluster memberships.

    Means are per-cluster sample means; weights are uniform.  Diag/spherical
    covariances are unit placeholders (see TODO), while 'full' uses an
    OAS-shrunk per-cluster estimate.
    """
    labels = set(memberships)
    n_clusters = len(labels)
    n_features = data.shape[1]

    gmm = GMM(n_components=n_clusters, params='m')
    gmm.weights_ = np.ones([n_clusters]) / n_clusters
    gmm.means_ = np.zeros([n_clusters, n_features])

    # Pick the covars_ storage shape for the requested covariance type.
    covar_shapes = {
        'diag': [n_clusters, n_features],
        'spherical': [n_clusters],
        'full': [n_clusters, n_features, n_features],
    }
    if covariance_type in covar_shapes:
        gmm.covars_ = np.zeros(covar_shapes[covariance_type])

    for label in labels:
        label = int(label)
        mask = (memberships == label)
        gmm.means_[label, :] = data[mask, :].mean(axis=0)
        if covariance_type in ['diag', 'spherical']:
            #TODO Fix covariance calculation, for now, return cov=1
            #D = np.diag(np.cov(data[mask, :].T))
            D = np.ones([n_features])
            if covariance_type == 'spherical':
                gmm.covars_[label] = D.mean()
            else:
                gmm.covars_[label] = D
        elif covariance_type == 'full':
            # Shrinkage estimate of the full per-cluster covariance.
            estimator = OAS()
            estimator.fit(data[mask, :])
            gmm.covars_[label] = estimator.covariance_
    return gmm
def _shrink_covariance(asset_returns):
    """
    Regularise/Shrink the asset covariances with the OAS estimator.

    :param asset_returns: (pd.Dataframe) Asset returns
    :return: (np.array) Shrunk covariance matrix of the asset returns
    """
    estimator = OAS()
    estimator.fit(asset_returns)
    return estimator.covariance_
def _shrink_covariance(covariance):
    """
    Regularise/Shrink the asset covariances, preserving the labels.

    :param covariance: (pd.Dataframe) asset returns covariances
    :return: (pd.Dataframe) shrinked asset returns covariances
    """
    estimator = OAS()
    estimator.fit(covariance)
    labels = covariance.columns
    # Re-wrap the ndarray result so the asset labels survive shrinkage.
    return pd.DataFrame(estimator.covariance_, index=labels, columns=labels)
def lw_covars(returns):
    """
    Calculates a constrained covariance matrix between the returns.

    Annualises the sample covariance, then blends it with a scaled-identity
    target using the OAS shrinkage coefficient.

    :return: A pandas dataframe of the covariance between the returns
    """
    annualised = returns.cov() * WEEKDAYS_PER_YEAR

    if logger.isEnabledFor(logging.DEBUG):
        logger.debug("Calcing covars as table: {}".format(
            returns.to_dict('list')))

    # Shrink the covars (Ledoit and Wolff)
    estimator = OAS(assume_centered=True)
    estimator.fit(returns.values)
    delta = estimator.shrinkage_
    n = len(annualised)
    # Shrinkage target: identity scaled to the average variance.
    target = np.trace(annualised) / n * np.identity(n)
    return (1 - delta) * annualised + delta * target
def OAS_est(X):
    '''
    OAS coefficient estimate
    X_size = (n_samples, n_features)
    '''
    estimator = OAS()
    estimator.fit(X)
    # Shrunk covariance estimate for the fitted data.
    return estimator.covariance_
def fit_base(self, X, y):
    """
    Fit the SLDA model to the base data.

    Computes per-class means (muK) and counts (cK) and a single shared
    covariance estimated with OAS shrinkage on class-mean centered features.

    :param X: an Nxd torch tensor of base initialization data
    :param y: an Nx1-dimensional torch tensor of the associated labels for X
    :return: None
    """
    print('\nFitting Base...')

    # update class means
    for k in torch.unique(y):
        self.muK[k] = X[y == k].mean(0)
        self.cK[k] = X[y == k].shape[0]
    self.num_updates = X.shape[0]

    print('\nEstimating initial covariance matrix...')
    from sklearn.covariance import OAS
    # Residuals X - muK[y] are centered per class, hence assume_centered=True.
    cov_estimator = OAS(assume_centered=True)
    cov_estimator.fit((X - self.muK[y]).cpu().numpy())
    self.Sigma = torch.from_numpy(cov_estimator.covariance_).float().to(
        self.device)
def correlations(df, categorical_portions):
    """Estimate a shrunk (OAS) covariance matrix for ``df``.

    Categorical columns are first replaced by continuous surrogates: each
    category is mapped to the standard-normal interval whose probability mass
    equals that category's observed fraction, and a value is drawn from the
    normal truncated to that interval.

    :param df: pd.DataFrame of observations (continuous and categorical cols)
    :param categorical_portions: dict mapping a categorical column name to an
        ordered list of (value, fraction) pairs; fractions presumably sum to
        1 — TODO confirm with callers.
    :return: pd.DataFrame covariance matrix labelled by df's columns.
    """
    # The more samples, the slower, but the more accurate the categorical
    # correlation.  Each pass doubles the frame, i.e. the result holds
    # 2**NUM_CATEGORICAL_SAMPLES replicas of the original rows.
    NUM_CATEGORICAL_SAMPLES = 5
    for i in range(NUM_CATEGORICAL_SAMPLES):
        # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
        # pd.concat is the supported equivalent with identical output.
        df = pd.concat([df, df], ignore_index=True)

    categorical_cols = list(categorical_portions.keys())

    # First generate continuous samples for categorical values. We do this by
    # sampling from a truncated normal distribution in the range for that
    # continous variable.
    for categorical_col in categorical_cols:
        portions = categorical_portions[categorical_col]
        # The values of the categorical variable in order
        portions_keys = [val for val, frac in portions]
        for i, cat_val in enumerate(df[categorical_col]):
            if len(portions) == 1:
                # A single category carries no information: plain normal draw.
                df.loc[i, categorical_col] = norm.rvs()
                continue
            ind = portions_keys.index(cat_val)
            # Get sums of prev portions including and not including this portion
            sum_a = sum(map(lambda i: portions[i][1], range(ind)))
            sum_b = sum_a + portions[ind][1]
            # Get thresholds delimiting this category's normal interval.
            threshold_a = norm.ppf(sum_a, loc=0.0, scale=1.0)
            threshold_b = norm.ppf(sum_b, loc=0.0, scale=1.0)
            # Sample truncated norm
            df.loc[i, categorical_col] = truncnorm.rvs(threshold_a, threshold_b)

    # estimate covariance matrix with OAS shrinkage
    estimator = OAS()
    estimator.fit(df.values)
    cov = pd.DataFrame(estimator.covariance_, index=df.columns,
                       columns=df.columns)
    return cov
def test_oas():
    """Tests OAS module on a simple dataset.

    NOTE(review): this variant targets a very old sklearn API where
    ``assume_centered`` was an argument of ``fit``/``score`` rather than of
    the estimator constructor, and pins hard-coded reference values.
    """
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.shrinkage_, 0.018740, 4)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X, assume_centered=True)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d, assume_centered=True)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X, assume_centered=True)
    assert_almost_equal(oa.score(X, assume_centered=True), -5.03605, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, 0.020236, 4)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), 2.079025, 4)
    assert(oa.precision_ is None)
# Monte-Carlo comparison of Ledoit-Wolf vs OAS shrinkage: for each sample
# size, repeat draws of colored Gaussian data, record squared error against
# the true covariance and the fitted shrinkage coefficient, then plot MSE.
# NOTE(review): uses the old sklearn API (assume_centered as a fit argument)
# and pylab ("pl"); n_samples_range/repeat/n_features/coloring_matrix/
# real_cov are defined earlier in the file (not visible here).
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Colored (correlated) Gaussian sample with known true covariance.
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i,j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i,j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i,j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i,j] = oa.shrinkage_

# plot MSE
pl.subplot(2,1,1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
pl.ylabel("Squared error")
pl.legend(loc="upper right")
pl.title("Comparison of covariance estimators")
pl.xlim(5, 31)

# plot shrinkage coefficient
def test_oas():
    """Tests OAS module on a simple dataset.

    Self-referential variant: the centered-data results (shrinkage_, score_)
    are captured first and reused as the expected values for the
    uncentered runs, instead of hard-coded constants.
    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test with one sample (only checks that a warning, not an error, occurs)
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
def test_oas():
    """Tests OAS module on a simple dataset.

    Centered-data results (shrinkage_, score_) are captured first and reused
    as expected values for the uncentered runs.
    """
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    ### Same tests without assuming centered data
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test with one sample (only checks that a warning, not an error, occurs)
    X_1sample = np.arange(5)
    oa = OAS()
    with warnings.catch_warnings(record=True):
        oa.fit(X_1sample)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
def test_oas():
    # Tests OAS module on a simple dataset.
    # Centered-data results (shrinkage_, score_) are captured first and
    # reused as expected values for the uncentered runs below.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d ** 2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert(oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    # with a single sample the estimated covariance degenerates to zeros
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert(oa.precision_ is None)
def BIC(X, Y, Z, p_lambda, mode=0, shrinkage=0):
    '''
    Bayesian information criterion
    delta_BIC = BIC_M1 - BIC_M0
    M1: feature vectors X and Y modeled by two multivariate gaussians
    M0: Z modeled by one gaussian
    delta_BIC > 0: accept M1
    delta_BIC < 0: accept M0

    covariance matrix dimension size < observation size, rank(cov) <= observation size - 1
    rank deficient:
    http://stats.stackexchange.com/questions/60622/why-is-a-sample-covariance-matrix-singular-when-sample-size-is-less-than-number

    mode
        0: BIC
        1: BICc
        2: ABF2
    shrinkage
        0: no shrinkage
        1: Ledoit-Wolf
        2: OAS

    :param X: frame * feature
    :param Y: frame * feature
    :param Z: frame * feature
    :param p_lambda: penalty weight applied to the model-complexity term P
    :return: R - p_lambda * P (the penalised log-likelihood-ratio statistic)

    NOTE(review): if mode is not in {0, 1, 2}, P is never assigned and the
    final return raises NameError — confirm callers only pass 0/1/2.
    '''
    p = X.shape[1]
    N_x = X.shape[0]
    N_y = Y.shape[0]
    N_z = Z.shape[0]

    # centering data
    mean_X = np.mean(X, axis=0)
    mean_Y = np.mean(Y, axis=0)
    mean_Z = np.mean(Z, axis=0)
    X = X - mean_X
    Y = Y - mean_Y
    Z = Z - mean_Z

    # Covariance of each segment, optionally with shrinkage.
    # NOTE(review): data is already centered above, yet assume_centered=False
    # is passed — presumably harmless (re-centering centered data), verify.
    if shrinkage == 1:
        lw = LedoitWolf(store_precision=False, assume_centered=False)
        lw.fit(X)
        sigma_x = lw.covariance_
        lw.fit(Y)
        sigma_y = lw.covariance_
        lw.fit(Z)
        sigma_z = lw.covariance_
    elif shrinkage == 2:
        oa = OAS(store_precision=False, assume_centered=False)
        oa.fit(X)
        sigma_x = oa.covariance_
        oa.fit(Y)
        sigma_y = oa.covariance_
        oa.fit(Z)
        sigma_z = oa.covariance_
    else:
        sigma_x = np.cov(X, rowvar=0)
        sigma_y = np.cov(Y, rowvar=0)
        sigma_z = np.cov(Z, rowvar=0)

    # slogdet avoids overflow/underflow of the raw determinant.
    sign_z, logdet_z = np.linalg.slogdet(sigma_z)
    sign_y, logdet_y = np.linalg.slogdet(sigma_y)
    sign_x, logdet_x = np.linalg.slogdet(sigma_x)

    # det_z = sign_z*np.exp(logdet_z)
    # det_y = sign_y*np.exp(logdet_y)
    # det_x = sign_x*np.exp(logdet_x)

    # Log generalized likelihood ratio between the one- and two-gaussian fits.
    R = (N_z/2.0) * logdet_z - \
        (N_y/2.0) * logdet_y - \
        (N_x/2.0) * logdet_x

    # Number of free parameters of one gaussian (mean + symmetric covariance).
    k_z = (p + p * (p + 1) / 2.0)

    if mode == 0:
        P = k_z * np.log(N_z) / 2.0
    elif mode == 1:
        P = k_z * np.log(N_z) * (2.0 / (N_z - 2 * k_z - 1)
                                 - (1.0 / (N_z - k_z - 1))) / 2.0
        # NOTE(review): magic scaling factor, purpose undocumented — verify.
        P *= 10000
    elif mode == 2:
        P = P_ABF2(mean_X, mean_Y, mean_Z, sigma_x, sigma_y, sigma_z,
                   N_x, N_y, N_z)

    # print R, P, R-p_lambda*P, logdet_z, logdet_y, logdet_x, N_z, N_y, N_x
    # if det_z <0: print det_z
    # if det_y <0: print det_y
    # if det_x <0: print det_x

    return R - p_lambda * P
# Monte-Carlo comparison of Ledoit-Wolf vs OAS shrinkage over a range of
# sample sizes; records squared error against the true covariance and the
# fitted shrinkage coefficient, then plots MSE.
# NOTE(review): this fragment is truncated mid plt.errorbar call; the
# remainder lives outside this view.  n_samples_range/repeat/n_features/
# coloring_matrix/real_cov are defined earlier in the file.
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Colored (correlated) Gaussian sample with known true covariance.
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(
    n_samples_range,
    lw_mse.mean(1),
    yerr=lw_mse.std(1),
    label="Ledoit-Wolf",
    color="navy",
    lw=2,
)
plt.errorbar(
    n_samples_range,
# Monte-Carlo comparison of Ledoit-Wolf vs OAS shrinkage: for each sample
# size, repeat draws of colored Gaussian data, record squared error against
# the true covariance and the fitted shrinkage coefficient, then plot MSE.
# n_samples_range/repeat/n_features/coloring_matrix/real_cov come from
# earlier in the file (not visible here).
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Colored (correlated) Gaussian sample with known true covariance.
        X = np.dot(
            np.random.normal(size=(n_samples, n_features)), coloring_matrix.T)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False, assume_centered=True)
        oa.fit(X)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
plt.subplot(2, 1, 1)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='g')
plt.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
             label='OAS', color='r')
plt.ylabel("Squared error")
plt.legend(loc="upper right")
plt.title("Comparison of covariance estimators")
plt.xlim(5, 31)

# plot shrinkage coefficient
# Python 2 script: for each subject's timecourse matrix, compute a Pearson
# covariance (for inspection) and an OAS-shrunk covariance, flatten the
# latter into one row per subject, and save all rows to CSV.
# NOTE(review): assumes each covariance is 90x90 (flattened to 8100) —
# confirm against the upstream atlas/ROI count.
print timecourse_files
# roll through the subjects
print np.shape(timecourse_data)[0]
for i in range(np.shape(timecourse_data)[0]) :
#for i in range(10) :
    print i
    # extract the timecourses for this subejct
    subject_timecourses = timecourse_data[i, : ,:]
    #print np.shape(subject_timecourses)
    # calculate Pearson covariance (rows scaled, then X X^T / n)
    X = scale(subject_timecourses, axis=1)
    cov = np.dot(X, np.transpose(X)) / np.shape(X)[1]
    print cov[:5, :5]
    print logm(cov)[:5, :5]
    # calculate the OAS-shrunk covariance matrix
    # NOTE(review): original comment said "sparse inverse covariance
    # (precision)", but the code stores OAS covariance_, not a precision.
    model = OAS(store_precision=False, assume_centered=True)
    model.fit(np.transpose(X))
    cov = model.covariance_
    OAS_matrices[i, :] = np.reshape(cov, (1, 8100))
    #print cov[:5, :5]
    foo = logm(cov)
    #print logm(cov[:5, :5])

## save the data
np.savetxt('/home/jonyoung/IoP_data/Data/connectivity_data/OAS_data.csv', OAS_matrices, delimiter=',')
X_test = np.dot(base_X_test, coloring_matrix) ############################################################################### # Compute Ledoit-Wolf and Covariances on a grid of shrinkages from sklearn.covariance import LedoitWolf, OAS, ShrunkCovariance, \ log_likelihood, empirical_covariance # Ledoit-Wolf optimal shrinkage coefficient estimate lw = LedoitWolf() loglik_lw = lw.fit(X_train, assume_centered=True).score( X_test, assume_centered=True) # OAS coefficient estimate oa = OAS() loglik_oa = oa.fit(X_train, assume_centered=True).score( X_test, assume_centered=True) # spanning a range of possible shrinkage coefficient values shrinkages = np.logspace(-3, 0, 30) negative_logliks = [-ShrunkCovariance(shrinkage=s).fit( X_train, assume_centered=True).score(X_test, assume_centered=True) \ for s in shrinkages] # getting the likelihood under the real model real_cov = np.dot(coloring_matrix.T, coloring_matrix) emp_cov = empirical_covariance(X_train) loglik_real = -log_likelihood(emp_cov, linalg.inv(real_cov)) ############################################################################### # Plot results pl.figure(-1)
# Monte-Carlo comparison of Ledoit-Wolf vs OAS shrinkage (100 repeats per
# sample size) on colored Gaussian data; records squared error against the
# true covariance and the fitted shrinkage coefficient, then plots MSE.
# NOTE(review): fragment is truncated mid plt.errorbar call; the remainder
# lives outside this view.
repeat = 100
lw_mse = np.zeros((n_samples_range.size, repeat))
oas_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oas_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Colored (correlated) Gaussian sample with known true covariance.
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix)

        lw = LedoitWolf(store_precision=False, assume_centered=True)
        lw.fit(X)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oas = OAS(store_precision=False, assume_centered=True)
        oas.fit(X)
        oas_mse[i, j] = oas.error_norm(real_cov, scaling=False)
        oas_shrinkage[i, j] = oas.shrinkage_

# plot MSE
plt.subplot(211)
plt.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
             label='Ledoit-Wolf', color='navy', lw=2)
plt.errorbar(n_samples_range, oas_mse.mean(1), yerr=oas_mse.std(1),
             label='OAS',
def cov2corr(cov):
    """Convert a covariance matrix to a correlation matrix.

    Divides each entry by the product of the corresponding standard
    deviations (the square roots of the diagonal).
    """
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr

# Compare raw correlation against Ledoit-Wolf, OAS, and MCD robust estimates
# of the correlation of `rr`, plotting each in a 2x2 grid.
# NOTE(review): `MCD` and fit(..., assume_centered=...) reflect a very old
# sklearn API (now MinCovDet / constructor arg) — presumably version-pinned.
if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()  #.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2, 2, i + 1)
        plot_corr(c, xnames=None, title=titles[i], normcolor=normcolor, ax=ax)
def test_oas():
    # Tests OAS module on a simple dataset.
    # Centered-data results (shrinkage_, score_) are captured first and
    # reused as expected values for the uncentered runs below.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test with one sample
    # FIXME I don't know what this test does
    X_1sample = np.arange(5)
    oa = OAS()
    assert_warns(UserWarning, oa.fit, X_1sample)
    # with a single sample the estimated covariance degenerates to zeros
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)
# ############################################################################# # Compare different approaches to setting the parameter # GridSearch for an optimal shrinkage coefficient tuned_parameters = [{'shrinkage': shrinkages}] cv = GridSearchCV(ShrunkCovariance(), tuned_parameters, cv=5) cv.fit(X_train) # Ledoit-Wolf optimal shrinkage coefficient estimate lw = LedoitWolf() loglik_lw = lw.fit(X_train).score(X_test) # OAS coefficient estimate oa = OAS() loglik_oa = oa.fit(X_train).score(X_test) # ############################################################################# # Plot results fig = plt.figure() plt.title("Regularized covariance: likelihood and shrinkage coefficient") plt.xlabel('Regularization parameter: shrinkage coefficient') plt.ylabel('Error: negative log-likelihood on test data') # range shrinkage curve plt.loglog(shrinkages, negative_logliks, label="Negative log-likelihood") plt.plot(plt.xlim(), 2 * [loglik_real], '--r', label="Real covariance likelihood") # adjust view lik_max = np.amax(negative_logliks)
# Load a gene-expression table, standardise it, fit an OAS covariance, and
# start building a graph (graph_tool) with one vertex per gene plus edge
# properties for pairwise Spearman correlations.
# NOTE(review): `gene_expr_OAS_corr` actually holds the fitted OAS estimator
# (its covariance_ is of standardised data, i.e. correlation-like) — confirm
# the intended interpretation.  Fragment ends mid pairwise loop.
if args.verbose:
    print('sys.argv:')
    print(sys.argv)
    print()
    print('numpy version:', np.__version__)
    print('pandas version:', pd.__version__)
    print('scipy version:', sp.__version__)
    print()

gene_expr_raw = pd.read_table(args.data)
# Transpose so rows are samples and columns are genes — TODO confirm layout.
gene_expr = gene_expr_raw.T
# Standardise each gene (zero mean, unit variance).
X_centered = (gene_expr - gene_expr.mean()) / np.sqrt(gene_expr.var())

oa = OAS(store_precision=True, assume_centered=True)
gene_expr_OAS_corr = oa.fit(X_centered)
n_genes = gene_expr_OAS_corr.covariance_.shape[1]

g = Graph(directed=False)
g.add_vertex(n=n_genes)
spearman = g.new_ep("double", 0)
pval = g.new_ep("double", 0)
genes = g.new_vertex_property(
    "string", np.array(np.array(gene_expr.columns, dtype="str")))
g.vertex_properties["genes"] = genes

# Pairwise Spearman correlation over the lower triangle (j < i).
for i in range(n_genes):
    for j in range(i):
        spearman_r = sp.stats.spearmanr(X_centered.iloc[:, i],
                                        X_centered.iloc[:, j])
# Monte-Carlo comparison of Ledoit-Wolf vs OAS shrinkage: for each sample
# size, repeat draws of colored Gaussian data, record squared error against
# the true covariance and the fitted shrinkage coefficient, then plot MSE.
# NOTE(review): uses the old sklearn API (assume_centered as a fit argument)
# and pylab ("pl"); the fragment ends after the two errorbar calls.
lw_mse = np.zeros((n_samples_range.size, repeat))
oa_mse = np.zeros((n_samples_range.size, repeat))
lw_shrinkage = np.zeros((n_samples_range.size, repeat))
oa_shrinkage = np.zeros((n_samples_range.size, repeat))
for i, n_samples in enumerate(n_samples_range):
    for j in range(repeat):
        # Colored (correlated) Gaussian sample with known true covariance.
        X = np.dot(np.random.normal(size=(n_samples, n_features)),
                   coloring_matrix.T)

        lw = LedoitWolf(store_precision=False)
        lw.fit(X, assume_centered=True)
        lw_mse[i, j] = lw.error_norm(real_cov, scaling=False)
        lw_shrinkage[i, j] = lw.shrinkage_

        oa = OAS(store_precision=False)
        oa.fit(X, assume_centered=True)
        oa_mse[i, j] = oa.error_norm(real_cov, scaling=False)
        oa_shrinkage[i, j] = oa.shrinkage_

# plot MSE
pl.subplot(2, 1, 1)
pl.errorbar(n_samples_range, lw_mse.mean(1), yerr=lw_mse.std(1),
            label='Ledoit-Wolf', color='g')
pl.errorbar(n_samples_range, oa_mse.mean(1), yerr=oa_mse.std(1),
            label='OAS', color='r')
def cov2corr(cov):
    """Convert a covariance matrix to a correlation matrix.

    Divides each entry by the product of the corresponding standard
    deviations (the square roots of the diagonal).
    """
    std_ = np.sqrt(np.diag(cov))
    corr = cov / np.outer(std_, std_)
    return corr

# Compare raw correlation against Ledoit-Wolf, OAS, and MCD robust estimates
# of the correlation of `rr`, plotting each in a 2x2 grid.
# NOTE(review): `MCD` and fit(..., assume_centered=...) reflect a very old
# sklearn API (now MinCovDet / constructor arg).  Fragment is truncated mid
# plot_corr call.
if has_sklearn:
    from sklearn.covariance import LedoitWolf, OAS, MCD

    lw = LedoitWolf(store_precision=False)
    lw.fit(rr, assume_centered=False)
    cov_lw = lw.covariance_
    corr_lw = cov2corr(cov_lw)

    oas = OAS(store_precision=False)
    oas.fit(rr, assume_centered=False)
    cov_oas = oas.covariance_
    corr_oas = cov2corr(cov_oas)

    mcd = MCD()#.fit(rr, reweight=None)
    mcd.fit(rr, assume_centered=False)
    cov_mcd = mcd.covariance_
    corr_mcd = cov2corr(cov_mcd)

    titles = ['raw correlation', 'lw', 'oas', 'mcd']
    normcolor = None
    fig = plt.figure()
    for i, c in enumerate([rrcorr, corr_lw, corr_oas, corr_mcd]):
        #for i, c in enumerate([np.cov(rr, rowvar=0), cov_lw, cov_oas, cov_mcd]):
        ax = fig.add_subplot(2,2,i+1)
        plot_corr(c, xnames=None, title=titles[i],
def test_oas():
    # Tests OAS module on a simple dataset.
    # Centered-data results (shrinkage_, score_) are captured first and
    # reused as expected values for the uncentered runs below.
    # test shrinkage coeff on a simple data set
    X_centered = X - X.mean(axis=0)
    oa = OAS(assume_centered=True)
    oa.fit(X_centered)
    shrinkage_ = oa.shrinkage_
    score_ = oa.score(X_centered)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_centered,
                                                 assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True)
    scov.fit(X_centered)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0:1]
    oa = OAS(assume_centered=True)
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d, assume_centered=True)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # 1-D centered case: covariance reduces to the mean of squares.
    assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4)
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False, assume_centered=True)
    oa.fit(X_centered)
    assert_almost_equal(oa.score(X_centered), score_, 4)
    assert (oa.precision_ is None)

    # Same tests without assuming centered data--------------------------------
    # test shrinkage coeff on a simple data set
    oa = OAS()
    oa.fit(X)
    assert_almost_equal(oa.shrinkage_, shrinkage_, 4)
    assert_almost_equal(oa.score(X), score_, 4)
    # compare shrunk covariance obtained from data and from MLE estimate
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    # compare estimates given by OAS and ShrunkCovariance
    scov = ShrunkCovariance(shrinkage=oa.shrinkage_)
    scov.fit(X)
    assert_array_almost_equal(scov.covariance_, oa.covariance_, 4)
    # test with n_features = 1
    X_1d = X[:, 0].reshape((-1, 1))
    oa = OAS()
    oa.fit(X_1d)
    oa_cov_from_mle, oa_shrinkage_from_mle = oas(X_1d)
    assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4)
    assert_almost_equal(oa_shrinkage_from_mle, oa.shrinkage_)
    assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4)
    # test with one sample
    # warning should be raised when using only 1 sample
    X_1sample = np.arange(5).reshape(1, 5)
    oa = OAS()
    warn_msg = (
        "Only one sample available. You may want to reshape your data array")
    with pytest.warns(UserWarning, match=warn_msg):
        oa.fit(X_1sample)
    # with a single sample the estimated covariance degenerates to zeros
    assert_array_almost_equal(oa.covariance_,
                              np.zeros(shape=(5, 5), dtype=np.float64))
    # test shrinkage coeff on a simple data set (without saving precision)
    oa = OAS(store_precision=False)
    oa.fit(X)
    assert_almost_equal(oa.score(X), score_, 4)
    assert (oa.precision_ is None)