def shrinked_covariance(returns, price_data=False, shrinkage_type='basic', assume_centered=False, basic_shrinkage=0.1):
    """
    Calculates the covariance estimator with shrinkage for a dataframe of asset prices or returns.

    This function allows three types of shrinkage - Basic, Ledoit-Wolf and Oracle Approximating Shrinkage.
    It is a wrapper around sklearn's ShrunkCovariance, LedoitWolf and OAS classes.

    According to the scikit-learn User Guide on Covariance estimation:
    "Sometimes, it even occurs that the empirical covariance matrix cannot be inverted for numerical
    reasons. To avoid such an inversion problem, a transformation of the empirical covariance matrix
    has been introduced: the shrinkage. Mathematically, this shrinkage consists in reducing the ratio
    between the smallest and the largest eigenvalues of the empirical covariance matrix".

    Link to the documentation: https://scikit-learn.org/stable/modules/covariance.html

    If a dataframe of prices is given, it is transformed into a dataframe of returns using
    the calculate_returns method from the ReturnsEstimators class.

    :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
    :param price_data: (bool) Flag if prices of assets are used and not returns. (False by default)
    :param shrinkage_type: (str) Type of shrinkage to use. (``basic`` by default, ``lw``, ``oas``, ``all``)
    :param assume_centered: (bool) Flag for data with mean almost, but not exactly zero.
                            (Read the documentation for the chosen shrinkage class, False by default)
    :param basic_shrinkage: (float) Between 0 and 1. Coefficient in the convex combination for basic shrinkage.
                            (0.1 by default)
    :return: (np.array) Estimated covariance matrix. Tuple of covariance matrices if shrinkage_type = ``all``.
    """

    # Calculating the series of returns from the series of prices
    if price_data:
        # Class with returns calculation function
        ret_est = ReturnsEstimators()

        # Calculating returns
        returns = ret_est.calculate_returns(returns)

    # Calculating the covariance matrix for the chosen method
    if shrinkage_type == 'basic':
        cov_matrix = ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(
            returns).covariance_
    elif shrinkage_type == 'lw':
        cov_matrix = LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_
    elif shrinkage_type == 'oas':
        cov_matrix = OAS(assume_centered=assume_centered).fit(returns).covariance_
    else:
        cov_matrix = (
            ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(returns).covariance_,
            LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_,
            OAS(assume_centered=assume_centered).fit(returns).covariance_)

    return cov_matrix
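# Hedged usage sketch (added for illustration, not from the original source):
# exercises shrinked_covariance on synthetic returns, assuming numpy, pandas
# and the scikit-learn estimators used above are importable in this module.
def example_shrinked_covariance():
    import numpy as np
    import pandas as pd

    rng = np.random.RandomState(42)
    # 250 daily observations for 5 synthetic assets (illustrative data only)
    returns = pd.DataFrame(rng.normal(0, 0.01, size=(250, 5)),
                           columns=['asset_%d' % i for i in range(5)])

    basic = shrinked_covariance(returns, shrinkage_type='basic')
    all_three = shrinked_covariance(returns, shrinkage_type='all')

    assert basic.shape == (5, 5)
    assert len(all_three) == 3  # (basic, Ledoit-Wolf, OAS)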
def similarity_measure_mahalanobis(ds_tar, ds_src, results, p_value=0.95):
    print('Computing Mahalanobis similarity...')
    # TODO: The function parameters must be the two datasets,
    # TODO: src is the one with parameter calculation, second is the similarity one

    # Get classifier from results
    classifier = results['fclf']

    # Make prediction on training set, to understand data distribution
    # TODO: Evaluate if it is correct!
    classifier_predictions_src = classifier.predict(ds_src)
    prediction_mask = np.array(classifier_predictions_src) == ds_src.targets
    example_dist = dict()

    # Extract the features selected by the classifier from each dataset
    if isinstance(classifier, FeatureSelectionClassifier):
        f_selection = results['fclf'].mapper
        ds_tar = f_selection(ds_tar)
        ds_src = f_selection(ds_src)

    '''
    Get class distribution information: mean and covariance
    '''
    for label in np.unique(ds_src.targets):
        # Get examples correctly classified
        mask = ds_src.targets == label
        example_dist[label] = dict()
        true_ex = ds_src.samples[mask * prediction_mask]

        # Get mean and covariance to draw the distribution.
        # We estimate mean and covariance only on well-classified examples.
        mean_ = np.mean(true_ex, axis=0)
        example_dist[label]['mean'] = mean_

        print('Estimation of covariance matrix for ' + label + ' class...')
        print(true_ex.shape)
        try:
            # Covariance estimators expose fit(), not transform()
            #cov_ = MinCovDet().transform(true_ex)
            cov_ = LedoitWolf().fit(true_ex)
            #cov_ = EmpiricalCovariance().transform(true_ex)
            #cov_ = GraphLasso(alpha=0.5).transform(true_ex)
            #cov_ = OAS(alpha=0.1).transform(true_ex)
        except MemoryError as err:
            print('Method is LedoitWolf')
            cov_ = LedoitWolf(block_size=15000).fit(true_ex)

        example_dist[label]['i_cov'] = cov_.precision_
        print('Inverted covariance estimated...')
def test_ledoit_wolf_large(): # test that ledoit_wolf doesn't error on data that is wider than block_size rng = np.random.RandomState(0) # use a number of features that is larger than the block-size X = rng.normal(size=(10, 20)) lw = LedoitWolf(block_size=10).fit(X) # check that covariance is about diagonal (random normal noise) assert_almost_equal(lw.covariance_, np.eye(20), 0) cov = lw.covariance_ # check that the result is consistent with not splitting data into blocks. lw = LedoitWolf(block_size=25).fit(X) assert_almost_equal(lw.covariance_, cov)
def lw(data, alphas):
    """
    Estimates the graph with the Ledoit-Wolf estimator.

    Parameters
    ----------
    data: numpy ndarray
        The input data to reconstruct/estimate a graph from. Features as
        columns and observations as rows.
    alphas: float
        The threshold on the precision matrix to determine edges.

    Returns
    -------
    adjacency matrix : the estimated adjacency matrix.
    """
    alpha = alphas

    scaler = StandardScaler()
    data = scaler.fit_transform(data)

    cov = LedoitWolf().fit(data)
    precision_matrix = cov.get_precision()
    n_features, _ = precision_matrix.shape
    mask1 = np.abs(precision_matrix) > alpha
    mask0 = np.abs(precision_matrix) <= alpha
    adjacency_matrix = np.zeros((n_features, n_features))
    adjacency_matrix[mask1] = 1
    adjacency_matrix[mask0] = 0
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
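# Hedged usage sketch (added for illustration, not from the original source):
# builds an adjacency matrix from random data with lw() above. The threshold
# 0.1 is arbitrary; with independent columns, most off-diagonal precision
# entries should stay below it.
def example_lw_graph():
    import numpy as np

    rng = np.random.RandomState(0)
    data = rng.normal(size=(200, 6))  # 200 observations, 6 features
    adjacency = lw(data, alphas=0.1)

    assert adjacency.shape == (6, 6)
    assert np.all(np.diag(adjacency) == 0)  # diagonal is zeroed by design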
def estimatorLedoitWolf(self):
    # Remove the Date column for this function
    trimmedData = self.data.drop('Date', axis=1)
    # LedoitWolf centers the data itself (assume_centered defaults to False)
    cov = LedoitWolf().fit(trimmedData).covariance_
    assert cov.shape == self.expectedCovShape
    self.cov = cov
    return self.cov
def __call__(self, train_list, rest_list, clear_after_use=False):
    print("Apply Whitening...")

    if clear_after_use:
        self.sigma_neg_sqrt = None
        self.shrinkage_parameter = None

    if self.sigma_neg_sqrt is None:
        train_stacked = np.concatenate([d.x for d in train_list], axis=0)

        # Fit LedoitWolf for covariance estimation
        lw = LedoitWolf().fit(train_stacked)
        self.shrinkage_parameter = lw.shrinkage_
        print("    Estimated shrinkage-parameter={:.3f}".format(
            self.shrinkage_parameter))

        # estimated covariance matrix
        sigma = lw.covariance_

        # eigenvalue decomposition; eigh is used since sigma is symmetric,
        # which guarantees real eigenvalues
        eig_values, eig_vectors = np.linalg.eigh(sigma)

        # negative square root of eigenvalues
        eig_values_neg_sqrt = np.diag(1 / np.sqrt(eig_values + self.eps))

        # negative square root of sigma
        self.sigma_neg_sqrt = np.dot(
            np.dot(eig_vectors, eig_values_neg_sqrt), eig_vectors.T)

    def tensor_whiten(data):
        x = data.x
        x = np.dot(x, self.sigma_neg_sqrt)
        return RawData.create_from_ref(data, x=x)

    return self.transform(tensor_whiten, train_list, rest_list)
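# Minimal standalone sketch (an illustration, not the original pipeline) of
# the whitening math used in __call__ above: after projecting the data onto
# sigma^(-1/2), its sample covariance should be close to the identity.
def example_whitening_math():
    import numpy as np
    from sklearn.covariance import LedoitWolf

    rng = np.random.RandomState(0)
    x = rng.normal(size=(1000, 4)).dot(rng.normal(size=(4, 4)))  # correlated data

    lw = LedoitWolf().fit(x)
    # eigh is appropriate here because the covariance matrix is symmetric
    eig_values, eig_vectors = np.linalg.eigh(lw.covariance_)
    sigma_neg_sqrt = eig_vectors.dot(np.diag(1 / np.sqrt(eig_values))).dot(eig_vectors.T)

    whitened = (x - x.mean(axis=0)).dot(sigma_neg_sqrt)
    cov_after = np.cov(whitened, rowvar=False)
    assert np.allclose(cov_after, np.eye(4), atol=0.1)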
def filter_W_fromVcv(vcv, variance_perc=1.0):
    '''vcv is a filtered value for the Vcv, with shape (T, N, N).
    This function filters init_W and init_df, the initial distribution
    parameters for W's posterior, where W is the diffusion matrix of the
    components of the Cholesky decomposition of vcv.
    It also filters init_vcv_std, the standard deviations of these
    components' posteriors.
    '''
    [T, N, _] = vcv.shape
    num_tril = int(N * (N + 1) / 2)
    chol_vcv = np.zeros([T, int(N * (N + 1) / 2)])
    ind = indexes_librarian(N)
    for t in range(T):
        cvcv = np.linalg.cholesky(vcv[t])
        chol_vcv[t, ind.spiral_diag] = inv_softplus(cvcv[ind.diag[0], ind.diag[1]])
        chol_vcv[t, ind.spiral_udiag] = cvcv[ind.udiag[0], ind.udiag[1]]
    cov = LedoitWolf().fit(chol_vcv[1:, :] - chol_vcv[:-1, :])
    init_W = cov.covariance_
    try:
        np.linalg.cholesky(init_W)
    except np.linalg.LinAlgError:
        # adds a constant term if init_W is singular
        print('W resulted singular, a correction term (I*1e-4) is added')
        init_W += np.eye(num_tril) * 1e-4

    init_df = np.max([4 * num_tril / variance_perc, num_tril])
    init_W *= 2
    init_vcv_std = np.abs(chol_vcv) * 0.1 / N * variance_perc
    #init_vcv_std=np.tile(np.reshape(np.abs(vcv).mean(axis=0),[1,N,N]),[T,1,1])/np.sqrt(N)*variance_perc
    return np.float32(init_W), np.float32(init_df), np.float32(init_vcv_std)
def query_samples_and_probabilities(pydc, query, evidence, var, std=False):
    pydc.queryWithSamples(NUM_SAMPLES, query, evidence, var, FLAG, BIGNUM)
    parsed_samples = ast.literal_eval(pydc.samples)
    values = []
    weights = []
    for sample in parsed_samples:
        x, w = sample[0], sample[1]
        values += [x]
        weights += [w]
    values, weights = np.array(values), np.array(weights)
    if std:
        avg, std = weighted_avg_and_std(values, weights)
        return avg, std
    else:
        #values = values + 1e-5*np.random.rand(*values.shape)
        avg, cov = weighted_avg_and_cov(values, weights)
        print(avg)
        # Check that cov is positive semi-definite; if not, resample around
        # the weighted mean and re-estimate with Ledoit-Wolf shrinkage
        try:
            assert np.all(np.linalg.eigvals(cov) >= 0)
        except AssertionError:
            X = np.random.multivariate_normal(mean=avg, cov=cov, size=100)
            shcov = LedoitWolf().fit(X)
            avg, cov = shcov.location_, shcov.covariance_
            #assert(np.all(np.linalg.eigvals(cov) >= 0))
        return (avg, cov)
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas """
    ts = do_mask_img(masker, func, confound)

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        # Transpose so that rows are variables (regions), as np.corrcoef expects
        fc.covariance_ = np.corrcoef(ts.T)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def _simulate_covariance(mu_vector, cov_matrix, num_obs, lw_shrinkage=False): """ Derives an empirical vector of means and an empirical covariance matrix. Based on the set of true means vector and covariance matrix of X distributions, the function generates num_obs observations for every X. Based on these observations simulated vector of means and the simulated covariance matrix are obtained. :param mu_vector: (np.array) True means vector for X distributions :param cov_matrix: (np.array) True covariance matrix for X distributions :param num_obs: (int) Number of observations to draw for every X :param lw_shrinkage: (bool) Flag to apply Ledoit-Wolf shrinkage to X (False by default) :return: (np.array, np.array) Empirical means vector, empirical covariance matrix """ # Generating a matrix of num_obs observations for X distributions observations = np.random.multivariate_normal(mu_vector.flatten(), cov_matrix, size=num_obs) # Empirical means vector calculation mu_simulated = observations.mean(axis=0).reshape(-1, 1) if lw_shrinkage: # If applying Ledoit-Wolf shrinkage cov_simulated = LedoitWolf().fit(observations).covariance_ else: # Simple empirical covariance matrix cov_simulated = np.cov(observations, rowvar=False) return mu_simulated, cov_simulated
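# Hedged usage sketch (added for illustration, not from the original source):
# draws an empirical mean vector and covariance matrix from a known 3x3
# diagonal truth, with and without Ledoit-Wolf shrinkage.
def example_simulate_covariance():
    import numpy as np

    np.random.seed(0)
    mu_true = np.array([0.05, 0.02, 0.03]).reshape(-1, 1)
    cov_true = np.diag([0.04, 0.09, 0.01])

    mu_emp, cov_emp = _simulate_covariance(mu_true, cov_true, num_obs=500)
    mu_lw, cov_lw = _simulate_covariance(mu_true, cov_true, num_obs=500,
                                         lw_shrinkage=True)

    assert mu_emp.shape == (3, 1) and cov_emp.shape == (3, 3)
    assert cov_lw.shape == (3, 3)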
def postProcessing(nifti_file, subject_key, spheres_masker):
    """Perform post processing
    :param nifti_file: string. path to the NIfTI file
    :param subject_key: string. subject's key
    :return: tuple (subject_key, dict). key: subject's key. value:
             {"time_series": matrix of time series (time_points, rois),
              "covariance": covariance matrix of atlas rois (rois, rois),
              "correlation": correlation matrix of atlas rois (rois, rois)}
    """
    try:
        print("subject_key: " + subject_key)
        print("Extract timeseries")
        # Extract the time series
        print(nifti_file)
        timeseries = spheres_masker.fit_transform(nifti_file, confounds=None)
        print("Extract covariance matrix")
        cov_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
            assume_centered=False, block_size=1000, store_precision=False),
                                          kind='covariance')
        cov = cov_measure.fit_transform([timeseries])[0, :, :]
        print("Extract correlation matrix")
        cor = nilearn.connectome.cov_to_corr(cov)
    except Exception:
        raise Exception("subject_key: %s \n" % subject_key +
                        traceback.format_exc())
    return (subject_key, {
        "time_series": timeseries,
        "covariance": cov,
        "correlation": cor
    })
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        # Transpose so that rows are variables (regions), as np.corrcoef expects
        fc.covariance_ = np.corrcoef(ts.T)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
def weight_opt(returns, benchmark, lower=0, upper=1, ph=2**7, cov_method='sample', seed=123):
    """Solve for benchmark-relative optimal weights via quadratic programming.

    returns: array of shape (n_asset, n_sample); benchmark: array of shape
    (n_sample,); ph: risk-aversion penalty; cov_method: 'sample' for the
    sample covariance or 'lw' for Ledoit-Wolf shrinkage.
    Returns the weight vector, the portfolio return and the tracking-error
    variance.
    """
    np.random.seed(seed)
    n_asset, n_sample = returns.shape
    rets = np.asmatrix(returns)
    #N = 10
    #phs = [2**(t-2) for t in range(N)]

    # Convert to cvxopt matrices
    if cov_method == 'sample':
        Cov = opt.matrix(np.cov(rets, benchmark))
    elif cov_method == 'lw':
        Cov = opt.matrix(LedoitWolf().fit(
            np.append(np.transpose(rets), benchmark.reshape(n_sample, 1), axis=1)).covariance_)
    else:
        raise ValueError('cov_method should be in {}'.format({'sample', 'lw'}))

    S = Cov[:n_asset, :n_asset]
    r_mean = opt.matrix(np.nanmean(rets, axis=1))  # n x 1
    Cb = Cov[:n_asset, n_asset]

    # Create constraint matrices
    G = opt.matrix(np.append(np.eye(n_asset), -np.eye(n_asset), axis=0))  # 2n x n (stacked +/- identity)
    h = opt.matrix(np.append(upper*np.ones((n_asset, 1)), -lower*np.ones((n_asset, 1)), axis=0))
    A = opt.matrix(1.0, (1, n_asset))
    b = opt.matrix(1.0)

    # Calculate efficient frontier weights using quadratic programming
    x = solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x']
    #portfolios = [solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x'] for ph in phs]

    # CALCULATE RISKS AND RETURNS FOR FRONTIER
    ret = blas.dot(r_mean, x)
    errors = blas.dot(x, S*x) + Cov[n_asset, n_asset] - 2*blas.dot(Cb, x)
    return np.transpose(np.array(x))[0], ret, errors
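# Hedged usage sketch (added for illustration, not from the original source):
# assumes cvxopt is installed and weight_opt's imports (opt, solvers, blas,
# LedoitWolf) are in scope. Five synthetic assets tracked against a synthetic
# benchmark, using the Ledoit-Wolf covariance branch.
def example_weight_opt():
    import numpy as np

    rng = np.random.RandomState(0)
    returns = rng.normal(0.0005, 0.01, size=(5, 250))   # n_asset x n_sample
    benchmark = rng.normal(0.0004, 0.008, size=250)

    w, ret, err = weight_opt(returns, benchmark, cov_method='lw')
    assert w.shape == (5,)
    assert abs(w.sum() - 1.0) < 1e-5  # weights sum to one by construction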
def test_lda_predict(): # Test LDA classification. # This checks that LDA implements fit and predict and returns correct # values for simple toy data. for test_case in solver_shrinkage: solver, shrinkage = test_case clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage) y_pred = clf.fit(X, y).predict(X) assert_array_equal(y_pred, y, "solver %s" % solver) # Assert that it works with 1D data y_pred1 = clf.fit(X1, y).predict(X1) assert_array_equal(y_pred1, y, "solver %s" % solver) # Test probability estimates y_proba_pred1 = clf.predict_proba(X1) assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y, "solver %s" % solver) y_log_proba_pred1 = clf.predict_log_proba(X1) assert_allclose( np.exp(y_log_proba_pred1), y_proba_pred1, rtol=1e-6, atol=1e-6, err_msg="solver %s" % solver, ) # Primarily test for commit 2f34950 -- "reuse" of priors y_pred3 = clf.fit(X, y3).predict(X) # LDA shouldn't be able to separate those assert np.any(y_pred3 != y3), "solver %s" % solver clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto") with pytest.raises(NotImplementedError): clf.fit(X, y) clf = LinearDiscriminantAnalysis(solver="lsqr", shrinkage=0.1, covariance_estimator=ShrunkCovariance()) with pytest.raises( ValueError, match=("covariance_estimator and shrinkage " "parameters are not None. " "Only one of the two can be set."), ): clf.fit(X, y) # test bad solver with covariance_estimator clf = LinearDiscriminantAnalysis(solver="svd", covariance_estimator=LedoitWolf()) with pytest.raises(ValueError, match="covariance estimator is not supported with svd"): clf.fit(X, y) # test bad covariance estimator clf = LinearDiscriminantAnalysis(solver="lsqr", covariance_estimator=KMeans( n_clusters=2, n_init="auto")) with pytest.raises(ValueError): clf.fit(X, y)
def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """
    :param data: Input data matrix (observations as rows, variables as columns)
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default): regular sample covariance
        - Diagonal: diagonal covariance (no between-variable correlation)
        - ShrinkageLedoitWolf: Ledoit-Wolf shrinkage estimator
        - ShrinkageOAS: Oracle Approximating Shrinkage estimator
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and its empirical correlation matrix
    """
    # Offset data to make sure there are no values <= 0 for the log transform
    # (1 - min guarantees a minimum of exactly 1, even for negative data)
    offset = 1 - np.min(data)
    offdata = data + offset

    # log of the offset data
    logdata = np.log(offdata)

    # Get the means
    meanslog = np.mean(logdata, axis=0)

    # Specify covariance
    # Regular covariance estimator
    if covtype == "Estimate":
        covlog = np.cov(logdata, rowvar=0)
    # Shrinkage covariance estimator, using LedoitWolf
    elif covtype == "ShrinkageLedoitWolf":
        scov = LedoitWolf()
        scov.fit(logdata)
        covlog = scov.covariance_
    elif covtype == "ShrinkageOAS":
        scov = OAS()
        scov.fit(logdata)
        covlog = scov.covariance_
    # Diagonal covariance matrix (no between-variable correlation)
    elif covtype == "Diagonal":
        covlogdata = np.var(logdata, axis=0)  # variance of the log data by column
        covlog = np.diag(covlogdata)  # diagonal matrix of those variances
    else:
        raise ValueError('Unknown Covariance type')

    simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
    simData = np.exp(simData)
    simData -= offset

    # Set negative values to 0
    simData[np.where(simData < 0)] = 0

    # Work out the correlation matrix by columns, each column is a variable
    corrMatrix = np.corrcoef(simData, rowvar=0)

    return simData, corrMatrix
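# Hedged usage sketch (added for illustration, not from the original source):
# simulates log-normal data from a small random positive matrix with each
# supported covtype.
def example_simulate_lognormal():
    import numpy as np

    rng = np.random.RandomState(1)
    data = np.abs(rng.normal(10, 2, size=(100, 4)))  # strictly positive input

    for covtype in ("Estimate", "ShrinkageLedoitWolf", "ShrinkageOAS", "Diagonal"):
        sim, corr = simulateLogNormal(data, covtype=covtype, nsamples=500)
        assert sim.shape == (500, 4)
        assert corr.shape == (4, 4)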
def __init__(self, sharpes, returns):
    """
    Initialize AuthorModelBuilder object.

    Parameters
    ----------
    sharpes : pd.DataFrame
        Long-format DataFrame of in-sample Sharpe ratios (from user-run
        backtests), indexed by user, algorithm and code ID. Note that
        currently, backtests are deduplicated based on code id.
        See fit_authors for more information.
    returns : pd.DataFrame
        DataFrame of in-sample returns, used to estimate the correlation
        matrix of backtest returns via Ledoit-Wolf shrinkage.
    """
    self.num_authors = sharpes.meta_user_id.nunique()
    self.num_algos = sharpes.meta_algorithm_id.nunique()
    # For num_backtests, nunique() and count() should be the same
    self.num_backtests = sharpes.meta_code_id.nunique()

    # Which algos correspond to which authors?
    df = (sharpes.loc[:, ['meta_user_id', 'meta_algorithm_id']]
          .drop_duplicates(subset='meta_algorithm_id', keep='first')
          .reset_index().meta_user_id.astype(str))
    self.author_to_algo_encoding = LabelEncoder().fit_transform(df)

    # Which backtests correspond to which algos?
    df = sharpes.meta_algorithm_id.astype(str)
    self.algo_to_backtest_encoding = LabelEncoder().fit_transform(df)

    # Which backtests correspond to which authors?
    df = sharpes.meta_user_id.astype(str)
    self.author_to_backtest_encoding = LabelEncoder().fit_transform(df)

    # Construct correlation matrix.
    # 0 is a better estimate for mean returns than the sample mean!
    returns_ = returns / returns.std()
    self.corr = LedoitWolf(assume_centered=True).fit(returns_).covariance_

    self.model = self._build_model(sharpes, self.corr)

    self.coords = {
        'meta_user_id': sharpes.meta_user_id.drop_duplicates().values,
        'meta_algorithm_id': sharpes.meta_algorithm_id.drop_duplicates().values,
        'meta_code_id': sharpes.meta_code_id.values
    }

    self.dims = {
        'mu_global': (),
        'mu_author': ('meta_user_id', ),
        'mu_author_raw': ('meta_user_id', ),
        'mu_author_sd': (),
        'mu_algo': ('meta_algorithm_id', ),
        'mu_algo_raw': ('meta_algorithm_id', ),
        'mu_algo_sd': (),
        'mu_backtest': ('meta_code_id', ),
        'sigma_backtest': ('meta_code_id', ),
        'alpha_author': ('meta_user_id', ),
        'alpha_algo': ('meta_algorithm_id', )
    }
def GetModelParams(DataFrame, ColumnIndex):
    """Estimate per-class Ledoit-Wolf covariance matrices and mean vectors
    for the binary 'target' column, over the columns given in ColumnIndex."""
    cDataSet = DataFrame
    cData0 = cDataSet[cDataSet['target'] == 0]
    cData1 = cDataSet[cDataSet['target'] == 1]
    bData0 = np.array(cData0[ColumnIndex])
    bData1 = np.array(cData1[ColumnIndex])
    Cov0 = LedoitWolf(assume_centered=False).fit(bData0)
    Cov1 = LedoitWolf(assume_centered=False).fit(bData1)
    Mean0 = bData0.mean(axis=0)
    Mean1 = bData1.mean(axis=0)
    return Cov0.covariance_, Cov1.covariance_, Mean0, Mean1
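# Hedged usage sketch (added for illustration, not from the original source):
# a toy DataFrame with a binary 'target' column, mirroring the layout that
# GetModelParams expects.
def example_get_model_params():
    import numpy as np
    import pandas as pd

    rng = np.random.RandomState(0)
    df = pd.DataFrame(rng.normal(size=(60, 3)), columns=['f1', 'f2', 'f3'])
    df['target'] = rng.randint(0, 2, size=60)

    cov0, cov1, mean0, mean1 = GetModelParams(df, ['f1', 'f2', 'f3'])
    assert cov0.shape == (3, 3) and cov1.shape == (3, 3)
    assert mean0.shape == (3,) and mean1.shape == (3,)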
def LedoitWolf_covMatrix(X):
    logger.info(
        'Computing the covariance matrix with shrinkage (Ledoit-Wolf)')
    cov = LedoitWolf().fit(X)
    cov_matrix = cov.covariance_
    mean_vector = cov.location_
    return cov_matrix, mean_vector
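# Hedged usage sketch (added for illustration, not from the original source):
# random data in, shrunk covariance matrix and location (mean) vector out.
def example_LedoitWolf_covMatrix():
    import numpy as np

    X = np.random.RandomState(0).normal(size=(50, 4))
    cov_matrix, mean_vector = LedoitWolf_covMatrix(X)
    assert cov_matrix.shape == (4, 4)
    assert mean_vector.shape == (4,)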
def simCovMu(mu0, cov0, nObs, shrink=False):
    """Draw nObs samples from N(mu0, cov0) and return the empirical mean
    vector and covariance matrix, optionally with Ledoit-Wolf shrinkage."""
    x = np.random.multivariate_normal(mu0.flatten(), cov0, size=nObs)
    mu1 = x.mean(axis=0).reshape(-1, 1)  # mean of the columns of the random matrix
    if shrink:
        cov1 = LedoitWolf().fit(x).covariance_
    else:
        cov1 = np.cov(x, rowvar=0)
    return mu1, cov1
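# Hedged usage sketch (added for illustration, not from the original source):
# with few observations relative to the dimension, the shrunk estimate is
# typically much better conditioned than the raw sample covariance.
def example_sim_cov_mu():
    import numpy as np

    np.random.seed(0)
    n = 10
    mu0 = np.zeros((n, 1))
    cov0 = np.eye(n)

    mu_s, cov_sample = simCovMu(mu0, cov0, nObs=15, shrink=False)
    mu_l, cov_shrunk = simCovMu(mu0, cov0, nObs=15, shrink=True)
    # Shrinkage pulls eigenvalues toward their mean, lowering the condition number
    assert np.linalg.cond(cov_shrunk) < np.linalg.cond(cov_sample)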
def prior_vector_variability(x): """ Estimate the covariance matrix of x with the LedoitWolf estimator :param x: an array of dim (t,n) :return: The estimated covariance matrix """ dx = LedoitWolf().fit(x).covariance_ return dx
def test_ledoit_wolf_small(): # Compare our blocked implementation to the naive implementation X_small = X[:, :4] lw = LedoitWolf() lw.fit(X_small) shrinkage_ = lw.shrinkage_ assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
def untangle(X: Iterable, y: Iterable, n_clusters: int = None, get_connectivity: bool = True, compute_distances: bool = True, kind: str = 'correlation', agglo_kws: Union[dict, Bunch] = None) -> FeatureAgglomeration: from nilearn.connectome import ConnectivityMeasure as CM from sklearn.cluster import FeatureAgglomeration from sklearn.covariance import LedoitWolf from sklearn.feature_selection import SelectKBest from sklearn.feature_selection import mutual_info_classif agglo_defs = dict(affinity='euclidean', compute_full_tree='auto', linkage='ward', pooling_func=np.mean, distance_threshold=None, compute_distances=compute_distances) if get_connectivity is True: connect_mat = CM(LedoitWolf(), kind=kind).fit_transform([X.values])[0] else: connect_mat = None if n_clusters is None: n_clusters = divmod(X.shape[1], 2)[0] - 1 if n_clusters == 0: n_clusters = 1 if agglo_kws is None: agglo_kws = {} agglo_defs.update(agglo_kws) agglo = FeatureAgglomeration(n_clusters=n_clusters, connectivity=connect_mat, **agglo_defs) if not isinstance(y, pd.Series): y = pd.Series(y) if not isinstance(X, pd.DataFrame): X = pd.DataFrame(X) agglo.fit(X, y) setattr( agglo, 'cluster_indexes_', pd.DataFrame(zip(agglo.labels_, agglo.feature_names_in_), columns=['cluster', 'feature']).groupby('cluster').feature) skb = SelectKBest(k=1, score_func=mutual_info_classif) factor_leaders_ = [ skb.fit(X[itm[1]], y).get_feature_names_out()[0] for itm in tuple(agglo.cluster_indexes_) ] setattr(agglo, 'factor_leaders_', factor_leaders_) return agglo
def __init__(self, cov_estimator=LedoitWolf(store_precision=False), kind='covariance', vectorize=False, discard_diagonal=False): self.cov_estimator = cov_estimator self.kind = kind self.vectorize = vectorize self.discard_diagonal = discard_diagonal
def connectivity(subjects_ts, kinds=kinds, saveas='file'):
    """
    Estimates Functional Connectivity using several estimation models

    Parameters
    ----------
    subjects_ts: array-like, 2-D (n_subjects, n_regions)
        Array of BOLD time-series
    kinds: list
        Kinds of connectivity measures to be computed. Kinds include:
        'correlation', 'partial correlation', 'tangent', 'covariance'.
    saveas: str
        Destination to save and load output (.npz)

    Returns
    ---------
    mean_connectivity_matrix: dictionary, {'kind': (n_regions, n_regions)}
        Group-level functional connectivity matrix
    individual_connectivity_matrix: dictionary, {'kind': (n_subjects, n_regions, n_regions)}
        Subject-level functional connectivity matrices
    """
    individual_connectivity_matrices = dict()
    mean_connectivity_matrix = dict()

    if os.path.exists(saveas):
        # allow_pickle is needed because the dicts are stored as object arrays
        data = np.load(saveas, allow_pickle=True)
        individual_connectivity_matrices = data['arr_0'].flatten()[0]
        mean_connectivity_matrix = data['arr_1'].flatten()[0]
    else:
        for kind in kinds:
            # Computing individual functional connectivity
            conn_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
                assume_centered=True, store_precision=True),
                                               kind=kind,
                                               vectorize=False,
                                               discard_diagonal=False)
            individual_connectivity_matrices[
                kind] = conn_measure.fit_transform(subjects_ts)
            # Computing group functional connectivity
            if kind == 'tangent':
                mean_connectivity_matrix[kind] = conn_measure.mean_
            else:
                mean_connectivity_matrix[kind] = \
                    individual_connectivity_matrices[kind].mean(axis=0)

        np.savez(saveas, individual_connectivity_matrices,
                 mean_connectivity_matrix)

    return mean_connectivity_matrix, individual_connectivity_matrices
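# Hedged usage sketch (added for illustration, not from the original source):
# three synthetic "subjects", each with 100 time points over 5 regions.
# Assumes nilearn is installed; 'conn_demo.npz' is an illustrative cache path.
def example_connectivity():
    import numpy as np

    rng = np.random.RandomState(0)
    subjects_ts = [rng.normal(size=(100, 5)) for _ in range(3)]

    mean_fc, individual_fc = connectivity(subjects_ts,
                                          kinds=['correlation', 'tangent'],
                                          saveas='conn_demo.npz')
    assert individual_fc['correlation'].shape == (3, 5, 5)
    assert mean_fc['tangent'].shape == (5, 5)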
def max_ic_combine(factor_df, mret_df, factor_list, span, method='sample', weight_limit=True):
    """
    Combine factors with maximum-IC weighting.

    Parameters:
        factor_df: DataFrame, factor values to be combined
        mret_df: DataFrame, individual stock returns
        factor_list: list, names of the factors to combine
        span: length of the history window used to compute the mean IC
        method: covariance estimation method; 'sample' uses the sample
                covariance matrix, 'shrunk' uses Ledoit-Wolf shrinkage
        weight_limit: bool, whether to constrain the weights to be non-negative

    Returns:
        DataFrame, the composite factor
    """
    # Compute the IC for each period
    ic_df = calc_ic(factor_df, mret_df, factor_list, return_col_name='nxt1_ret',
                    ic_type='spearman')
    ic_df = ic_df.sort_values('trade_date')
    ic_df['trade_date'] = ic_df['trade_date'].shift(-1)
    for fn in factor_list:
        ic_df[fn] = ic_df[fn].rolling(span).mean()
    ic_df = ic_df.dropna()

    # Maximize the IC
    m_ic_df = {}
    for dt in ic_df['trade_date']:
        ic_mean = ic_df.loc[ic_df['trade_date'] == dt, factor_list].values
        tmp_factor_df = factor_df.loc[factor_df['trade_date'] == dt, factor_list]
        n = len(factor_list)

        # Solve the optimization problem
        if method == 'sample':
            P = matrix(2*np.cov(tmp_factor_df.T))
        elif method == 'shrunk':
            # .values replaces the removed DataFrame.as_matrix() API
            P = matrix(2*LedoitWolf().fit(tmp_factor_df.dropna().values).covariance_)
        q = matrix([0.0]*n)
        G = matrix(-np.identity(n))
        h = matrix([0.0]*n)
        A = matrix(ic_mean, (1, n))
        b = matrix(1.0)
        if weight_limit:
            try:
                res = np.array(solvers.qp(P=P, q=q, G=G, h=h, A=A, b=b)['x'])
            except Exception:
                res = np.array(solvers.qp(P=P, q=q, A=A, b=b)['x'])
        else:
            res = np.array(solvers.qp(P=P, q=q, A=A, b=b)['x'])
        m_ic_df[dt] = np.array(res).reshape(n)
    m_ic_df = pd.DataFrame(m_ic_df, index=factor_list).T.reset_index()
    if weight_limit:
        m_ic_df[factor_list] = np.where(m_ic_df[factor_list] < 0, 0, m_ic_df[factor_list])
        m_ic_df.loc[m_ic_df.sum(axis=1) == 0, factor_list] = 1
    m_ic_df.columns = ['trade_date'] + factor_list

    # Weight the factors
    conb_df = factor_combine(factor_df, factor_list, m_ic_df)
    return conb_df, m_ic_df
def LW_est(X):
    '''
    Covariance estimate using the Ledoit-Wolf optimal shrinkage coefficient
    X_size = (n_samples, n_features)
    '''
    lw = LedoitWolf()
    cov_lw = lw.fit(X).covariance_
    return cov_lw
def __init__(self, cov_estimator=LedoitWolf(store_precision=False), kind='covariance', memory=Memory(cachedir=None, verbose=0), memory_level=0, verbose=0): self.cov_estimator = cov_estimator self.kind = kind self.memory = memory self.memory_level = memory_level self.verbose = verbose
def __init__( self, cov_estimator=LedoitWolf(store_precision=False), prior_mean_type="geometric", shrinkage=0.5, explained_variance_threshold=0.7, ): self.cov_estimator = cov_estimator self.prior_mean_type = prior_mean_type self.shrinkage = shrinkage self.explained_variance_threshold = explained_variance_threshold
def similarity_measure_correlation(ds_tar, ds_src, results, p_value):
    print('Computing correlation similarity...')

    #classifier = results['classifier']
    # Get classifier from results
    classifier = results['fclf']

    # Make prediction on training set, to understand data distribution
    prediction_src = classifier.predict(ds_src)
    #prediction_src = results['predictions_ds']
    true_predictions = np.array(prediction_src) == ds_src.targets
    example_dist = dict()

    # Extract the features selected by the classifier from each dataset
    if isinstance(classifier, FeatureSelectionClassifier):
        f_selection = results['fclf'].mapper
        ds_tar = f_selection(ds_tar)
        ds_src = f_selection(ds_src)

    '''
    Get class distribution information: mean and covariance
    '''
    for label in np.unique(ds_src.targets):
        # Get examples correctly classified
        mask = ds_src.targets == label
        example_dist[label] = dict()
        true_ex = ds_src.samples[mask * true_predictions]

        # Get mean and covariance to draw the distribution
        mean_ = np.mean(true_ex, axis=0)
        example_dist[label]['mean'] = mean_
        '''
        cov_ = np.cov(true_ex.T)
        example_dist[label]['cov'] = cov_
        '''
        print('Estimation of covariance matrix for ' + label + ' class...')
        print(true_ex.shape)
        try:
            print('Method is Correlation...')
            #print true_ex[:np.int(true_ex.shape[0]/3),:].shape
            #cov_ = MinCovDet().transform(true_ex)
            #cov_ = LedoitWolf().transform(true_ex)
            #cov_ = EmpiricalCovariance().transform(true_ex)
            #cov_ = GraphLasso(alpha=0.5).transform(true_ex)
            #cov_ = OAS(alpha=0.1).transform(true_ex)
        except MemoryError as err:
            print('Method is LedoitWolf')
            # Covariance estimators expose fit(), not transform()
            cov_ = LedoitWolf(block_size=15000).fit(true_ex)

        #example_dist[label]['i_cov'] = scipy.linalg.inv(cov_)
        #example_dist[label]['i_cov'] = cov_.precision_
        print('Inverted covariance estimated...')
def covarianceEstimation(daily_returns, cov_estimator):
    lw = LedoitWolf()
    if cov_estimator == "shrinkage":
        return lw.fit(daily_returns).covariance_
    elif cov_estimator == "empirical":
        return daily_returns.cov()
    elif cov_estimator == "multifactor":
        # FIXME
        return None
    else:
        raise Exception("cov_estimator must be one of [shrinkage, empirical, multifactor]")
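# Hedged usage sketch (added for illustration, not from the original source):
# daily_returns is passed as a pandas DataFrame so that the "empirical"
# branch's .cov() call works.
def example_covarianceEstimation():
    import numpy as np
    import pandas as pd

    rng = np.random.RandomState(0)
    daily_returns = pd.DataFrame(rng.normal(0, 0.01, size=(250, 4)))

    shrunk = covarianceEstimation(daily_returns, cov_estimator="shrinkage")
    empirical = covarianceEstimation(daily_returns, cov_estimator="empirical")
    assert shrunk.shape == (4, 4)
    assert empirical.shape == (4, 4)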