Example #1
    def shrinked_covariance(returns, price_data=False, shrinkage_type='basic', assume_centered=False,
                            basic_shrinkage=0.1):
        """
        Calculates the Covariance estimator with shrinkage for a dataframe of asset prices or returns.

        This function allows three types of shrinkage - Basic, Ledoit-Wolf and Oracle Approximating Shrinkage.
        It is a wrapper around scikit-learn's ShrunkCovariance, LedoitWolf and OAS classes. According to the
        scikit-learn User Guide on Covariance estimation:

        "Sometimes, it even occurs that the empirical covariance matrix cannot be inverted for numerical
        reasons. To avoid such an inversion problem, a transformation of the empirical covariance matrix
        has been introduced: the shrinkage. Mathematically, this shrinkage consists in reducing the ratio
        between the smallest and the largest eigenvalues of the empirical covariance matrix".

        Link to the documentation:
        https://scikit-learn.org/stable/modules/covariance.html

        If a dataframe of prices is given, it is transformed into a dataframe of returns using
        the calculate_returns method from the ReturnsEstimators class.

        :param returns: (pd.DataFrame) Dataframe where each column is a series of returns or prices for an asset.
        :param price_data: (bool) Flag if prices of assets are used and not returns. (False by default)
        :param shrinkage_type: (str) Type of shrinkage to use. (``basic`` by default, ``lw``, ``oas``, ``all``)
        :param assume_centered: (bool) Flag for data with mean almost, but not exactly zero.
                                       (Read documentation for chosen shrinkage class, False by default)
        :param basic_shrinkage: (float) Between 0 and 1. Coefficient in the convex combination for basic shrinkage.
                                        (0.1 by default)
        :return: (np.array) Estimated covariance matrix. Tuple of covariance matrices if shrinkage_type = ``all``.
        """

        # Calculating the series of returns from series of prices
        if price_data:
            # Class with returns calculation function
            ret_est = ReturnsEstimators()

            # Calculating returns
            returns = ret_est.calculate_returns(returns)

        # Calculating the covariance matrix for the chosen method
        if shrinkage_type == 'basic':
            cov_matrix = ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(
                returns).covariance_
        elif shrinkage_type == 'lw':
            cov_matrix = LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_
        elif shrinkage_type == 'oas':
            cov_matrix = OAS(assume_centered=assume_centered).fit(returns).covariance_
        else:
            cov_matrix = (
                ShrunkCovariance(assume_centered=assume_centered, shrinkage=basic_shrinkage).fit(returns).covariance_,
                LedoitWolf(assume_centered=assume_centered).fit(returns).covariance_,
                OAS(assume_centered=assume_centered).fit(returns).covariance_)

        return cov_matrix
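A minimal usage sketch of what this wrapper does under the hood, on hypothetical random returns (the imports, sizes and seed below are assumptions, not part of the original snippet):

import numpy as np
import pandas as pd
from sklearn.covariance import ShrunkCovariance, LedoitWolf, OAS

# Hypothetical returns: 250 daily observations for 5 assets
rng = np.random.default_rng(0)
returns = pd.DataFrame(rng.normal(scale=0.01, size=(250, 5)))

# The three estimators wrapped above, called directly
cov_basic = ShrunkCovariance(shrinkage=0.1).fit(returns).covariance_
cov_lw = LedoitWolf().fit(returns).covariance_
cov_oas = OAS().fit(returns).covariance_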
Example #2
def similarity_measure_mahalanobis(ds_tar, ds_src, results, p_value=0.95):

    print('Computing Mahalanobis similarity...')

    # TODO: The function parameters must be the two datasets,
    # TODO: src is the one with parameter calculation, second is the similarity one

    #  Get classifier from results
    classifier = results['fclf']

    # Make prediction on training set, to understand data distribution
    ## TODO: Evaluate if it is correct!
    classifier_predictions_src = classifier.predict(ds_src)
    prediction_mask = np.array(classifier_predictions_src) == ds_src.targets
    example_dist = dict()

    # Extract feature selected from each dataset
    if isinstance(classifier, FeatureSelectionClassifier):
        f_selection = results['fclf'].mapper
        ds_tar = f_selection(ds_tar)
        ds_src = f_selection(ds_src)
    '''
    Get class distribution information: mean and covariance
    '''

    for label in np.unique(ds_src.targets):

        # Get examples correctly classified
        mask = ds_src.targets == label
        example_dist[label] = dict()
        true_ex = ds_src.samples[mask * prediction_mask]

        # Get Mean and Covariance to draw the distribution
        # We evaluate mean and cov only on well-classified examples
        mean_ = np.mean(true_ex, axis=0)
        example_dist[label]['mean'] = mean_

        print('Estimation of covariance matrix for ' + label + ' class...')
        print(true_ex.shape)

        try:
            #cov_ = MinCovDet().fit(true_ex)
            cov_ = LedoitWolf().fit(true_ex)
            #cov_ = EmpiricalCovariance().fit(true_ex)
            #cov_ = GraphLasso(alpha=0.5).fit(true_ex)
            #cov_ = OAS(alpha=0.1).fit(true_ex)
        except MemoryError:
            print('Method is LedoitWolf')
            cov_ = LedoitWolf(block_size=15000).fit(true_ex)

        example_dist[label]['i_cov'] = cov_.precision_
        print 'Inverted covariance estimated...'
Example #3
def test_ledoit_wolf_large():
    # test that ledoit_wolf doesn't error on data that is wider than block_size
    rng = np.random.RandomState(0)
    # use a number of features that is larger than the block-size
    X = rng.normal(size=(10, 20))
    lw = LedoitWolf(block_size=10).fit(X)
    # check that covariance is about diagonal (random normal noise)
    assert_almost_equal(lw.covariance_, np.eye(20), 0)
    cov = lw.covariance_

    # check that the result is consistent with not splitting data into blocks.
    lw = LedoitWolf(block_size=25).fit(X)
    assert_almost_equal(lw.covariance_, cov)
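For one-off estimates, scikit-learn also exposes a functional form; a short sketch (the equivalence check is an assumption based on the shared implementation, not part of the original test):

import numpy as np
from sklearn.covariance import LedoitWolf, ledoit_wolf

X = np.random.RandomState(0).normal(size=(10, 20))
# ledoit_wolf() returns (shrunk covariance, shrinkage intensity)
cov_fn, shrinkage_fn = ledoit_wolf(X, block_size=10)
est = LedoitWolf(block_size=10).fit(X)
assert np.allclose(cov_fn, est.covariance_)
assert np.isclose(shrinkage_fn, est.shrinkage_)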
Example #4
File: utils.py Project: EliHei2/scPotter
def lw(data, alphas):
    """
        Estimates the graph with Ledoit-Wolf estimator.

        Parameters
        ----------
        data: numpy ndarray
            The input data to reconstruct/estimate a graph on. Features as columns and observations as rows.
        alphas: float
            The threshold on the precision matrix to determine edges.
        Returns
        -------
        adjacency matrix : the estimated adjacency matrix.
    """
    alpha=alphas
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    cov = LedoitWolf().fit(data)
    precision_matrix = cov.get_precision()
    n_features, _ = precision_matrix.shape
    mask1 = np.abs(precision_matrix) > alpha
    mask0 = np.abs(precision_matrix) <= alpha
    adjacency_matrix = np.zeros((n_features,n_features))
    adjacency_matrix[mask1] = 1
    adjacency_matrix[mask0] = 0
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
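The snippet above assumes numpy, StandardScaler and LedoitWolf are already imported; a hypothetical driver under that assumption (data and threshold are made up for illustration):

import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.covariance import LedoitWolf

# Hypothetical input: 100 observations of 6 features
data = np.random.default_rng(1).normal(size=(100, 6))
adjacency = lw(data, alphas=0.05)   # lw() as defined above
print(int(adjacency.sum()))         # number of retained (off-diagonal) edges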
Example #5
 def estimatorLedoitWolf(self):
     #remove Date column for this function
     trimmedData = self.data.drop('Date', axis=1)
     cov = LedoitWolf().fit(trimmedData).covariance_  #centers the data
     assert cov.shape == self.expectedCovShape
     self.cov = cov
     return self.cov
Example #6
    def __call__(self, train_list, rest_list, clear_after_use=False):

        print("Apply Whitening...")

        if clear_after_use:
            self.sigma_neg_sqrt = None
            self.shrinkage_parameter = None

        if self.sigma_neg_sqrt is None:
            train_stacked = np.concatenate([d.x for d in train_list], axis=0)
            # Fit LedoitWolf for covariance estimation
            lw = LedoitWolf().fit(train_stacked)
            self.shrinkage_parameter = lw.shrinkage_
            print("   Estimated shrinkage-parameter={:.3f}".format(
                self.shrinkage_parameter))
            # estimated covariance matrix
            sigma = lw.covariance_
            # eigenvalue decomposition
            eig_values, eig_vectors = np.linalg.eig(sigma)
            # negative square root of eigenvalues
            eig_values_neg_sqrt = np.diag(1 / np.sqrt(eig_values + self.eps))
            # negative square root of sigma
            self.sigma_neg_sqrt = np.dot(
                np.dot(eig_vectors, eig_values_neg_sqrt), eig_vectors.T)

        def tensor_whiten(data):
            x = data.x

            x = np.dot(x, self.sigma_neg_sqrt)

            return RawData.create_from_ref(data, x=x)

        return self.transform(tensor_whiten, train_list, rest_list)
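A standalone sanity check of the whitening step above (a sketch, not the class itself; eigh is used here since the covariance is symmetric, and the data is synthetic):

import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.default_rng(0)
X = rng.normal(size=(500, 8)) @ rng.normal(size=(8, 8))   # correlated features
sigma = LedoitWolf().fit(X).covariance_
eig_values, eig_vectors = np.linalg.eigh(sigma)           # symmetric decomposition
sigma_neg_sqrt = eig_vectors @ np.diag(1 / np.sqrt(eig_values)) @ eig_vectors.T
Xw = (X - X.mean(axis=0)) @ sigma_neg_sqrt
# Covariance of the whitened data should be close to the identity
print(np.round(np.cov(Xw, rowvar=False), 1))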
Example #7
def filter_W_fromVcv(vcv, variance_perc=1.0):
    '''vcv is a filtered value of the covariance matrix, with shape (T, N, N).
    Filters init_W and init_df, the initial distribution parameters for W's posterior,
    where W is the diffusion matrix of the components of the Cholesky decomposition of vcv.
    Also filters init_vcv_std, the standard deviations of these components' posteriors.
    '''
    [T, N, _] = vcv.shape
    num_tril = int(N * (N + 1) / 2)
    chol_vcv = np.zeros([T, int(N * (N + 1) / 2)])
    ind = indexes_librarian(N)
    for t in range(T):
        cvcv = np.linalg.cholesky(vcv[t])
        chol_vcv[t, ind.spiral_diag] = inv_softplus(cvcv[ind.diag[0],
                                                         ind.diag[1]])
        chol_vcv[t, ind.spiral_udiag] = cvcv[ind.udiag[0], ind.udiag[1]]
    cov = LedoitWolf().fit(chol_vcv[1:, :] - chol_vcv[:-1, :])
    init_W = cov.covariance_
    try:
        np.linalg.cholesky(init_W)
    except np.linalg.LinAlgError:
        #adds a constant term if init_W is singular
        print('W resulted singular, a correction term (I*1e-4) is added')
        init_W += np.eye(num_tril) * 1e-4
    init_df = np.max([4 * num_tril / variance_perc, num_tril])
    init_W *= 2
    init_vcv_std = np.abs(chol_vcv) * 0.1 / N * variance_perc
    #init_vcv_std=np.tile(np.reshape(np.abs(vcv).mean(axis=0),[1,N,N]),[T,1,1])/np.sqrt(N)*variance_perc
    return np.float32(init_W), np.float32(init_df), np.float32(init_vcv_std)
Example #8
def query_samples_and_probabilities(pydc,query,evidence,var,std=False):
  pydc.queryWithSamples(NUM_SAMPLES,query,evidence,var,FLAG,BIGNUM)
  parsed_samples = ast.literal_eval(pydc.samples)
  values = []
  weights = []
  for sample in parsed_samples:
    x, w = sample[0], sample[1]
    values  += [x]
    weights += [w]
  values, weights = np.array(values), np.array(weights)
  if std:
    avg, std = weighted_avg_and_std(values, weights)
    return avg, std
  else:
    #values = values + 1e-5*np.random.rand(*values.shape)
    avg, cov = weighted_avg_and_cov(values, weights)
    print(avg)
    #X = np.random.multivariate_normal(mean=avg,cov=cov,size=100)
    #shcov = LedoitWolf().fit(X)
    #assert cov is positive-semidefinite
    try:
      assert(np.all(np.linalg.eigvals(cov) >= 0))
    except AssertionError:
      X = np.random.multivariate_normal(mean=avg,cov=cov,size=100)
      shcov = LedoitWolf().fit(X)
      avg, cov = shcov.location_, shcov.covariance_
      #assert(np.all(np.linalg.eigvals(cov) >= 0))
    return (avg, cov)
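The fallback above can be exercised in isolation; a minimal sketch with a deliberately indefinite matrix (the example matrix and sample size are assumptions):

import numpy as np
from sklearn.covariance import LedoitWolf

avg = np.zeros(3)
cov = np.array([[1.0, 0.99, 0.0],
                [0.99, 1.0, 0.99],
                [0.0, 0.99, 1.0]])              # one eigenvalue is negative
assert np.any(np.linalg.eigvalsh(cov) < 0)
X = np.random.multivariate_normal(mean=avg, cov=cov, size=100)  # NumPy warns but samples
shcov = LedoitWolf().fit(X)
avg, cov = shcov.location_, shcov.covariance_
# Shrinkage toward a scaled identity keeps all eigenvalues positive
assert np.all(np.linalg.eigvalsh(cov) >= 0)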
Example #9
def compute_connectivity_subject(conn, masker, func, confound=None):
    """ Returns connectivity of one fMRI for a given atlas
    """

    ts = do_mask_img(masker, func, confound)

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example #10
    def _simulate_covariance(mu_vector, cov_matrix, num_obs, lw_shrinkage=False):
        """
        Derives an empirical vector of means and an empirical covariance matrix.

        Based on the set of true means vector and covariance matrix of X distributions,
        the function generates num_obs observations for every X.
        Based on these observations simulated vector of means and the simulated covariance
        matrix are obtained.

        :param mu_vector: (np.array) True means vector for X distributions
        :param cov_matrix: (np.array) True covariance matrix for X distributions
        :param num_obs: (int) Number of observations to draw for every X
        :param lw_shrinkage: (bool) Flag to apply Ledoit-Wolf shrinkage to X (False by default)
        :return: (np.array, np.array) Empirical means vector, empirical covariance matrix
        """

        # Generating a matrix of num_obs observations for X distributions
        observations = np.random.multivariate_normal(mu_vector.flatten(), cov_matrix, size=num_obs)

        # Empirical means vector calculation
        mu_simulated = observations.mean(axis=0).reshape(-1, 1)

        if lw_shrinkage:  # If applying Ledoit-Wolf shrinkage
            cov_simulated = LedoitWolf().fit(observations).covariance_

        else:  # Simple empirical covariance matrix
            cov_simulated = np.cov(observations, rowvar=False)

        return mu_simulated, cov_simulated
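Why the flag matters: with few observations per dimension, the shrunk estimate is typically closer to the true covariance than the raw sample covariance. A rough illustration (the dimensions and the true covariance below are assumptions):

import numpy as np
from sklearn.covariance import LedoitWolf

rng = np.random.RandomState(0)
dim, num_obs = 20, 30
cov_true = np.diag(np.linspace(0.5, 2.0, dim))
obs = rng.multivariate_normal(np.zeros(dim), cov_true, size=num_obs)
err_emp = np.linalg.norm(np.cov(obs, rowvar=False) - cov_true)
err_lw = np.linalg.norm(LedoitWolf().fit(obs).covariance_ - cov_true)
print(err_emp, err_lw)   # the Ledoit-Wolf error is usually the smaller one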
Example #11
def postProcessing(nifti_file, subject_key, spheres_masker):
    """Perform post-processing.
    param nifti_file: string. Path to the NIfTI file.
    param subject_key: string. Subject's key.
    param spheres_masker: masker used to extract the time series.
    return: tuple (subject_key, raw dictionary) where the dictionary holds
        {"time_series": matrix of time series (time_points, rois),
         "covariance": covariance matrix of atlas rois (rois, rois),
         "correlation": correlation matrix of atlas rois (rois, rois)}
    """
    try:
        print("subject_key: " + subject_key)
        print("Extract timeseries")
        # Extract the time series
        print(nifti_file)
        timeseries = spheres_masker.fit_transform(nifti_file, confounds=None)
        print("Extract covariance matrix")
        cov_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
            assume_centered=False, block_size=1000, store_precision=False),
                                          kind='covariance')
        cov = []
        cor = []
        cov = cov_measure.fit_transform([timeseries])[0, :, :]
        print("Extract correlation matrix")
        cor = nilearn.connectome.cov_to_corr(cov)
    except Exception:

        raise Exception("subject_key: %s \n" % subject_key +
                        traceback.format_exc())
    return (subject_key, {
        "time_series": timeseries,
        "covariance": cov,
        "correlation": cor
    })
Example #12
def compute_network_connectivity_subject(conn, func, masker, rois):
    """ Returns connectivity of one fMRI for a given atlas
    """
    ts = masker.fit_transform(func)
    ts = np.asarray(ts)[:, rois]

    if conn == 'gl':
        fc = GraphLassoCV(max_iter=1000)
    elif conn == 'lw':
        fc = LedoitWolf()
    elif conn == 'oas':
        fc = OAS()
    elif conn == 'scov':
        fc = ShrunkCovariance()

    if conn == 'corr' or conn == 'pcorr':
        fc = Bunch(covariance_=0, precision_=0)
        fc.covariance_ = np.corrcoef(ts)
        fc.precision_ = partial_corr(ts)
    else:
        fc.fit(ts)
    ind = np.tril_indices(ts.shape[1], k=-1)
    return fc.covariance_[ind], fc.precision_[ind]
Example #13
def weight_opt(returns,benchmark, lower = 0, upper = 1, ph=2**7, cov_method='sample', seed = 123):
    np.random.seed(seed)
    n_asset, n_sample = returns.shape
    rets = np.asmatrix(returns)  
    #N = 10
    #phs = [2**(t-2) for t in range(N)]  
    # Convert to cvxopt matrices 
    if cov_method == 'sample':
        Cov = opt.matrix(np.cov(rets,benchmark))
    elif cov_method == 'lw':
        Cov = opt.matrix(LedoitWolf().fit(np.append(np.transpose(rets),benchmark.reshape(n_sample,1), axis=1)).covariance_)
    else:
        raise ValueError('cov_method should be in {}'.format({'sample', 'lw'}))
    S = Cov[:n_asset,:n_asset]
    r_mean = opt.matrix(np.nanmean(rets, axis=1)) # n*1
    Cb = Cov[:n_asset,n_asset]
    # Create constraint matrices  
    G = opt.matrix(np.append(np.eye(n_asset),-np.eye(n_asset),axis = 0))   # 2n x n identity matrix  
    h = opt.matrix(np.append(upper*np.ones((n_asset,1)),-lower*np.ones((n_asset,1)),axis = 0)) 
    A = opt.matrix(1.0, (1, n_asset))  
    b = opt.matrix(1.0)  
    # Calculate efficient frontier weights using quadratic programming  
    x = solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x']
    #portfolios = [solvers.qp(ph*S, -ph*Cb-r_mean, G, h, A, b)['x']  
    #              for ph in phs]  
    # CALCULATE RISKS AND RETURNS FOR FRONTIER  
    ret = blas.dot(r_mean, x)
    #[blas.dot(r_mean, x) for x in portfolios]  
    errors = blas.dot(x, S*x)+Cov[n_asset,n_asset]-2*blas.dot(Cb,x)
    #[blas.dot(x, S*x)+Cov[n_asset,n_asset]-2*blas.dot(Cb,x) for x in portfolios]  
    return np.transpose(np.array(x))[0], ret, errors#, ret_opt, risk_opt   
Example #14
def test_lda_predict():
    # Test LDA classification.
    # This checks that LDA implements fit and predict and returns correct
    # values for simple toy data.
    for test_case in solver_shrinkage:
        solver, shrinkage = test_case
        clf = LinearDiscriminantAnalysis(solver=solver, shrinkage=shrinkage)
        y_pred = clf.fit(X, y).predict(X)
        assert_array_equal(y_pred, y, "solver %s" % solver)

        # Assert that it works with 1D data
        y_pred1 = clf.fit(X1, y).predict(X1)
        assert_array_equal(y_pred1, y, "solver %s" % solver)

        # Test probability estimates
        y_proba_pred1 = clf.predict_proba(X1)
        assert_array_equal((y_proba_pred1[:, 1] > 0.5) + 1, y,
                           "solver %s" % solver)
        y_log_proba_pred1 = clf.predict_log_proba(X1)
        assert_allclose(
            np.exp(y_log_proba_pred1),
            y_proba_pred1,
            rtol=1e-6,
            atol=1e-6,
            err_msg="solver %s" % solver,
        )

        # Primarily test for commit 2f34950 -- "reuse" of priors
        y_pred3 = clf.fit(X, y3).predict(X)
        # LDA shouldn't be able to separate those
        assert np.any(y_pred3 != y3), "solver %s" % solver

    clf = LinearDiscriminantAnalysis(solver="svd", shrinkage="auto")
    with pytest.raises(NotImplementedError):
        clf.fit(X, y)

    clf = LinearDiscriminantAnalysis(solver="lsqr",
                                     shrinkage=0.1,
                                     covariance_estimator=ShrunkCovariance())
    with pytest.raises(
            ValueError,
            match=("covariance_estimator and shrinkage "
                   "parameters are not None. "
                   "Only one of the two can be set."),
    ):
        clf.fit(X, y)

    # test bad solver with covariance_estimator
    clf = LinearDiscriminantAnalysis(solver="svd",
                                     covariance_estimator=LedoitWolf())
    with pytest.raises(ValueError,
                       match="covariance estimator is not supported with svd"):
        clf.fit(X, y)

    # test bad covariance estimator
    clf = LinearDiscriminantAnalysis(solver="lsqr",
                                     covariance_estimator=KMeans(
                                         n_clusters=2, n_init="auto"))
    with pytest.raises(ValueError):
        clf.fit(X, y)
Example #15
def simulateLogNormal(data, covtype='Estimate', nsamples=2000, **kwargs):
    """

    :param data: Input data matrix with observations as rows and variables as columns
    :param covtype: Type of covariance matrix estimator. Allowed types are:
        - Estimate (default): regular sample covariance
        - Diagonal: diagonal covariance matrix (no between-variable correlation)
        - ShrinkageLedoitWolf: Ledoit-Wolf shrinkage estimator
        - ShrinkageOAS: Oracle Approximating Shrinkage estimator
    :param int nsamples: Number of simulated samples to draw
    :return: simulated data and the correlation matrix of the simulated data
    """

    try:
        # Offset data to make sure there are no 0 values for log transform
        offset = np.min(data) + 1
        offdata = data + offset

        # log on the offsetted data
        logdata = np.log(offdata)
        # Get the means
        meanslog = np.mean(logdata, axis=0)

        # Specify covariance
        # Regular covariance estimator
        if covtype == "Estimate":
            covlog = np.cov(logdata, rowvar=0)
        # Shrinkage covariance estimator, using LedoitWolf
        elif covtype == "ShrinkageLedoitWolf":
            scov = LedoitWolf()
            scov.fit(logdata)
            covlog = scov.covariance_
        elif covtype == "ShrinkageOAS":
            scov = OAS()
            scov.fit(logdata)
            covlog = scov.covariance_

        # Diagonal covariance matrix (no between variable correlation)
        elif covtype == "Diagonal":
            covlogdata = np.var(
                logdata, axis=0)  #get variance of log data by each column
            covlog = np.diag(
                covlogdata
            )  #generate a matrix with diagonal of variance of log Data
        else:
            raise ValueError('Unknown Covariance type')

        simData = np.random.multivariate_normal(meanslog, covlog, nsamples)
        simData = np.exp(simData)
        simData -= offset

        ##Set to 0 negative values
        simData[np.where(simData < 0)] = 0
        # work out the correlation of matrix by columns, each column is a variable
        corrMatrix = np.corrcoef(simData, rowvar=0)

        return simData, corrMatrix

    except Exception as exp:
        raise exp
Example #16
    def __init__(self, sharpes, returns):
        """
        Initialize AuthorModelBuilder object.

        Parameters
        ----------
        sharpes : pd.DataFrame
            Long-format DataFrame of in-sample Sharpe ratios (from user-run
            backtests), indexed by user, algorithm and code ID.
            Note that currently, backtests are deduplicated based on code id.
            See fit_authors for more information.
        returns : pd.DataFrame
            DataFrame of algorithm returns, standardized and used below to
            estimate the strategy correlation matrix with Ledoit-Wolf shrinkage.
        """
        self.num_authors = sharpes.meta_user_id.nunique()
        self.num_algos = sharpes.meta_algorithm_id.nunique()
        # For num_backtests, nunique() and count() should be the same
        self.num_backtests = sharpes.meta_code_id.nunique()

        # Which algos correspond to which authors?
        df = (sharpes.loc[:, ['meta_user_id', 'meta_algorithm_id']].
              drop_duplicates(
                  subset='meta_algorithm_id',
                  keep='first').reset_index().meta_user_id.astype(str))
        self.author_to_algo_encoding = LabelEncoder().fit_transform(df)

        # Which backtests correspond to which algos?
        df = sharpes.meta_algorithm_id.astype(str)
        self.algo_to_backtest_encoding = LabelEncoder().fit_transform(df)

        # Which backtests correspond to which authors?
        df = sharpes.meta_user_id.astype(str)
        self.author_to_backtest_encoding = LabelEncoder().fit_transform(df)

        # Construct correlation matrix.
        # 0 is a better estimate for mean returns than the sample mean!
        returns_ = returns / returns.std()
        self.corr = LedoitWolf(assume_centered=True).fit(returns_).covariance_

        self.model = self._build_model(sharpes, self.corr)

        self.coords = {
            'meta_user_id': sharpes.meta_user_id.drop_duplicates().values,
            'meta_algorithm_id':
            sharpes.meta_algorithm_id.drop_duplicates().values,
            'meta_code_id': sharpes.meta_code_id.values
        }

        self.dims = {
            'mu_global': (),
            'mu_author': ('meta_user_id', ),
            'mu_author_raw': ('meta_user_id', ),
            'mu_author_sd': (),
            'mu_algo': ('meta_algorithm_id', ),
            'mu_algo_raw': ('meta_algorithm_id', ),
            'mu_algo_sd': (),
            'mu_backtest': ('meta_code_id', ),
            'sigma_backtest': ('meta_code_id', ),
            'alpha_author': ('meta_user_id', ),
            'alpha_algo': ('meta_algorithm_id', )
        }
Example #17
def GetModelParams(DataFrame, ColumnIndex):

    cDataSet = DataFrame

    cData0 = cDataSet[cDataSet['target'] == 0]
    cData1 = cDataSet[cDataSet['target'] == 1]

    bData0 = np.array(cData0[ColumnIndex])
    bData1 = np.array(cData1[ColumnIndex])

    Cov0 = LedoitWolf(assume_centered=False).fit(bData0)
    Cov1 = LedoitWolf(assume_centered=False).fit(bData1)

    Mean0 = bData0.mean(axis=0)
    Mean1 = bData1.mean(axis=0)

    return Cov0.covariance_, Cov1.covariance_, Mean0, Mean1
Example #18
def LedoitWolf_covMatrix(X):
    logger.info(
        'Computing the covariance matrix with shrinkage')
    cov = LedoitWolf().fit(X)
    cov_matrix = cov.covariance_
    mean_vector = cov.location_

    return cov_matrix, mean_vector
Example #19
def simCovMu(mu0, cov0, nObs, shrink=False):
    x = np.random.multivariate_normal(mu0.flatten(), cov0, size = nObs)
    #print(x.shape)
    mu1 = x.mean(axis = 0).reshape(-1,1) #calc mean of columns of rand matrix
    #print(mu1.shape)
    if shrink: cov1 = LedoitWolf().fit(x).covariance_
    else: cov1 = np.cov(x, rowvar=0)
    return mu1, cov1
Example #20
def prior_vector_variability(x):
    """
    Estimate the covariance matrix of x with the LedoitWolf estimator
    :param x: an array of dim (t,n)
    :return: The estimated covariance matrix
    """
    dx = LedoitWolf().fit(x).covariance_
    return dx
Example #21
def test_ledoit_wolf_small():
    # Compare our blocked implementation to the naive implementation
    X_small = X[:, :4]
    lw = LedoitWolf()
    lw.fit(X_small)
    shrinkage_ = lw.shrinkage_

    assert_almost_equal(shrinkage_, _naive_ledoit_wolf_shrinkage(X_small))
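For reference, the naive shrinkage the test compares against can be written directly from Ledoit & Wolf (2004); a sketch assuming centered data, restating the published formula rather than sklearn's exact code:

import numpy as np
from sklearn.covariance import LedoitWolf

def naive_lw_shrinkage(X):
    # Assumes X is (n_samples, n_features) and already centered.
    n, p = X.shape
    S = X.T @ X / n                                  # biased sample covariance
    mu = np.trace(S) / p                             # mean eigenvalue
    delta = np.sum((S - mu * np.eye(p)) ** 2) / p    # dispersion of S around mu*I
    beta = sum(np.sum((np.outer(x, x) - S) ** 2) for x in X) / (n ** 2 * p)
    beta = min(beta, delta)                          # intensity is capped at 1
    return beta / delta if delta > 0 else 0.0

X = np.random.RandomState(0).normal(size=(50, 4))
Xc = X - X.mean(axis=0)
# Should agree with the estimator's shrinkage_ up to floating point
print(naive_lw_shrinkage(Xc), LedoitWolf().fit(X).shrinkage_)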
Example #22
def untangle(X: Iterable,
             y: Iterable,
             n_clusters: int = None,
             get_connectivity: bool = True,
             compute_distances: bool = True,
             kind: str = 'correlation',
             agglo_kws: Union[dict, Bunch] = None) -> FeatureAgglomeration:

    from nilearn.connectome import ConnectivityMeasure as CM
    from sklearn.cluster import FeatureAgglomeration
    from sklearn.covariance import LedoitWolf
    from sklearn.feature_selection import SelectKBest
    from sklearn.feature_selection import mutual_info_classif

    agglo_defs = dict(affinity='euclidean',
                      compute_full_tree='auto',
                      linkage='ward',
                      pooling_func=np.mean,
                      distance_threshold=None,
                      compute_distances=compute_distances)

    if get_connectivity is True:
        connect_mat = CM(LedoitWolf(), kind=kind).fit_transform([X.values])[0]
    else:
        connect_mat = None

    if n_clusters is None:
        n_clusters = divmod(X.shape[1], 2)[0] - 1
        if n_clusters == 0:
            n_clusters = 1

    if agglo_kws is None:
        agglo_kws = {}
    agglo_defs.update(agglo_kws)

    agglo = FeatureAgglomeration(n_clusters=n_clusters,
                                 connectivity=connect_mat,
                                 **agglo_defs)
    if not isinstance(y, pd.Series):
        y = pd.Series(y)
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)

    agglo.fit(X, y)

    setattr(
        agglo, 'cluster_indexes_',
        pd.DataFrame(zip(agglo.labels_, agglo.feature_names_in_),
                     columns=['cluster',
                              'feature']).groupby('cluster').feature)

    skb = SelectKBest(k=1, score_func=mutual_info_classif)
    factor_leaders_ = [
        skb.fit(X[itm[1]], y).get_feature_names_out()[0]
        for itm in tuple(agglo.cluster_indexes_)
    ]
    setattr(agglo, 'factor_leaders_', factor_leaders_)
    return agglo
Example #23
 def __init__(self,
              cov_estimator=LedoitWolf(store_precision=False),
              kind='covariance',
              vectorize=False,
              discard_diagonal=False):
     self.cov_estimator = cov_estimator
     self.kind = kind
     self.vectorize = vectorize
     self.discard_diagonal = discard_diagonal
Example #24
def connectivity(subjects_ts, kinds=kinds, saveas='file'):
    """
    Estimates Functional Connectivity using several estimation models 
    Parameters
    ----------
    subjects_ts: array-like , 2-D (n_subjects,n_regions)
                 Array of BOLD time-series  
    
    kinds: list of kinds of connectivity measure to be computed. Kinds include:
        'correlation', 'partial correlation', 'tangent', 'covariance'.
                                                
    
    saveas : Destination to save and load output (.npz)
    
    Returns
    ---------
    mean_connectivity_matrix: dictionary ,  {'kind' : (n_regions,n_regions)} 
                              Group-level functional connectivity matrix
    individual_connectivity_matrix: dictionary , {'kind' : (n_subjects,n_regions,n_regions)}
                              Subject-level functional connectivity matrices
                 
    """

    individual_connectivity_matrices = dict()

    mean_connectivity_matrix = dict()

    if os.path.exists(saveas):
        data = np.load(saveas)
        individual_connectivity_matrices = data['arr_0'].flatten()[0]
        mean_connectivity_matrix = data['arr_1'].flatten()[0]
    else:

        for kind in kinds:

            # Computing individual functional connectivity

            conn_measure = ConnectivityMeasure(cov_estimator=LedoitWolf(
                assume_centered=True, store_precision=True),
                                               kind=kind,
                                               vectorize=False,
                                               discard_diagonal=False)

            individual_connectivity_matrices[
                kind] = conn_measure.fit_transform(subjects_ts)

            # Computing group functional connectivity

            if kind == 'tangent':
                mean_connectivity_matrix[kind] = conn_measure.mean_
            else:
                mean_connectivity_matrix[kind] = \
                individual_connectivity_matrices[kind].mean(axis=0)
            np.savez(saveas, individual_connectivity_matrices,
                     mean_connectivity_matrix)

    return mean_connectivity_matrix, individual_connectivity_matrices
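A compact, self-contained call pattern for the function above, on synthetic time series (nilearn is assumed installed; the subject count and shapes are illustrative):

import numpy as np
from nilearn.connectome import ConnectivityMeasure
from sklearn.covariance import LedoitWolf

subjects_ts = [np.random.randn(120, 10) for _ in range(5)]  # 5 subjects, 120 TRs, 10 regions
cm = ConnectivityMeasure(cov_estimator=LedoitWolf(assume_centered=True),
                         kind='tangent')
mats = cm.fit_transform(subjects_ts)   # shape (5, 10, 10)
group_mean = cm.mean_                  # the tangent kind exposes the geometric mean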
Example #25
def max_ic_combine(factor_df, mret_df, factor_list, span, method='sample', weight_limit=True):
    """
    最大化IC加权法合成因子
    参数:
        factor_df: DataFrame, 待合成因子值
        mret_df: DataFrame, 个股收益率
        factor_list: list, 待合成因子列表
        span: 使用历史长度计算IC均值
        method: 估计协方差矩阵的方法。'sample':直接用样本协方差矩阵;'shrunk':压缩估计
        weight_limit: bool, 是否约束权重为正
    返回:
        DataFrame, 复合因子
    """
    # 计算各期IC
    ic_df = calc_ic(factor_df, mret_df, factor_list, return_col_name='nxt1_ret', ic_type='spearman')
    ic_df = ic_df.sort_values('trade_date')
    ic_df['trade_date'] = ic_df['trade_date'].shift(-1)
    for fn in factor_list:
        ic_df[fn] = ic_df[fn].rolling(span).mean()
    ic_df = ic_df.dropna()
    
    # Maximize the IC
    m_ic_df = {}
    for dt in ic_df['trade_date']:
        ic_mean = ic_df.loc[ic_df['trade_date'] == dt, factor_list].values
        tmp_factor_df = factor_df.loc[factor_df['trade_date'] == dt, factor_list]

        n = len(factor_list)
        # Solve the optimization problem
        if method == 'sample':
            P = matrix(2*np.cov(tmp_factor_df.T))
        elif method == 'shrunk':
            P = matrix(2*LedoitWolf().fit(tmp_factor_df.dropna().values).covariance_)
        q = matrix([0.0]*n)
        G = matrix(-np.identity(n))
        h = matrix([0.0]*n)
        A = matrix(ic_mean, (1,n))
        b = matrix(1.0)
        if weight_limit:
            try:
                res = np.array(solvers.qp(P=P,q=q,G=G,h=h, A=A,b=b)['x'])
            except Exception:
                res = np.array(solvers.qp(P=P,q=q, A=A,b=b)['x'])
        else:
            res = np.array(solvers.qp(P=P,q=q,A=A,b=b)['x'])

        m_ic_df[dt] = np.array(res).reshape(n)
    m_ic_df = pd.DataFrame(m_ic_df, index=factor_list).T.reset_index()
    if weight_limit:
        m_ic_df[factor_list] = np.where(m_ic_df[factor_list] < 0, 0, m_ic_df[factor_list])
    m_ic_df.loc[m_ic_df.sum(axis=1)==0, factor_list] = 1 
    m_ic_df.columns = ['trade_date']+factor_list
    
    # Weight the factors
    conb_df = factor_combine(factor_df, factor_list, m_ic_df)
    return conb_df, m_ic_df
Example #26
def LW_est(X):
    '''
    Covariance estimate with Ledoit-Wolf optimal shrinkage
    X_size = (n_samples, n_features)
    '''

    lw = LedoitWolf()
    cov_lw = lw.fit(X).covariance_

    return cov_lw
Example #27
 def __init__(self,
              cov_estimator=LedoitWolf(store_precision=False),
              kind='covariance',
              memory=Memory(cachedir=None, verbose=0),
              memory_level=0,
              verbose=0):
     self.cov_estimator = cov_estimator
     self.kind = kind
     self.memory = memory
     self.memory_level = memory_level
     self.verbose = verbose
Example #28
 def __init__(
     self,
     cov_estimator=LedoitWolf(store_precision=False),
     prior_mean_type="geometric",
     shrinkage=0.5,
     explained_variance_threshold=0.7,
 ):
     self.cov_estimator = cov_estimator
     self.prior_mean_type = prior_mean_type
     self.shrinkage = shrinkage
     self.explained_variance_threshold = explained_variance_threshold
Example #29
def similarity_measure_correlation(ds_tar, ds_src, results, p_value):

    print('Computing correlation similarity...')
    #classifier = results['classifier']

    #Get classifier from results
    classifier = results['fclf']

    #Make prediction on training set, to understand data distribution
    prediction_src = classifier.predict(ds_src)
    #prediction_src = results['predictions_ds']
    true_predictions = np.array(prediction_src) == ds_src.targets
    example_dist = dict()

    #Extract feature selected from each dataset
    if isinstance(classifier, FeatureSelectionClassifier):
        f_selection = results['fclf'].mapper
        ds_tar = f_selection(ds_tar)
        ds_src = f_selection(ds_src)
    '''
    Get class distribution information: mean and covariance
    '''

    for label in np.unique(ds_src.targets):

        #Get examples correctly classified
        mask = ds_src.targets == label
        example_dist[label] = dict()
        true_ex = ds_src.samples[mask * true_predictions]

        #Get Mean and Covariance to draw the distribution
        mean_ = np.mean(true_ex, axis=0)
        example_dist[label]['mean'] = mean_
        '''
        cov_ = np.cov(true_ex.T)
        example_dist[label]['cov'] = cov_
        '''
        print('Estimation of covariance matrix for ' + label + ' class...')
        print(true_ex.shape)
        try:
            print('Method is Correlation...')
            #print(true_ex[:np.int(true_ex.shape[0]/3),:].shape)
            #cov_ = MinCovDet().fit(true_ex)
            #cov_ = LedoitWolf().fit(true_ex)
            #cov_ = EmpiricalCovariance().fit(true_ex)
            #cov_ = GraphLasso(alpha=0.5).fit(true_ex)
            #cov_ = OAS(alpha=0.1).fit(true_ex)
        except MemoryError:
            print('Method is LedoitWolf')
            cov_ = LedoitWolf(block_size=15000).fit(true_ex)

        #example_dist[label]['i_cov'] = scipy.linalg.inv(cov_)
        #example_dist[label]['i_cov'] = cov_.precision_
    print('Inverted covariance estimated...')
Example #30
def covarianceEstimation(daily_returns, cov_estimator):
    lw = LedoitWolf()
    if cov_estimator == "shrinkage":
        return lw.fit(daily_returns).covariance_
    elif cov_estimator == "empirical":
        return daily_returns.cov()
    elif cov_estimator == "multifactor":
        # FIXME
        return None
    else:
        raise Exception("协方差矩阵类型为[shrinkage,empirical,multifactor]")