示例#1
0
def test_corrpsd_threshold():
    """Check that every repair routine puts the smallest eigenvalue at the threshold.

    The input matrix has a negative eigenvalue (1 - 2 * 0.9 = -0.8), so each
    routine has to modify it.  For every threshold, the first eigenvalue
    returned by ``np.linalg.eigvalsh`` is compared with that threshold, using
    a tight tolerance for the "nearest" variants and a loose one otherwise.
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # (routine, extra keyword arguments, relative tolerance), in check order.
    cases = [
        (corr_nearest, dict(n_fact=100), 1e-6),
        (corr_clipped, dict(), 0.25),
        (cov_nearest, dict(method='nearest', n_fact=100), 1e-6),
        (cov_nearest, dict(n_fact=100), 0.25),
    ]

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        for repair, extra_kwargs, rtol in cases:
            repaired = repair(x, threshold=threshold, **extra_kwargs)
            smallest = np.linalg.eigvalsh(repaired)[0]
            assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
示例#2
0
def test_corrpsd_threshold():
    """Check that every repair routine puts the smallest eigenvalue at the threshold.

    The input matrix has a negative eigenvalue (1 - 2 * 0.9 = -0.8), so each
    routine has to modify it.  For every threshold, the first eigenvalue
    returned by ``np.linalg.eigvalsh`` is compared with that threshold, using
    a tight tolerance for the "nearest" variants and a loose one otherwise.
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # (routine, extra keyword arguments, relative tolerance), in check order.
    cases = [
        (corr_nearest, dict(n_fact=100), 1e-6),
        (corr_clipped, dict(), 0.25),
        (cov_nearest, dict(method='nearest', n_fact=100), 1e-6),
        (cov_nearest, dict(n_fact=100), 0.25),
    ]

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        for repair, extra_kwargs, rtol in cases:
            repaired = repair(x, threshold=threshold, **extra_kwargs)
            smallest = np.linalg.eigvalsh(repaired)[0]
            assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
示例#3
0
def _find_positive_definite(cov):
    """Return ``cov`` if it is positive definite, otherwise a repaired matrix.

    A matrix counts as positive definite here when every eigenvalue reported
    by ``np.linalg.eigvalsh`` is strictly greater than zero.  If ``cov``
    already qualifies it is returned untouched (the very same object).
    Otherwise ``corr_nearest`` is applied to it, first with its default
    settings and then, if that result still fails the check, with
    progressively larger ``threshold`` values.

    :param cov: Symmetric square matrix to check and, if needed, repair.
    :return: ``cov`` itself, or the first ``corr_nearest`` result whose
        eigenvalues are all strictly positive.
    :raises ValueError: If no attempted threshold yields a positive definite
        matrix.
    """
    def _is_positive_definite(mat):
        return bool(np.all(np.linalg.eigvalsh(mat) > 0))

    if _is_positive_definite(cov):
        return cov

    # First attempt: same call as before, relying on corr_nearest's defaults.
    candidate = corr_nearest(cov)

    # The previous implementation repeated that identical call forever when
    # it failed, because its input never changed between iterations.  Each
    # retry now asks for a larger eigenvalue floor, and the number of
    # retries is bounded.
    retry_thresholds = iter(10.0 ** exponent for exponent in range(-14, -1))
    while not _is_positive_definite(candidate):
        threshold = next(retry_thresholds, None)
        if threshold is None:
            raise ValueError(
                "corr_nearest did not produce a positive definite matrix")
        candidate = corr_nearest(cov, threshold=threshold)
    return candidate
示例#4
0
 def test_nearest(self):
     """Compare ``corr_nearest`` on ``self.x`` with the reference in ``self.res``.

     Checks the repaired matrix, the value returned by ``norm_f`` for the
     input and the repaired matrix, the full eigenvalue spectrum, and that
     the first eigenvalue equals the requested threshold of 1e-7.
     """
     x, expected = self.x, self.res
     nearest = corr_nearest(x, threshold=1e-7, n_fact=100)

     assert_almost_equal(nearest, expected.mat, decimal=3)
     assert_allclose(norm_f(x, nearest), expected.normF, rtol=0.0015)

     # The reference eigenvalues are reversed before being compared with
     # the output of eigvalsh.
     eigenvalues = np.linalg.eigvalsh(nearest)
     assert_allclose(eigenvalues, expected.eigenvalues[::-1],
                     rtol=0.003, atol=1e-7)
     assert_allclose(eigenvalues[0], 1e-7, rtol=1e-6)
示例#5
0
 def test_nearest(self):
     """Compare ``corr_nearest`` on ``self.x`` with the reference in ``self.res``.

     Checks the repaired matrix, the value returned by ``norm_f`` for the
     input and the repaired matrix, the full eigenvalue spectrum, and that
     the first eigenvalue equals the requested threshold of 1e-7.
     """
     x, expected = self.x, self.res
     nearest = corr_nearest(x, threshold=1e-7, n_fact=100)

     assert_almost_equal(nearest, expected.mat, decimal=3)
     assert_allclose(norm_f(x, nearest), expected.normF, rtol=0.0015)

     # The reference eigenvalues are reversed before being compared with
     # the output of eigvalsh.
     eigenvalues = np.linalg.eigvalsh(nearest)
     assert_allclose(eigenvalues, expected.eigenvalues[::-1],
                     rtol=0.003, atol=1e-7)
     assert_allclose(eigenvalues[0], 1e-7, rtol=1e-6)
示例#6
0
    def make_psd(self) -> "correlationEstimate":
        """Build a new estimate whose data-bearing block went through ``corr_nearest``.

        The sub-matrix for the assets returned by ``assets_with_data()`` is
        converted with ``as_np()`` and passed to ``corr_nearest`` (with
        ``n_fact=10``).  The result is wrapped in a new
        ``correlationEstimate``, and the assets returned by
        ``assets_with_missing_data()`` are then re-added through
        ``add_assets_with_nan_values``.

        :return: The estimate returned by ``add_assets_with_nan_values``.
        """
        present = self.assets_with_data()
        missing = self.assets_with_missing_data()

        raw_matrix = self.subset(present).as_np()
        repaired = correlationEstimate(
            values=corr_nearest(raw_matrix, n_fact=10),
            columns=self.assets_with_data(),
        )

        return repaired.add_assets_with_nan_values(missing)
示例#7
0
def test_corr_psd():
    """A matrix that is already positive definite must come back unchanged.

    Each routine is applied to such a matrix and its output is compared with
    the input to 14 decimals.  ``cov_nearest`` is checked a second time on
    the same matrix with 0.001 added to the diagonal.
    """
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
    x2 = x + 0.001 * np.eye(3)

    # (routine, input matrix, extra keyword arguments), in check order.
    checks = [
        (corr_nearest, x, dict(n_fact=100)),
        (corr_clipped, x, dict()),
        (cov_nearest, x, dict(n_fact=100)),
        (cov_nearest, x2, dict(n_fact=100)),
    ]
    for repair, matrix, extra_kwargs in checks:
        assert_almost_equal(matrix, repair(matrix, **extra_kwargs), decimal=14)
示例#8
0
def test_corr_psd():
    """A matrix that is already positive definite must come back unchanged.

    Each routine is applied to such a matrix and its output is compared with
    the input to 14 decimals.  ``cov_nearest`` is checked a second time on
    the same matrix with 0.001 added to the diagonal.
    """
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
    x2 = x + 0.001 * np.eye(3)

    # (routine, input matrix, extra keyword arguments), in check order.
    checks = [
        (corr_nearest, x, dict(n_fact=100)),
        (corr_clipped, x, dict()),
        (cov_nearest, x, dict(n_fact=100)),
        (cov_nearest, x2, dict(n_fact=100)),
    ]
    for repair, matrix, extra_kwargs in checks:
        assert_almost_equal(matrix, repair(matrix, **extra_kwargs), decimal=14)
示例#9
0
def test_corrpsd_threshold(threshold):
    """Check that every repair routine puts the smallest eigenvalue at ``threshold``.

    The input matrix has a negative eigenvalue (1 - 2 * 0.9 = -0.8), so each
    routine has to modify it.  The first eigenvalue returned by
    ``np.linalg.eigvalsh`` is compared with ``threshold``, using a tight
    tolerance for the "nearest" variants and a loose one otherwise.

    :param threshold: Eigenvalue floor passed to every routine.  It is
        supplied by the caller (presumably a pytest parametrization or
        fixture that is not visible here).
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # (routine, extra keyword arguments, relative tolerance), in check order.
    cases = [
        (corr_nearest, dict(n_fact=100), 1e-6),
        (corr_clipped, dict(), 0.25),
        (cov_nearest, dict(method='nearest', n_fact=100), 1e-6),
        (cov_nearest, dict(n_fact=100), 0.25),
    ]

    for repair, extra_kwargs, rtol in cases:
        repaired = repair(x, threshold=threshold, **extra_kwargs)
        smallest = np.linalg.eigvalsh(repaired)[0]
        assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
示例#10
0
def test_corrpsd_threshold(threshold):
    """Check that every repair routine puts the smallest eigenvalue at ``threshold``.

    The input matrix has a negative eigenvalue (1 - 2 * 0.9 = -0.8), so each
    routine has to modify it.  The first eigenvalue returned by
    ``np.linalg.eigvalsh`` is compared with ``threshold``, using a tight
    tolerance for the "nearest" variants and a loose one otherwise.

    :param threshold: Eigenvalue floor passed to every routine.  It is
        supplied by the caller (presumably a pytest parametrization or
        fixture that is not visible here).
    """
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # (routine, extra keyword arguments, relative tolerance), in check order.
    cases = [
        (corr_nearest, dict(n_fact=100), 1e-6),
        (corr_clipped, dict(), 0.25),
        (cov_nearest, dict(method='nearest', n_fact=100), 1e-6),
        (cov_nearest, dict(n_fact=100), 0.25),
    ]

    for repair, extra_kwargs, rtol in cases:
        repaired = repair(x, threshold=threshold, **extra_kwargs)
        smallest = np.linalg.eigvalsh(repaired)[0]
        assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
示例#11
0
from statsmodels.stats.correlation_tools import (corr_nearest, corr_clipped,
                                                 cov_nearest)

examples = ['all']

if 'all' in examples:
    # x0 is positive definite.
    x0 = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
    # x has negative eigenvalues, so it is not positive definite.
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    n_fact = 2

    print('evals original', np.linalg.eigvalsh(x))

    # Repair x three ways; after each one print the eigenvalues of the
    # result and then the repaired matrix itself.
    for label, repair, options in (
        ('evals nearest', corr_nearest, {'n_fact': 100}),
        ('evals nearest', corr_nearest, {'n_fact': 100, 'threshold': 1e-16}),
        ('evals clipped', corr_clipped, {'threshold': 1e-16}),
    ):
        y = repair(x, **options)
        print(label, np.linalg.eigvalsh(y))
        print(y)

    np.set_printoptions(precision=4)
    print('\nMini Monte Carlo')
    # we are simulating a uniformly distributed symmetric matrix
    #     and find close positive definite matrix
示例#12
0
from statsmodels.stats.correlation_tools import (
                 corr_nearest, corr_clipped, cov_nearest)

examples = ['all']

if 'all' in examples:
    # x0 is positive definite.
    x0 = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])
    # x has negative eigenvalues, so it is not positive definite.
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    n_fact = 2

    # NOTE: print is called as a function throughout; the former Python 2
    # print statements are a SyntaxError on Python 3.
    print('evals original', np.linalg.eigvalsh(x))

    # corr_nearest without an explicit threshold.
    y = corr_nearest(x, n_fact=100)
    print('evals nearest', np.linalg.eigvalsh(y))
    print(y)

    # corr_nearest with an explicit threshold.
    y = corr_nearest(x, n_fact=100, threshold=1e-16)
    print('evals nearest', np.linalg.eigvalsh(y))
    print(y)

    # corr_clipped with the same threshold.
    y = corr_clipped(x, threshold=1e-16)
    print('evals clipped', np.linalg.eigvalsh(y))
    print(y)

    np.set_printoptions(precision=4)
    print('\nMini Monte Carlo')
    # we are simulating a uniformly distributed symmetric matrix
    #     and find close positive definite matrix
示例#13
0
def sample_from_corrgan(model_loc, dim=10, n_samples=1):
    # pylint: disable=import-outside-toplevel, disable=too-many-locals
    """
    Samples correlation matrices from the pre-trained CorrGAN network.

    It is reproduced with modifications from the following paper:
    `Marti, G., 2020, May. CorrGAN: Sampling Realistic Financial Correlation Matrices Using
    Generative Adversarial Networks. In ICASSP 2020-2020 IEEE International Conference on
    Acoustics, Speech and Signal Processing (ICASSP) (pp. 8459-8463). IEEE.
    <https://arxiv.org/pdf/1910.09504.pdf>`_

    It loads the appropriate CorrGAN model for the required dimension. Generates a matrix output
    from this network. Symmetrizes this matrix and finds the nearest correlation matrix
    that is positive semi-definite. Finally, it maximizes the sum of the similarities between
    adjacent leaves to arrange it with hierarchical clustering.

    The CorrGAN network was trained on the correlation profiles of the S&P 500 stocks. Therefore
    the output retains these properties. In addition, the final output retains the following
    6 stylized facts:

    1. Distribution of pairwise correlations is significantly shifted to the positive.

    2. Eigenvalues follow the Marchenko-Pastur distribution, but for a very large first
    eigenvalue (the market).

    3. Eigenvalues follow the Marchenko-Pastur distribution, but for a couple of other
    large eigenvalues (industries).

    4. Perron-Frobenius property (first eigenvector has positive entries).

    5. Hierarchical structure of correlations.

    6. Scale-free property of the corresponding Minimum Spanning Tree (MST).

    :param model_loc: (str) Location of folder containing CorrGAN models.
    :param dim: (int) Dimension of correlation matrix to sample.
        In the range [2, 200].
    :param n_samples: (int) Number of samples to generate.
    :return: (np.array) Sampled correlation matrices of shape (n_samples, dim, dim).
    :raises ValueError: If ``dim`` is not in the supported range, or if ``model_loc`` contains
        no generator folder whose dimension is at least ``dim``.
    """
    # Validate dimension first, so that invalid input fails with a ValueError
    # before the TensorFlow import is attempted.
    if not 1 < dim <= 200:
        raise ValueError("Dimension not supported, {}".format(dim))

    # Import here needed to prevent unnecessary imports in other parts of code.
    import tensorflow as tf

    # Resulting correlation matrices.
    nearest_corr_mats = []

    # Load generator model closest to the required dimension by looking at the models folder.
    # Every non-file entry is parsed as "<prefix>_<N><one trailing character>"; the load
    # path below uses the form "generator_<N>d".
    dimension_from_folder = [
        int(f.split("_")[1][:-1]) for f in listdir(model_loc)
        if not path.isfile(path.join(model_loc, f))
    ]
    all_generator_dimensions = np.sort(dimension_from_folder)

    # Smallest available generator that is at least as large as requested. A default is
    # given to next() so that a missing model surfaces as a clear ValueError rather than a
    # bare StopIteration.
    closest_dimension = next(
        (candidate for candidate in all_generator_dimensions if candidate >= dim),
        None)
    if closest_dimension is None:
        raise ValueError(
            "No CorrGAN generator with dimension >= {} found in {}".format(
                dim, model_loc))

    # Load model.
    generator = tf.keras.models.load_model("{}/generator_{}d".format(
        model_loc, closest_dimension),
                                           compile=False)

    # Sample from generator. Input dimension based on network.
    noise_dim = generator.layers[0].input_shape[1]
    noise = tf.random.normal([n_samples, noise_dim])
    generated_mat = generator(noise, training=False)

    # Get the indices of an upper triangular matrix.
    tri_rows, tri_cols = np.triu_indices(dim, k=1)

    # For each sample generated, make them strict correlation matrices
    # by projecting them on the nearest correlation matrix using Higham's
    # alternating projections method.
    for i in range(n_samples):
        # Grab only the required dimensions from generated matrix.
        corr_mat = np.array(generated_mat[i, :dim, :dim, 0])

        # Set diagonal to 1 and symmetrize (copy the upper triangle onto the lower one).
        np.fill_diagonal(corr_mat, 1)
        corr_mat[tri_cols, tri_rows] = corr_mat[tri_rows, tri_cols]
        # Get nearest correlation matrix that is positive semi-definite.
        nearest_corr_mat = corr_nearest(corr_mat)

        # Set diagonal to 1 and symmetrize.
        np.fill_diagonal(nearest_corr_mat, 1)
        nearest_corr_mat[tri_cols, tri_rows] = nearest_corr_mat[tri_rows,
                                                                tri_cols]

        # Arrange with hierarchical clustering by maximizing the sum of the
        # similarities between adjacent leaves.
        dist = 1 - nearest_corr_mat
        linkage_mat = hierarchy.linkage(dist[tri_rows, tri_cols],
                                        method="ward")
        optimal_leaves = hierarchy.optimal_leaf_ordering(
            linkage_mat, dist[tri_rows, tri_cols])
        optimal_ordering = hierarchy.leaves_list(optimal_leaves)
        ordered_corr = nearest_corr_mat[optimal_ordering, :][:,
                                                             optimal_ordering]
        nearest_corr_mats.append(ordered_corr)

    return np.array(nearest_corr_mats)