Пример #1
0
def repair_covariance(M2, reg_cov):
    '''
    Suppress numeric errors by keeping the covariance matrix positive
    semidefinite.

    The input is symmetrised and ``strength * I`` is added, where
    ``strength`` is ``reg_cov`` plus the magnitude of the most negative
    diagonal entry (if any). A Cholesky factorisation is then attempted up
    to 20 times; after each failure the diagonal is boosted again, the
    boost is doubled for the next round, and ``cov_nearest`` is tried with
    the "clipped" method, falling back to "nearest" if that raises.

    Parameters
    ----------
    M2 : square 2-D array
        Matrix to repair. It is not modified; a new array is returned.
    reg_cov : float
        Base regularisation added to the diagonal.

    Returns
    -------
    The repaired matrix. If all 20 attempts fail, the last candidate is
    returned as is, without any guarantee that it factorises.
    '''
    K = M2.shape[0]
    strength = reg_cov + max(0, -np.min(np.diag(M2)))
    # This creates a fresh array, so the in-place update below never
    # touches the caller's matrix.
    M2 = 0.5 * (M2 + M2.T) + strength * np.eye(K)
    for _ in range(20):
        try:
            # Only success/failure of the factorisation matters here.
            chol(M2)
            break
        except LinAlgError:
            M2 += strength * np.eye(K)
            strength *= 2
            for method in ("clipped", "nearest"):
                try:
                    with warnings.catch_warnings():
                        warnings.filterwarnings('ignore')
                        M2 = cov_nearest(M2, method=method)
                    break
                except Exception:
                    # Best effort: this repair method failed, try the next
                    # one. KeyboardInterrupt/SystemExit are not Exception
                    # subclasses and therefore still propagate.
                    continue
    return M2
Пример #2
0
def test_corrpsd_threshold():
    """Check that the smallest eigenvalue of each repaired matrix matches
    the requested threshold, for several thresholds."""
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        # Each entry pairs a repair call with the relative tolerance used
        # when comparing the smallest eigenvalue against the threshold.
        checks = (
            (lambda: corr_nearest(x, n_fact=100, threshold=threshold), 1e-6),
            (lambda: corr_clipped(x, threshold=threshold), 0.25),
            (lambda: cov_nearest(x, method='nearest', n_fact=100,
                                 threshold=threshold), 1e-6),
            (lambda: cov_nearest(x, n_fact=100, threshold=threshold), 0.25),
        )
        for repair, rtol in checks:
            smallest = np.linalg.eigvalsh(repair())[0]
            assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
Пример #3
0
def test_corrpsd_threshold():
    """Check that the smallest eigenvalue of each repaired matrix matches
    the requested threshold, for several thresholds."""
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    for threshold in [0, 1e-15, 1e-10, 1e-6]:
        # Each entry pairs a repair call with the relative tolerance used
        # when comparing the smallest eigenvalue against the threshold.
        checks = (
            (lambda: corr_nearest(x, n_fact=100, threshold=threshold), 1e-6),
            (lambda: corr_clipped(x, threshold=threshold), 0.25),
            (lambda: cov_nearest(x, method='nearest', n_fact=100,
                                 threshold=threshold), 1e-6),
            (lambda: cov_nearest(x, n_fact=100, threshold=threshold), 0.25),
        )
        for repair, rtol in checks:
            smallest = np.linalg.eigvalsh(repair())[0]
            assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
Пример #4
0
    def test_cov_nearest(self):
        """Compare both ``cov_nearest`` methods against the stored
        reference matrix and Frobenius-norm distance."""
        x, expected = self.x, self.res
        # (method, decimals for the matrix comparison, rtol for the norm)
        settings = (('nearest', 3, 0.001), ('clipped', 2, 0.15))
        for method, decimal, rtol in settings:
            y = cov_nearest(x, method=method)
            assert_almost_equal(y, expected.mat, decimal=decimal)
            assert_allclose(norm_f(x, y), expected.normF, rtol=rtol)
Пример #5
0
    def test_cov_nearest(self):
        """Compare both ``cov_nearest`` methods against the stored
        reference matrix and Frobenius-norm distance."""
        x, expected = self.x, self.res
        # (method, decimals for the matrix comparison, rtol for the norm)
        settings = (('nearest', 3, 0.001), ('clipped', 2, 0.15))
        for method, decimal, rtol in settings:
            y = cov_nearest(x, method=method)
            assert_almost_equal(y, expected.mat, decimal=decimal)
            assert_allclose(norm_f(x, y), expected.normF, rtol=rtol)
Пример #6
0
def _generate_cov_matrix(nexog, nendog, ninstruments, collinearity,
                         endogeneity, instr_strength):
    """Build a random matrix with unit diagonal over the exogenous,
    endogenous, instrument and "epsilon" variables and repair it with
    ``cov_nearest``.

    The strict upper triangle is drawn uniformly from [0, 0.1) and then
    overwritten with the requested collinearity, instrument strength and
    endogeneity before the matrix is symmetrised.
    """
    exog_names, endog_names, instr_names = _variable_names(
        nexog, nendog, ninstruments)
    labels = exog_names + endog_names + instr_names + ["epsilon"]
    dim = len(labels)

    raw = np.zeros((dim, dim))
    upper = np.triu_indices(dim, k=1)
    raw[upper] = np.random.uniform(low=-0, high=0.1, size=len(upper[0]))
    frame = pd.DataFrame(data=raw, columns=labels, index=labels)
    frame.loc["exog_0", "exog_1"] = collinearity

    # Blend the rows of the two collinear regressors. The second update
    # deliberately reads the already-blended "exog_0" row.
    higher_weight = 0.5 + 0.5 * (1 - collinearity)
    lower_weight = 0.5 - 0.5 * (1 - collinearity)
    rest = labels[2:]
    frame.loc["exog_0", rest] = (
        higher_weight * frame.loc["exog_0", rest] +
        lower_weight * frame.loc["exog_1", rest])
    frame.loc["exog_1", rest] = (
        higher_weight * frame.loc["exog_1", rest] +
        lower_weight * frame.loc["exog_0", rest])

    frame.loc[exog_names + instr_names, "epsilon"] = 0
    frame.loc[endog_names, instr_names] = instr_strength
    frame.loc[endog_names, "epsilon"] = endogeneity

    cov = frame.values

    # Mirror the upper triangle and force a unit diagonal.
    cov += cov.T
    cov[np.diag_indices(dim)] = 1
    return cov_nearest(cov, method="nearest", threshold=1e-10, n_fact=10)
Пример #7
0
    def optimize(t, sigma, pi):
        nonlocal last_solution
        nr_of_assets = len(sigma)

        # only optimize if we have a re-balance trigger (early exit)
        if last_solution is not None and last_solution.sum() > 0.99:
            # so we had at least one valid solution in the past
            # we can early exit if we do not have any signal or or no signal for any currently hold asset
            if len(t.shape) > 1 and t.shape[1] == nr_of_assets:
                if t[:, last_solution >= 0.01].sum().any() < 1:
                    return keep_solution
            else:
                if t.sum().any() < 1:
                    return keep_solution

        # make sure covariance matrix is positive definite
        simga = cov_nearest(sigma)

        # we perform optimization except when all expected returns are < 0
        # then we early exit with an un-invest command
        if len(pi[:, pi[0] < 0]) == pi.shape[1]:
            return uninvest
        else:
            try:
                sol = solve_qp(risk_aversion * sigma, -pi.T, G=G, h=h, A=A, b=b, solver=solver)
                if sol is None:
                    _log.error("no solution found")
                    return uninvest
                else:
                    return sol
            except Exception as e:
                _log.error(traceback.format_exc())
                return uninvest
Пример #8
0
def do_normalized_pca(df,
                      df_dist,
                      dist_func=lambda x: 1 / x**2,
                      do_build_and_clean=True,
                      supervised=False,
                      supervised_t=0,
                      labels=None):
    '''
    Run PCA on `df` after left-multiplying it by the Cholesky factor of a
    weight matrix derived from the sample distance matrix `df_dist`.

    df - some data frame of patient data
    df_dist - distance matrix data frame; must be square and symmetric, in
        the format saved in `build_distance_matrix`, and indexed by the same
        set of samples as `df`
    dist_func - maps the distance frame to weights; infinite weights are
        replaced by zero
    `do_build_and_clean`: if False, do not do centring or normalizing. The
        caller must at least do centering on `df` in this case first,
        otherwise the results don't make sense
    supervised, supervised_t, labels - when `supervised` is True the weights
        are adjusted with `get_supervised_t_weights(..., t=supervised_t)`
        and symmetrized; `labels` is then required

    Returns the result of `do_pca` with 10 components.

    Raises ValueError if the indices of `df` and `df_dist` differ, if
    `supervised` is True without `labels`, or if the repaired weight matrix
    is too far from positive semidefinite.
    '''
    # The two frames must describe exactly the same samples. This check was
    # previously a bare expression whose result was discarded.
    if (df.index.difference(df_dist.index).size != 0
            or df_dist.index.difference(df.index).size != 0):
        raise ValueError(
            "df and df_dist must be indexed by the same set of samples")

    L_weight = dist_func(df_dist)
    L_weight[L_weight == np.inf] = 0  # send inf's to zero

    # do supervised PCA adjustment if applicable
    if supervised:
        if labels is None:
            raise ValueError(
                "If running supervised=True, must supply labels lookup table")
        L_weight = get_supervised_t_weights(L_weight, labels, t=supervised_t)
        L_weight = symmetrize(L_weight)

    # Zero the diagonal, set it to minus the column sums, then negate the
    # whole matrix: the diagonal ends up holding the sums and the
    # off-diagonal entries the negated weights.
    np.fill_diagonal(L_weight.values, 0)
    np.fill_diagonal(L_weight.values, -L_weight.sum())
    L_weight = -L_weight
    L_weight = cov_nearest(L_weight)

    # Code to handle the case that cov_nearest gives a matrix with one very
    # small negative eigenvalue, that makes the matrix not PSD.
    # Solution is to add epsilon-Identity, where epsilon is the magnitude of
    # the smallest eigenvalue, but only if the perturbation this would cause
    # is very small, as measured by the smallest diagonal. If it would cause
    # a big perturbation, throw an error.
    # eigvalsh returns the eigenvalues in ascending order, so element 0 is
    # the smallest one (np.linalg.eig gives no ordering guarantee).
    smallest_eig = min(np.linalg.eigvalsh(L_weight)[0], 0)
    smallest_diag = np.min(np.diag(L_weight))
    rel_perturbation = abs(smallest_eig / smallest_diag)
    if rel_perturbation > 1e-5:
        raise ValueError("L_weight is non-neglegibly far from the PSD cone")
    L_weight = L_weight + abs(smallest_eig) * 10 * np.identity(len(L_weight))

    L = np.linalg.cholesky(L_weight)

    # clean non-variant alleles from df and build matrix
    if do_build_and_clean:
        M = build_matrix(clean(df))
    else:
        M = df.values
    A = L.dot(M)

    ret_pca = do_pca(A, n_components=10)
    return ret_pca
Пример #9
0
def test_corr_psd():
    """A matrix that is already positive definite must come back unchanged
    from every repair routine."""
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    repairs = (
        lambda m: corr_nearest(m, n_fact=100),
        corr_clipped,
        lambda m: cov_nearest(m, n_fact=100),
    )
    for repair in repairs:
        assert_almost_equal(x, repair(x), decimal=14)

    # Same check after a small shift of the diagonal.
    shifted = x + 0.001 * np.eye(3)
    assert_almost_equal(shifted, cov_nearest(shifted, n_fact=100), decimal=14)
Пример #10
0
def test_corr_psd():
    """A matrix that is already positive definite must come back unchanged
    from every repair routine."""
    x = np.array([[1, -0.2, -0.9], [-0.2, 1, -0.2], [-0.9, -0.2, 1]])

    repairs = (
        lambda m: corr_nearest(m, n_fact=100),
        corr_clipped,
        lambda m: cov_nearest(m, n_fact=100),
    )
    for repair in repairs:
        assert_almost_equal(x, repair(x), decimal=14)

    # Same check after a small shift of the diagonal.
    shifted = x + 0.001 * np.eye(3)
    assert_almost_equal(shifted, cov_nearest(shifted, n_fact=100), decimal=14)
Пример #11
0
def test_corrpsd_threshold(threshold):
    """Check that the smallest eigenvalue of each repaired matrix matches
    the given threshold."""
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # Each entry pairs a repair call with the relative tolerance used when
    # comparing the smallest eigenvalue against the threshold.
    checks = (
        (lambda: corr_nearest(x, n_fact=100, threshold=threshold), 1e-6),
        (lambda: corr_clipped(x, threshold=threshold), 0.25),
        (lambda: cov_nearest(x, method='nearest', n_fact=100,
                             threshold=threshold), 1e-6),
        (lambda: cov_nearest(x, n_fact=100, threshold=threshold), 0.25),
    )
    for repair, rtol in checks:
        smallest = np.linalg.eigvalsh(repair())[0]
        assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
Пример #12
0
def cov_matrix(symbols, start, end, interval='m'):
    """Return ``cov_nearest`` applied to the covariance of the symbols'
    returns between ``start`` and ``end``.

    Yearly returns are fetched with ``get_yr_returns`` when ``interval`` is
    ``'y'``; any other interval is forwarded to ``get_returns``.
    """
    if interval == 'y':
        def fetch(symbol):
            return get_yr_returns(symbol, start, end)
    else:
        def fetch(symbol):
            return get_returns(symbol, start, end, interval)

    series = [numpy.array(fetch(symbol)) for symbol in symbols]
    stacked = numpy.array(series)
    return cov_nearest(numpy.cov(stacked))
Пример #13
0
def test_corrpsd_threshold(threshold):
    """Check that the smallest eigenvalue of each repaired matrix matches
    the given threshold."""
    x = np.array([[1, -0.9, -0.9], [-0.9, 1, -0.9], [-0.9, -0.9, 1]])

    # Each entry pairs a repair call with the relative tolerance used when
    # comparing the smallest eigenvalue against the threshold.
    checks = (
        (lambda: corr_nearest(x, n_fact=100, threshold=threshold), 1e-6),
        (lambda: corr_clipped(x, threshold=threshold), 0.25),
        (lambda: cov_nearest(x, method='nearest', n_fact=100,
                             threshold=threshold), 1e-6),
        (lambda: cov_nearest(x, n_fact=100, threshold=threshold), 0.25),
    )
    for repair, rtol in checks:
        smallest = np.linalg.eigvalsh(repair())[0]
        assert_allclose(smallest, threshold, rtol=rtol, atol=1e-15)
Пример #14
0
 def test_cov_nearest(self):
     """Compare ``cov_nearest`` (method 'nearest', threshold 1e-7) against
     the stored reference matrix and Frobenius-norm distance."""
     x, expected = self.x, self.res
     # Warnings emitted by the repair are silenced for this call only.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         repaired = cov_nearest(x, method='nearest', threshold=1e-7)
     assert_almost_equal(repaired, expected.mat, decimal=2)
     assert_allclose(norm_f(x, repaired), expected.normF, rtol=0.0015)
Пример #15
0
 def test_cov_nearest(self):
     """Compare ``cov_nearest`` (method 'nearest', threshold 1e-7) against
     the stored reference matrix and Frobenius-norm distance."""
     x, expected = self.x, self.res
     # Warnings emitted by the repair are silenced for this call only.
     with warnings.catch_warnings():
         warnings.simplefilter("ignore")
         repaired = cov_nearest(x, method='nearest', threshold=1e-7)
     assert_almost_equal(repaired, expected.mat, decimal=2)
     assert_allclose(norm_f(x, repaired), expected.normF, rtol=0.0015)
Пример #16
0
def fix_numerical_problem(k: np.ndarray,
                          tolerance: float) -> np.ndarray:
    """
    Repair ``k`` with ``cov_nearest`` and return its transposed Cholesky
    factor.

    :param k: matrix handed to ``cov_nearest``
    :param tolerance: forwarded to ``cov_nearest`` as ``threshold``
    :return: transpose of ``np.linalg.cholesky`` of the repaired matrix
    """
    repaired = cov_nearest(k, threshold=tolerance)
    lower_factor = np.linalg.cholesky(repaired)
    return lower_factor.T
Пример #17
0
def add_noise(theta, p, alpha, threshold=0.1):
    """Add a random sparse SPD matrix to ``theta`` and clip the sum with
    ``cov_nearest``.

    ``theta`` has its diagonal zeroed in place before the noise is added.
    The noise coefficients are bounded by ``+/- threshold``; the clipping
    threshold passed to ``cov_nearest`` is fixed at 0.1.
    """
    perturbation = make_sparse_spd_matrix(dim=p,
                                          alpha=alpha,
                                          norm_diag=False,
                                          smallest_coef=-threshold,
                                          largest_coef=threshold)
    np.fill_diagonal(theta, 0.0)
    noisy = perturbation + theta
    return cov_nearest(noisy, method="clipped", threshold=0.1)
Пример #18
0
    def _gista(self, theta0, S, _lambdas, verbose=False):
        """
        Run the G-ISTA iteration and return the final iterate.

        Reference:
        https://papers.nips.cc/paper/4574-iterative-thresholding-algorithm-for-sparse-inverse-covariance-estimation.pdf
        """
        current = theta0
        # Initial step size: squared smallest eigenvalue of the start point.
        t = min(np.linalg.eigvals(theta0))**2
        dim = len(current)

        if verbose:
            print(f'f(X,S) = {self.sfunc.eval(theta0, S)}')
            print(f'g(X,rho) = {self.nsfunc.eval(theta0, _lambdas)}')
            print(
                f'Initial Objective: {self._pgm_objective(theta0, S, _lambdas)}'
            )

        # Bad starting point: replace it with a random sparse SPD matrix.
        # NOTE(review): this evaluates self.theta0, not the theta0 argument
        # -- confirm that is intended.
        if self._pgm_objective(self.theta0, S, _lambdas) > 10000:
            current = make_sparse_spd_matrix(dim,
                                             alpha=0.5,
                                             norm_diag=False,
                                             smallest_coef=-1.0,
                                             largest_coef=1.0)

        for iteration in range(self.max_iters):
            # Clip the iterate whenever it is no longer positive definite.
            if not _is_pos_def(current):
                print('Clipped Precision matrix')
                current = cov_nearest(current, method="clipped", threshold=0.1)

            if self.ss_type == 'backtracking':
                t = self._step_size(current, S, _lambdas, t)

            gap = self._duality_gap(dim, current, S, _lambdas)

            if verbose:
                print(f'Duality Gap: {gap}.')

            if gap < self.epsilon and self.dual_gap:
                print(f'iterations: {iteration}')
                print(f'Duality Gap: {gap} < {self.epsilon}. Exiting.')
                break

            # Gradient step on the smooth part, then the proximal operator
            # of the non-smooth part.
            gradient_step = current - t * self.sfunc.gradient(current, S)
            proposal = self.nsfunc.prox(gradient_step, _lambdas)

            if self.ss_type == 'backtracking':
                t = _set_next_inital_step_size(proposal, current)

            current = proposal

        return current
Пример #19
0
def r_fit_ellipse(X, confidence=0.9, n=100):
    """Return ``n`` points of an ellipse built from the mean and the
    clipped sample covariance of ``X``.

    The radius is derived from the F-distribution quantile at
    ``confidence`` with (2, number of rows - 1) degrees of freedom.
    """
    from statsmodels.stats.correlation_tools import cov_nearest
    from scipy.stats import f

    sample_cov = np.cov(X.T)
    repaired_cov = cov_nearest(sample_cov, method='clipped', threshold=1e-5)
    centre = X.mean(axis=0).reshape(-1, 1)

    factor = np.linalg.cholesky(repaired_cov)

    scale = np.sqrt(2 * f.ppf(confidence, 2, X.shape[0]-1))

    # Evenly spaced angles around the unit circle, one row per point.
    angles = np.arange(n) * 2 * pi/n
    circle = np.vstack((np.cos(angles), np.sin(angles))).T
    shaped = circle.dot(factor)

    return (centre + scale * shaped.T).T
Пример #20
0
def near_psd(cov, method='clipped', threshold=1e-15, n_fact=100) -> np.ndarray:
    """
    Return a positive (semi-) definite covariance matrix close to ``cov``.

    A matrix that already passes ``is_psd`` is returned unchanged (as an
    ndarray). Otherwise the work is delegated to ``cov_nearest``, which goes
    through the correlation matrix: it finds a nearby positive semi-definite
    correlation matrix and scales it back with the original standard
    deviations.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately ``threshold``. With ``threshold=0`` it may still be
    slightly negative within numerical error, e.g. around -1e-16.

    Parameters
    ----------
    cov: (N, N) array like
        Initial covariance matrix; must be symmetric.

    method: { 'clipped', 'nearest' }, optional
        "clipped" replaces eigenvalues below the threshold by the threshold
        and renormalizes so the diagonal is one. It computes eigenvalues
        only once and is therefore much faster, but the result is further
        from the original correlation matrix than with "nearest".

        "nearest" iteratively adjusts the correlation matrix by clipping
        the eigenvalues of a difference matrix; the diagonal is set to one.

    threshold: float
        Clipping threshold for the smallest eigenvalue.

    n_fact: int
        Factor determining the maximum number of iterations for "nearest".

    Returns
    -------
    ndarray
        positive semi-definite matrix

    Raises
    ------
    ValueError
        If ``cov`` is not symmetric.
    """
    matrix = np.asarray(cov)

    if is_symmetric(matrix):
        if is_psd(matrix):
            # Nothing to repair.
            return matrix
        return cov_nearest(matrix, method, threshold, n_fact, False)

    raise ValueError('covariance matrix must be symmetric')
Пример #21
0
def near_psd(cov, method='clipped', threshold=1e-15, n_fact=100) -> np.ndarray:
    """
    Return a positive (semi-) definite covariance matrix close to ``cov``.

    A matrix that already passes ``is_psd`` is returned unchanged (as an
    ndarray). Otherwise the work is delegated to ``cov_nearest``, which goes
    through the correlation matrix: it finds a nearby positive semi-definite
    correlation matrix and scales it back with the original standard
    deviations.

    The smallest eigenvalue of the intermediate correlation matrix is
    approximately ``threshold``. With ``threshold=0`` it may still be
    slightly negative within numerical error, e.g. around -1e-16.

    Parameters
    ----------
    cov: (N, N) array like
        Initial covariance matrix; must be symmetric.

    method: { 'clipped', 'nearest' }, optional
        "clipped" replaces eigenvalues below the threshold by the threshold
        and renormalizes so the diagonal is one. It computes eigenvalues
        only once and is therefore much faster, but the result is further
        from the original correlation matrix than with "nearest".

        "nearest" iteratively adjusts the correlation matrix by clipping
        the eigenvalues of a difference matrix; the diagonal is set to one.

    threshold: float
        Clipping threshold for the smallest eigenvalue.

    n_fact: int
        Factor determining the maximum number of iterations for "nearest".

    Returns
    -------
    ndarray
        positive semi-definite matrix

    Raises
    ------
    ValueError
        If ``cov`` is not symmetric.
    """
    matrix = np.asarray(cov)

    if is_symmetric(matrix):
        if is_psd(matrix):
            # Nothing to repair.
            return matrix
        return cov_nearest(matrix, method, threshold, n_fact, False)

    raise ValueError('covariance matrix must be symmetric')
 def test_psd_norm(self):
     """Run ``cov_nearest`` on the covariance of a small table of series.

     Each key of ``data`` maps to five observations. The covariance of the
     resulting frame is passed to ``cov_nearest`` with default arguments.
     NOTE: the result is not asserted on; the test only exercises the call.
     """
     data = {'AAXJ': [66.029999000000004, 63.0, 59.270000000000003, 53.340000000000003, 52.75],
             'UBU': [20.079999999999998, 20.079999999999998, 21.550000000000001, 20.559999999999999, 20.18],
             'ALD': [45.939999, 45.330002, 44.490001999999997, 42.729999999999997, 42.409999999999997],
             'VSO': [47.399999999999999, 42.899999999999999, 43.340000000000003, 41.719999999999999, 40.950000000000003],
             'VAS': [73.700000000000003, 69.989999999999995, 72.099999999999994, 66.569999999999993, 64.549999999999997],
             'BTWJPNF_AU': [0.66000000000000003, 0.66000000000000003, 0.68999999999999995, 0.67000000000000004, 0.63],
             'VGS': [59.75, 58.439999999999998, 61.0, 58.25, 56.780000000000001],
             'EMB': [112.370003, 109.91999800000001, 109.660004, 108.010002, 106.400002],
             'FTAL': [41.854999999999997, 39.329999999999998, 40.390000000000001, 38.32, 37.229999999999997],
             'UBP': [20.150717539569801, 19.1999999999999, 19.050000000000001, 17.990000000000101, 17.240000000000101],
             'BTWASHF_AU': [1.8799999999999999, 1.8400000000000001, 1.8799999999999999, 1.8400000000000001, 1.8400000000000001],
             'VLC': [64.719999999999999, 61.219999999999999, 63.530000000000001, 57.469999999999999, 55.170000000000002],
             'MCHI': [59.849997999999999, 56.040000999999997, 50.040000999999997, 44.099997999999999, 43.810001],
             'UBE': [20.983828369806702, 20.140000000000001, 21.510000000000002, 20.1099999999999, 19.75],
             'BTA0420_AU': [1.1799999999999999, 1.1299999999999999, 1.0700000000000001, 1.02, 1.0],
             'SLXX': [136.13999999999999, 131.22, 134.57499999999999, 130.71000000000001, 131.46000000000001],
             'VTS': [143.81, 139.49000000000001, 149.49000000000001, 143.16, 139.47],
             'RGB': [21.379999999999999, 21.0, 21.280000000000001, 21.399999999999999, 21.52],
             'IJP': [17.239999999999998, 16.710000000000001, 17.68, 16.98, 16.09],
             'HOW0062_AU': [1.05, 1.05, 1.0, 1.01, 1.02],
             'DSUM': [24.91, 24.739999999999998, 24.510000000000002, 23.040001, 23.559999000000001],
             'ILB': [115.41, 113.8, 114.0, 114.56, 114.31999999999999],
             'PEBIX_US': [9.9499999999999993, 10.529999999999999, 10.19, 10.1, 9.7400000000000002],
             'BTWFAUS_AU': [1.74, 1.6499999999999999, 1.73, 1.5900000000000001, 1.5600000000000001],
             'BTWEUSH_AU': [1.3200000000000001, 1.29, 1.3799999999999999, 1.3500000000000001, 1.3200000000000001],
             'IEAG': [87.209999999999994, 83.355000000000004, 84.674999999999997, 87.055000000000007, 87.405000000000001],
             'RSM': [20.789999999999999, 20.550000000000001, 20.77, 20.850000000000001, 20.629999999999999],
             'ROTHWSE_AU': [2.6400000000000001, 2.4700000000000002, 2.3999999999999999, 2.3300000000000001, 2.3900000000000001],
             'UBA': [19.886842423199901, 18.6400000000001, 19.129999999999999, 17.440000000000001, 16.879999999999999],
             'IUSB': [101.769997, 100.519997, 100.459999, 100.389999, 100.25],
             'ROTHFXD_AU': [1.23, 1.21, 1.1899999999999999, 1.21, 1.21],
             'UBJ': [20.763995359855802, 20.479000000000099, 21.379999999999999, 20.549999999999901, 19.469999999999999],
             'IEU': [61.130000000000003, 57.57, 61.130000000000003, 58.340000000000003, 56.100000000000001],
             'VGE': [62.549999999999997, 60.229999999999997, 58.549999999999997, 53.600000000000001, 52.880000000000003],
             'RIGS': [25.25, 24.940000999999999, 24.940000999999999, 24.549999, 24.100000000000001],
             'VHY': [69.030000000000001, 65.040000000000006, 64.150000000000006, 59.219999999999999, 57.100000000000001],
             'UBW': [21.244103679132198, 20.510000000000002, 21.620000000000001, 19.779999999999902, 20.079999999999998],
             'BOND': [26.280000000000001, 25.800000000000001, 26.030000000000001, 26.23, 26.02],
             'BTWAMSH_AU': [1.23, 1.21, 1.24, 1.21, 1.1799999999999999]}
     # One column per key, one row per observation.
     df = pd.DataFrame(data)
     # 39 columns but only 5 rows, so the sample covariance is rank deficient.
     df_cov = df.cov()
     #print(df_cov)
     p1 = cov_nearest(df_cov)
Пример #23
0
    def gauss_low_filter(self):
        """Compute the filtered signal, its covariance, error bars and
        spectrum, and store them on the instance.

        Sets ``self.filtered``, ``self.Covariance``, ``self.error_bar``
        (twice the square root of the covariance diagonal) and
        ``self.filt_spect`` (scaled magnitude of the first half of the FFT
        of the filtered signal). Returns nothing.
        """
        noise_var = 1  # extra diagonal term to counter conditioning problems

        K_ry = self.kernel_conv(flag=2)
        K_ry = K_ry.T

        K_y = K_SE(self.x, self.x, self.gamma,
                   self.sigma) + self.noise * np.eye(self.Nx)
        self.filtered = np.matmul(K_ry, np.linalg.inv(K_y)).dot(self.y)
        # K_y is built with np.eye(self.Nx) above, so the regularising
        # identity must have the same size (the previous `m` was not bound
        # anywhere in this method).
        M = K_ry @ np.linalg.solve(
            K_y + noise_var * np.eye(self.Nx), K_ry.T
        )  # noise_var is used here as there may be conditioning problems

        K_rr = self.kernel_conv(flag=1)
        # Repair K_rr and add a small jitter to its diagonal before
        # subtracting M.
        K_rr_parche = cov_nearest(K_rr) + 1e-8 * np.eye(len(self.time))
        self.Covariance = (K_rr_parche - M)
        self.error_bar = 2 * np.sqrt(np.diag(self.Covariance))

        self.filt_spect = 2.0 / self.grid_num * np.abs(
            fft(self.filtered)[:int(self.grid_num / 2)])
Пример #24
0
def cov_fix(cov, method="clipped", **kwargs):
    r"""
    Fix a covariance matrix to a positive definite matrix.

    Parameters
    ----------
    cov : nd-array or DataFrame of shape (n_features, n_features)
        Features covariance matrix, where n_features is the number of features.
    method : str
        The default value is 'clipped', see more in `cov_nearest <https://www.statsmodels.org/stable/generated/statsmodels.stats.correlation_tools.cov_nearest.html>`_.
    **kwargs
        Other parameters from `cov_nearest <https://www.statsmodels.org/stable/generated/statsmodels.stats.correlation_tools.cov_nearest.html>`_.

    Returns
    -------
    cov_ : nd-array or DataFrame
        The repaired covariance matrix as an array with at least two
        dimensions. When the input is a DataFrame, a DataFrame labelled on
        both axes with the input's column names is returned instead.

    Raises
    ------
        Exceptions raised by ``cov_nearest`` are propagated unchanged.

    """
    # Remember the column labels only for DataFrame input.
    labels = cov.columns.tolist() if isinstance(cov, pd.DataFrame) else None

    as_matrix = np.array(cov, ndmin=2)
    repaired = cov_nearest(as_matrix, method=method, **kwargs)
    repaired = np.array(repaired, ndmin=2)

    if labels is None:
        return repaired
    return pd.DataFrame(repaired, index=labels, columns=labels)
Пример #25
0
def preproc_sm(sm,
               confounders,
               final_deconfound=True,
               feature_names=None,
               hcp_data_dict_correct_pct_to_t=True,
               nearest_psd_threshold=1e-6):
    """Preprocessing of subject measures.

    The selected features are prepared with :func:`prepare_sm`, a
    subject-by-subject covariance matrix is estimated pairwise (ignoring
    missing values), repaired with ``cov_nearest`` and decomposed with an
    eigendecomposition to obtain the returned scores.

    Parameters
    ----------
    sm : pd.DataFrame (n_samples, n_Y_features)
        behavioral and demographic data matrix. Names of features to include,
        and confounds must be column names
    confounders : np.ndarray (n_samples, n_features)
        confounder data matrix
    final_deconfound : bool
        if ``True`` the final scores are once more deconfounded before they
        are returned
    feature_names : None, slice or list-like
        names of features to use, names must be columns in ``sm``. If ``None``
        default (i.e. from Smith et al. 2015, applicable to HCP data) feature
        names are used
    hcp_data_dict_correct_pct_to_t : bool
        whether to correct HCP data dict names, see :func:`_check_features`
    nearest_psd_threshold : float
        threshold for finding an acceptable nearest positive definite matrix

    Returns
    -------
    uu2 : np.ndarray (n_samples, n_Y_features)
        processed dataset Y
    uu2_white : np.ndarray (n_samples, n_Y_features)
        whitened processed dataset Y
    S4_raw : np.ndarray (n_samples, n_Y_features)
        unprocessed Y data comprising only the selected features
    feature_names : list
        ordered list of feature names corresponding to the columns of Y
    """
    S4_raw, S4_deconfounded, feature_names = prepare_sm(
        sm, confounders, feature_names, hcp_data_dict_correct_pct_to_t)

    # estimate covariance-matrix, ignoring missing values
    # NOTE: This is the n_subjects x n_subjects covariance matrix across
    # features!
    S_cov = np.nan * np.empty((S4_raw.shape[0], S4_raw.shape[0]))
    for i in trange(len(S_cov), desc='subject', leave=False):
        # Only the lower triangle is computed; each value is mirrored.
        for j in range(i + 1):
            # Use only the features that are finite for both subjects.
            mask = np.isfinite(S4_deconfounded[i]) \
                   & np.isfinite(S4_deconfounded[j])
            S_cov[i, j] = S_cov[j, i] = np.cov(S4_deconfounded[i, mask],
                                               S4_deconfounded[j, mask])[0, 1]
    assert np.isfinite(S_cov).all()
    # Pairwise estimation does not guarantee a positive semidefinite result,
    # so the matrix is repaired and the repair is sanity-checked.
    S_cov_psd = cov_nearest(S_cov, threshold=nearest_psd_threshold)
    assert np.isfinite(S_cov_psd).all()
    assert np.allclose(S_cov_psd, S_cov_psd.T)
    assert np.linalg.matrix_rank(S_cov_psd) == len(S_cov_psd)
    print('smallest sval S_cov =',
          np.linalg.svd(S_cov, compute_uv=False, hermitian=True).min())
    print('smallest sval S_cov_psd =',
          np.linalg.svd(S_cov_psd, compute_uv=False, hermitian=True).min())
    print('rank S_cov =', np.linalg.matrix_rank(S_cov))
    print('rank S_cov_psd =', np.linalg.matrix_rank(S_cov_psd))

    # --- PCA ---

    dd2, uu2 = np.linalg.eigh(S_cov_psd)
    assert np.allclose((uu2**2).sum(0), 1)
    # Reorder eigenvalues and eigenvectors by descending eigenvalue.
    order = np.argsort(dd2)[::-1]
    dd2 = dd2[order]
    uu2 = uu2[:, order]
    assert np.all(np.diff(dd2) <= 0)

    # Whitened scores have unit standard deviation per column; the
    # non-whitened scores are scaled so that their variance equals the
    # eigenvalue (both are asserted below).
    uu2_white = uu2 / uu2.std(0)
    uu2 = uu2 * (np.sqrt(dd2) / uu2.std(0)).reshape(1, -1)

    # uu2 doesn't have mean 0, probably because of the way it's computed,
    # i.e. with cov_nearest, ...
    #assert np.allclose(uu2.mean(0), 0)
    assert np.allclose(uu2_white.var(0), 1)
    assert np.allclose(uu2.var(0), dd2)

    if final_deconfound:
        # deconfound again, just to be safe
        uu2_white = deconfound(uu2_white, confounders)
        uu2 = deconfound(uu2, confounders)

    return uu2, uu2_white, S4_raw, feature_names
 def nearestPSD(P):
     """Return ``cov_nearest(P)``, i.e. ``P`` repaired with that function's
     default arguments."""
     # Other options, not ideal but sometimes necessary for robust solutions:
     # 1) replace P by 1/2 P + 1/2 P' to even out the off-diagonal terms
     #    (enforces symmetry)
     # 2) let P = P + eps * I (n x n), where eps is a small scalar, to make
     #    sure the matrix is not ill conditioned
     # 3) use 64-bit floating-point arithmetic
     return cov_nearest(P)
Пример #27
0
def _constraints_to_str(constraints):
    """Concatenate ``str()`` of every constraint value, in key order."""
    return ''.join(str(constraints[key]) for key in constraints.keys())


def update_row_col(subproblem1, subproblem2, partition_list, attributes):
    """Refresh the last two rows and columns of the global ``COV`` matrix.

    The constraints of every partition are flattened to strings.  For each
    of the two subproblems a kernel column

        sigma**2 * exp(-d**2 / (2 * l**2))

    is computed, where ``d`` is the ``l_dist`` string distance between the
    subproblem's constraint string and each constraint string.  These columns
    overwrite the last two rows/columns of ``COV``; the whole matrix is then
    replaced by the output of ``cov_nearest`` and stored back into ``COV``.

    Parameters
    ----------
    subproblem1, subproblem2 : mapping
        Each must provide a ``'constraints'`` mapping.
    partition_list : list of mapping
        All partitions; the last two entries are ignored and replaced by
        ``subproblem1`` and ``subproblem2``.
    attributes : mapping
        Must provide ``'KG_sigma'`` (kernel amplitude) and ``'KG_l'``
        (kernel length scale).

    Returns
    -------
    None
        ``COV`` is updated in place.

    Notes
    -----
    Assumes ``COV`` is a square DataFrame with one row per entry of
    ``partition_list`` -- TODO confirm against the caller.
    """
    global COV

    sigma = attributes['KG_sigma']
    length_scale = attributes['KG_l']

    # Constraint strings: all partitions but the last two, then the two
    # subproblems that take their place.
    s1_const_str = _constraints_to_str(subproblem1['constraints'])
    s2_const_str = _constraints_to_str(subproblem2['constraints'])
    const_str = [
        _constraints_to_str(partition['constraints'])
        for partition in partition_list[:-2]
    ]
    const_str += [s1_const_str, s2_const_str]

    def kernel_column(anchor):
        # Kernel values between `anchor` and every constraint string.
        dist = np.array([l_dist(anchor, other) for other in const_str])
        return sigma**2 * np.exp(-np.square(dist) / (2 * length_scale**2))

    col1 = kernel_column(s1_const_str)  # second-to-last row/column
    col2 = kernel_column(s2_const_str)  # last row/column

    # Work on an explicit float copy: `to_numpy()` is not guaranteed to
    # return a writable view of the frame, and copying keeps COV untouched
    # if `cov_nearest` raises.
    M = COV.to_numpy(dtype=float, copy=True)
    k = M.shape[0] - 1

    M[:, k - 1] = col1
    M[k - 1, :] = col1
    M[:, k] = col2
    M[k, :] = col2

    M = cov_nearest(M, threshold=1e-6, n_fact=10000, return_all=False)

    # Write the repaired matrix back through the DataFrame itself instead of
    # through a NumPy view, so the update is not silently lost (copy) or
    # rejected (read-only view).
    COV.iloc[:, :] = M
Пример #28
0
    def covariance_matrix_solve(self, expval, index, stdev, rhs):
        """
        Solves matrix equations of the form `covmat * soln = rhs` and
        returns the values of `soln`, where `covmat` is the covariance
        matrix represented by this class.

        Parameters
        ----------
        expval: array-like
           The expected value of endog for each observed value in the
           group.
        index: integer
           The group index.
        stdev : array-like
            The standard deviation of endog for each observation in
            the group.
        rhs : list/tuple of array-like
            A set of right-hand sides; each defines a matrix equation
            to be solved.

        Returns
        -------
        soln : list of array-like
            The solutions to the matrix equations, one per element of
            `rhs`.

        Notes
        -----
        Some dependence structures do not use `expval` and/or `index`
        to determine the correlation matrix.  Some families
        (e.g. binomial) do not use the `stdev` parameter when forming
        the covariance matrix.

        If the Cholesky factorization of the covariance matrix fails,
        the matrix is projected with `cov_nearest` and the
        factorization is retried, up to 20 times with a doubling
        threshold.  The number of projections is appended to
        `self.cov_adjust`.  If the matrix still cannot be factorized,
        a ConvergenceWarning is issued and only the diagonal of the
        matrix is used.

        Systems of linear equations with the covariance matrix as the
        left hand side (LHS) are solved for different right hand sides
        (RHS); the LHS is only factorized once to save time.

        This is a default implementation, it can be reimplemented in
        subclasses to optimize the linear algebra according to the
        structure of the covariance matrix.
        """

        vmat, is_cor = self.covariance_matrix(expval, index)
        if is_cor:
            # Scale correlation to covariance.  A new array is built on
            # purpose: an in-place `*=` would modify whatever array
            # `covariance_matrix` handed back (possibly a cached one).
            vmat = vmat * np.outer(stdev, stdev)

        # Factor the covariance matrix.  If the factorization fails,
        # attempt to condition it into a factorizable matrix.
        threshold = 1e-2
        cov_adjust = 0
        vco = None
        for _ in range(20):
            try:
                vco = spl.cho_factor(vmat)
                break
            except np.linalg.LinAlgError:
                vmat = cov_nearest(vmat, method=self.cov_nearest_method,
                                   threshold=threshold)
                threshold *= 2
                cov_adjust += 1

        self.cov_adjust.append(cov_adjust)

        # Last resort if we still can't factor the covariance matrix.
        if vco is None:
            warnings.warn(
                "Unable to condition covariance matrix to an SPD "
                "matrix using cov_nearest", ConvergenceWarning)
            vmat = np.diag(np.diag(vmat))
            vco = spl.cho_factor(vmat)

        soln = [spl.cho_solve(vco, x) for x in rhs]
        return soln
Пример #29
0
def bl_model(sigma, w_tilde, p, v, n, c=1.0, lambda_bar=1.2):
    """
    Black-Litterman posterior mean and covariance, following Meucci:

    http://papers.ssrn.com/sol3/papers.cfm?abstract_id=1117574

    Arguments
      Required:
        :param sigma: nxn numpy array, covariance matrix of the asset
                      return time series (positive definite, symmetric)
        :param w_tilde: nx1 numpy array, market cap portfolio weights
                        (positive entries that sum to one)
        :param p: mxn numpy array of investor views on future asset
                  movements (positive or negative floats)
        :param v: mx1 numpy array of expected returns of the portfolios
                  corresponding to the views (positive or negative floats)
        :param n: length of the return time series used to compute the
                  covariance matrix
      Optional:
        :param c: positive float, overall confidence in the views return
                  estimator; defaults to 1 (as in the example on page 5)
        :param lambda_bar: positive float, risk-aversion level; defaults
                           to 1.2 as mentioned after equation (5)

    Returns
        :return: tuple ``(mu_bl, psd_bl)`` -- the posterior expected returns
                 and the posterior covariance after symmetrization and a
                 pass through ``cov_nearest``
    """
    logger.debug("Running BL with "
                 "sigma:\n{}\nw_tilde:\n{}\np:\n{}\nv:\n{}\nn:{}".format(
                     sigma, w_tilde, p, v, n))

    # Prior: equilibrium returns, equation (5), and scaling, equation (8)
    prior_mean = 2.0 * lambda_bar * np.dot(sigma, w_tilde)
    tau = 1.0 / float(n)

    # Products shared by the formulas below
    p_sigma = np.dot(p, sigma)
    sigma_pt = np.dot(sigma, p.T)

    # Uncertainty of the views, equation (12)
    omega = np.dot(p_sigma, p.T) / c

    # Main model, equations (20) and (21)
    gain = np.dot(tau * sigma_pt,
                  inv(tau * np.dot(p, sigma_pt) + omega))
    view_surprise = v - np.dot(p, prior_mean)

    mu_bl = prior_mean + np.dot(gain, view_surprise)
    sig_bl = (1.0 + tau) * sigma - tau * np.dot(gain, p_sigma)

    # Rounding may leave the result slightly asymmetric or not quite
    # positive semidefinite: symmetrize first, then repair.
    symmetric = (sig_bl + sig_bl.T) / 2
    return mu_bl, cov_nearest(symmetric)
Пример #30
0
    def covariance_matrix_solve(self, expval, index, stdev, rhs):
        """
        Solves matrix equations of the form `covmat * soln = rhs` and
        returns the values of `soln`, where `covmat` is the covariance
        matrix represented by this class.

        Parameters
        ----------
        expval: array-like
           The expected value of endog for each observed value in the
           group.
        index: integer
           The group index.
        stdev : array-like
            The standard deviation of endog for each observation in
            the group.
        rhs : list/tuple of array-like
            A set of right-hand sides; each defines a matrix equation
            to be solved.

        Returns
        -------
        soln : list of array-like
            The solutions to the matrix equations, one per element of
            `rhs`.

        Notes
        -----
        Some dependence structures do not use `expval` and/or `index`
        to determine the correlation matrix.  Some families
        (e.g. binomial) do not use the `stdev` parameter when forming
        the covariance matrix.

        When the Cholesky factorization fails, the matrix is passed
        through `cov_nearest` and the factorization is attempted
        again; this is repeated at most 20 times, doubling the
        threshold each time.  The count of such projections is
        appended to `self.cov_adjust`.  If no attempt succeeds, a
        ConvergenceWarning is emitted and the off-diagonal entries are
        discarded before factorizing.

        Systems of linear equations with the covariance matrix as the
        left hand side (LHS) are solved for different right hand sides
        (RHS); the LHS is only factorized once to save time.

        This is a default implementation, it can be reimplemented in
        subclasses to optimize the linear algebra according to the
        structure of the covariance matrix.
        """

        vmat, is_cor = self.covariance_matrix(expval, index)

        if is_cor:
            # Turn the correlation matrix into a covariance matrix without
            # writing into the array returned by `covariance_matrix`, which
            # the caller may still hold a reference to.
            vmat = vmat * np.outer(stdev, stdev)

        # Factor the covariance matrix.  If the factorization fails,
        # attempt to condition it into a factorizable matrix.
        threshold = 1e-2
        cov_adjust = 0
        vco = None
        for _ in range(20):
            try:
                vco = spl.cho_factor(vmat)
                break
            except np.linalg.LinAlgError:
                vmat = cov_nearest(vmat,
                                   method=self.cov_nearest_method,
                                   threshold=threshold)
                threshold *= 2
                cov_adjust += 1

        self.cov_adjust.append(cov_adjust)

        # Last resort if we still can't factor the covariance matrix.
        if vco is None:
            warnings.warn(
                "Unable to condition covariance matrix to an SPD matrix using cov_nearest",
                ConvergenceWarning)
            vmat = np.diag(np.diag(vmat))
            vco = spl.cho_factor(vmat)

        soln = [spl.cho_solve(vco, x) for x in rhs]
        return soln
Пример #31
0
def simulate(A, T, P, K, J, D):
    """
    Simulate one data set from framework 2 and fit two regressors to it.

    Returns a 6-tuple:
    ``(reg_PPCA, reg_PC, F0, G0, Gamma, epsilon)`` where ``reg_PPCA`` and
    ``reg_PC`` are the fitted regressor objects, ``F0``/``G0`` are the
    rotated factors/loadings, ``Gamma`` is a (P, K) array of zeros and
    ``epsilon`` is the simulated noise.
    """
    # Factors: first-order autoregression with matrix A, driven by
    # standard normal shocks.
    shocks = np.random.randn(K, T)
    for t in range(1, T):
        shocks[:, t:t + 1] += np.matmul(A, shocks[:, t - 1:t])
    F = shocks.T

    # Covariate: standard normal.  NOTE(review): the column assignments
    # below look like they expect D == 1 and J >= 3 -- confirm with callers.
    X = np.random.randn(P, D)

    # Sieve basis built from three functions of X
    Phi = np.zeros((P, J))
    Phi[:, 0:1] = X
    Phi[:, 1:2] = X**2 - 1
    Phi[:, 2:3] = X**3 - 2 * X

    # Random loadings
    B = np.random.randn(J, K)
    G = np.matmul(Phi, B)

    # Rotation, then rotated factors and loadings
    H = np.sqrt(T) * co_diagonalize(np.matmul(F.T, F),
                                    np.linalg.pinv(np.matmul(G.T, G)))
    F0 = np.matmul(F, H)
    G0 = np.matmul(G, np.linalg.pinv(H.T))

    # Noise: gamma-distributed scales around a sparse random matrix with
    # unit diagonal
    gamma_shape, gamma_rate = 7.06, 536.93
    entry_mean, entry_sd = -0.0019, 0.1499
    Diag = np.diag(np.random.gamma(gamma_shape, 1 / gamma_rate, size=P))
    sigma0 = np.random.normal(entry_mean, entry_sd, size=(P, P))
    np.fill_diagonal(sigma0, 1.0)

    # Mirror the lower triangle onto the upper triangle
    upper = np.triu_indices(P, k=1)
    sigma0[upper] = sigma0.T[upper]

    # Zero out small entries, then repair with cov_nearest
    sigma0 = sigma0 * (np.abs(sigma0) > 0.03)
    sigma0 = cov_nearest(sigma0, threshold=1e-10)

    covariance = np.matmul(Diag, np.matmul(sigma0, Diag))
    noise = np.random.multivariate_normal(np.zeros(P), covariance, T).T

    # Observations
    Y = np.matmul(G, F.T) + noise

    # First regressor: cubic-spline basis (rather than the line/square/cube
    # basis used for reg_PC below), fitted with fit_PPCA
    n_splines = int(3 * (P * min(P, T))**(0.25))
    reg_PPCA = Regressor()
    reg_PPCA.add_cubic_splines(X.min(), X.max(), n_splines)
    reg_PPCA.fit_PPCA(Y, X, 3, find_K=False)

    # Second regressor: line/square/cube basis, fitted with fit_PC
    reg_PC = Regressor()
    reg_PC.add_line([1, 0])
    reg_PC.add_square([1, 0, -1])
    reg_PC.add_cube([1, 0, -2, 0])
    reg_PC.fit_PC(Y, 3, find_K=False)

    return reg_PPCA, reg_PC, F0, G0, np.zeros((P, K)), noise
Пример #32
0
# print slope, intercept, r_value, p_value, std_err
yy = f(x, slope, intercept)

pol_raw0, polcov_raw0 = sciopt.curve_fit(f,
                                         x,
                                         y,
                                         sigma=y_err,
                                         p0=[intercept, slope],
                                         maxfev=1200)
pol_raw_err0 = np.sqrt(np.diag(polcov_raw0))

# cov1 = lftools._get_covariance_matrix_from_raw(y_raw)
# cov1 = cov_nearest(np.cov(y_raw), method="nearest", threshold=9e-16, n_fact=500)
# cov1 = near_psd(np.cov(y_raw))
# cov1 = nearPSD(np.cov(y_raw))
cov1 = cov_nearest(np.cov(y_raw), method="nearest")
# print np.linalg.eigvals(cov1).min()
pol_raw1, polcov_raw1 = sciopt.curve_fit(f,
                                         x,
                                         y,
                                         sigma=cov1,
                                         p0=[intercept, slope],
                                         maxfev=2000,
                                         ftol=1e-16,
                                         epsfcn=1e-10,
                                         xtol=1e-10)
pol_raw_err1 = np.sqrt(np.diag(polcov_raw1))

# cov2 = lftools._get_covariance_matrix_from_raw(V, iscov=True)
cov2 = cov_nearest(V, method="nearest", threshold=9e-16, n_fact=500)
pol_raw2, polcov_raw2 = sciopt.curve_fit(f,